GlobRegexUtil.java
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.common;
import com.google.re2j.Pattern;
import java.util.LinkedHashMap;
import java.util.Map;
/**
* Utility to convert a restricted glob pattern into a regex.
*
* Supported glob syntax:
* - '*' matches any sequence of characters
* - '?' matches any single character
* - '[...]' matches any character in the brackets
* - '[!...]' matches any character not in the brackets
* - '\\' escapes the next character
*/
public final class GlobRegexUtil {
// Small LRU to cap compiled pattern memory
private static final int REGEX_CACHE_CAPACITY = 256;
private static final Map<String, Pattern> REGEX_CACHE = new LinkedHashMap<String, Pattern>(
REGEX_CACHE_CAPACITY, 0.75f, true) {
@Override
protected boolean removeEldestEntry(Map.Entry<String, Pattern> eldest) {
return size() > REGEX_CACHE_CAPACITY;
}
};
private GlobRegexUtil() {
}
public static Pattern getOrCompilePattern(String globPattern) {
synchronized (REGEX_CACHE) {
Pattern cached = REGEX_CACHE.get(globPattern);
if (cached != null) {
return cached;
}
String regex = globToRegex(globPattern);
Pattern compiled = Pattern.compile(regex);
REGEX_CACHE.put(globPattern, compiled);
return compiled;
}
}
public static String globToRegex(String pattern) {
StringBuilder regexBuilder = new StringBuilder();
regexBuilder.append("^");
boolean isEscaped = false;
int patternLength = pattern.length();
for (int index = 0; index < patternLength; index++) {
char currentChar = pattern.charAt(index);
if (isEscaped) {
appendEscapedRegexChar(regexBuilder, currentChar);
isEscaped = false;
continue;
}
if (currentChar == '\\') {
isEscaped = true;
continue;
}
if (currentChar == '*') {
regexBuilder.append(".*");
continue;
}
if (currentChar == '?') {
regexBuilder.append('.');
continue;
}
if (currentChar == '[') {
int classIndex = index + 1;
boolean classClosed = false;
boolean isClassEscaped = false;
StringBuilder classBuffer = new StringBuilder();
if (classIndex < patternLength
&& (pattern.charAt(classIndex) == '!' || pattern.charAt(classIndex) == '^')) {
classBuffer.append('^');
classIndex++;
}
for (; classIndex < patternLength; classIndex++) {
char classChar = pattern.charAt(classIndex);
if (isClassEscaped) {
classBuffer.append(classChar);
isClassEscaped = false;
continue;
}
if (classChar == '\\') {
isClassEscaped = true;
continue;
}
if (classChar == ']') {
classClosed = true;
break;
}
classBuffer.append(classChar);
}
if (!classClosed) {
throw new IllegalArgumentException("Unclosed character class in glob pattern: " + pattern);
}
regexBuilder.append('[').append(classBuffer).append(']');
index = classIndex;
continue;
}
appendEscapedRegexChar(regexBuilder, currentChar);
}
if (isEscaped) {
appendEscapedRegexChar(regexBuilder, '\\');
}
regexBuilder.append("$");
return regexBuilder.toString();
}
private static void appendEscapedRegexChar(StringBuilder regexBuilder, char ch) {
switch (ch) {
case '.':
case '^':
case '$':
case '+':
case '*':
case '?':
case '(':
case ')':
case '|':
case '{':
case '}':
case '[':
case ']':
case '\\':
regexBuilder.append('\\').append(ch);
break;
default:
regexBuilder.append(ch);
break;
}
}
}