AnalyzerSelector.java
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.analysis;
import org.apache.doris.catalog.Index;
import com.google.common.base.Strings;
import java.util.Collections;
import java.util.Map;
/**
* Helper for selecting the analyzer that should be attached to a MATCH predicate.
* The current implementation is intentionally simple so that future rule-based
* selection can plug in without touching the planner surface again.
*/
public final class AnalyzerSelector {
private AnalyzerSelector() {
}
public static Selection select(Index index, String requestedAnalyzer) {
Map<String, String> properties = index == null ? Collections.emptyMap() : index.getProperties();
return select(properties, requestedAnalyzer, index == null);
}
public static Selection select(Map<String, String> properties, String requestedAnalyzer) {
// When called directly with properties (not through Index overload),
// assume an index exists if properties is provided (even if empty).
return select(properties, requestedAnalyzer, properties == null);
}
private static Selection select(Map<String, String> properties, String requestedAnalyzer, boolean noIndexExists) {
String normalizedRequest = normalize(requestedAnalyzer);
String preferredAnalyzer = selectDefaultAnalyzer(properties);
String rawParser = InvertedIndexUtil.getInvertedIndexParser(properties);
// For tables without index, use english parser which provides more aggressive
// tokenization (splits on all non-letter characters) for slow path matching.
// Note: empty properties with an existing index means keyword index (parser=none).
String parser = noIndexExists
? InvertedIndexUtil.INVERTED_INDEX_PARSER_ENGLISH
: rawParser;
if (!Strings.isNullOrEmpty(normalizedRequest)) {
return new Selection(normalizedRequest, parser, true);
}
String resolvedAnalyzer;
if (!Strings.isNullOrEmpty(preferredAnalyzer)) {
// Use custom analyzer from index properties
resolvedAnalyzer = preferredAnalyzer;
} else if (!Strings.isNullOrEmpty(parser)
&& !InvertedIndexUtil.INVERTED_INDEX_PARSER_NONE.equalsIgnoreCase(parser)) {
// Use builtin parser (english, chinese, standard, etc.)
resolvedAnalyzer = parser;
} else {
// Keyword index (parser=none) - no tokenization needed
resolvedAnalyzer = "";
}
return new Selection(resolvedAnalyzer, parser, false);
}
private static String normalize(String analyzer) {
// Policy names are case-insensitive, convert to lowercase for consistent lookup
return analyzer == null ? "" : analyzer.trim().toLowerCase();
}
private static String selectDefaultAnalyzer(Map<String, String> properties) {
// Placeholder for future rule-based selection. At the moment we simply
// reuse the analyzer attached to the index, if any.
return InvertedIndexUtil.getPreferredAnalyzer(properties);
}
public static final class Selection {
private final String analyzer;
private final String parser;
private final boolean explicit;
private Selection(String analyzer, String parser, boolean explicit) {
this.analyzer = normalize(analyzer);
String normalizedParser = normalize(parser);
this.parser = Strings.isNullOrEmpty(normalizedParser)
? InvertedIndexUtil.INVERTED_INDEX_PARSER_NONE
: normalizedParser;
this.explicit = explicit;
}
public String analyzer() {
return analyzer;
}
public String parser() {
return parser;
}
public boolean explicit() {
return explicit;
}
/**
* Computes the effective analyzer name for BE execution.
* - If we have a specific analyzer name (custom or builtin), return that name.
* This ensures BE can create/use the correct analyzer for both index path
* and slow path (full scan).
* - For keyword index (parser=none, no custom analyzer), return empty string
* to signal BE should not tokenize.
* - If explicit: use the user-specified analyzer (e.g., "none", "chinese")
* This tells BE to use the exact index with that analyzer key.
*/
public String effectiveAnalyzerName(boolean hasIndex, String fallbackParser) {
// If we have a specific analyzer name (from index properties or explicit),
// return it so BE can create/use the correct analyzer.
if (!Strings.isNullOrEmpty(analyzer)) {
return analyzer;
}
// No analyzer means keyword index or no index - in either case, the parser
// field already indicates what to do (parser=english for no index, parser=none
// for keyword index). Return empty string to let BE use parser_type for decisions.
if (!explicit) {
return "";
}
return Strings.isNullOrEmpty(fallbackParser) ? "" : fallbackParser;
}
}
}