MatchPredicate.java
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.analysis;
import org.apache.doris.catalog.Function;
import org.apache.doris.catalog.Function.NullableMode;
import org.apache.doris.catalog.Index;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.catalog.TableIf.TableType;
import org.apache.doris.catalog.Type;
import org.apache.doris.thrift.TExprNode;
import org.apache.doris.thrift.TExprNodeType;
import org.apache.doris.thrift.TExprOpcode;
import org.apache.doris.thrift.TMatchPredicate;

import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import com.google.gson.annotations.SerializedName;

import java.util.Collections;
import java.util.Map;
import java.util.Objects;
import java.util.regex.Pattern;
/**
 * Full-text match predicate of the form {@code field MATCH_xxx query_str}.
 *
 * <p>Besides the operator and its two operands, the predicate carries the tokenizer settings
 * (parser, parser mode, char filter, lowercase flag, stopwords, analyzer name) that are written
 * into the thrift {@code TMatchPredicate}, plus the analyzer the caller passed explicitly, which
 * is rendered back as a {@code USING ANALYZER} clause when SQL is generated.
 */
public class MatchPredicate extends Predicate {
    /** Analyzer name sent when the table has no inverted index and no analyzer was requested. */
    private static final String DEFAULT_ANALYZER_NAME = "__default__";

    /**
     * Analyzer names matching this pattern are emitted unquoted in generated SQL.
     * Compiled once so that SQL rendering does not recompile the regex on every call.
     */
    private static final Pattern SIMPLE_IDENTIFIER = Pattern.compile("[A-Za-z_][A-Za-z0-9_]*");

    /**
     * Supported match operators.
     *
     * <p>Each constant holds the text used in generated SQL ({@link #toString()}), the function
     * name used to build the predicate's {@code Function} ({@link #getName()}) and the thrift
     * opcode ({@link #getOpcode()}).
     */
    public enum Operator {
        MATCH_ANY("MATCH_ANY", "match_any", TExprOpcode.MATCH_ANY),
        MATCH_ALL("MATCH_ALL", "match_all", TExprOpcode.MATCH_ALL),
        MATCH_PHRASE("MATCH_PHRASE", "match_phrase", TExprOpcode.MATCH_PHRASE),
        MATCH_PHRASE_PREFIX("MATCH_PHRASE_PREFIX", "match_phrase_prefix", TExprOpcode.MATCH_PHRASE_PREFIX),
        MATCH_REGEXP("MATCH_REGEXP", "match_regexp", TExprOpcode.MATCH_REGEXP),
        MATCH_PHRASE_EDGE("MATCH_PHRASE_EDGE", "match_phrase_edge", TExprOpcode.MATCH_PHRASE_EDGE);

        private final String description;
        private final String name;
        private final TExprOpcode opcode;

        Operator(String description,
                String name,
                TExprOpcode opcode) {
            this.description = description;
            this.name = name;
            this.opcode = opcode;
        }

        /** Returns the operator keyword as it appears in SQL text. */
        @Override
        public String toString() {
            return description;
        }

        /** Returns the lower-case function name associated with this operator. */
        public String getName() {
            return name;
        }

        /** Returns the thrift opcode associated with this operator. */
        public TExprOpcode getOpcode() {
            return opcode;
        }
    }

    @SerializedName("op")
    private Operator op;

    // Fields for thrift serialization (restored from old version)
    private String invertedIndexParser;
    private String invertedIndexParserMode;
    private Map<String, String> invertedIndexCharFilter;
    private boolean invertedIndexParserLowercase = true;
    private String invertedIndexParserStopwords = "";
    private String invertedIndexAnalyzerName = "";

    // Fields for SQL generation
    private String explicitAnalyzer = "";

    /** Used for serde only; initializes the parser and parser mode to their fallback constants. */
    private MatchPredicate() {
        invertedIndexParser = InvertedIndexUtil.INVERTED_INDEX_PARSER_UNKNOWN;
        invertedIndexParserMode = InvertedIndexUtil.INVERTED_INDEX_PARSER_FINE_GRANULARITY;
    }

    /**
     * Copy constructor backing {@link #clone()}.
     *
     * <p>All fields are copied by reference, so the char-filter map is shared with {@code other}.
     */
    protected MatchPredicate(MatchPredicate other) {
        super(other);
        op = other.op;
        invertedIndexParser = other.invertedIndexParser;
        invertedIndexParserMode = other.invertedIndexParserMode;
        invertedIndexCharFilter = other.invertedIndexCharFilter;
        invertedIndexParserLowercase = other.invertedIndexParserLowercase;
        invertedIndexParserStopwords = other.invertedIndexParserStopwords;
        invertedIndexAnalyzerName = other.invertedIndexAnalyzerName;
        explicitAnalyzer = other.explicitAnalyzer;
    }

    /**
     * use for Nereids ONLY
     *
     * <p>Equivalent to the eight-argument constructor with a {@code null} analyzer.
     */
    public MatchPredicate(Operator op, Expr e1, Expr e2, Type retType,
            NullableMode nullableMode, Index invertedIndex, boolean nullable) {
        this(op, e1, e2, retType, nullableMode, invertedIndex, nullable, null);
    }

    /**
     * Builds a match predicate {@code e1 <op> e2}.
     *
     * @param op match operator
     * @param e1 left operand, added as child 0
     * @param e2 right operand, added as child 1
     * @param retType return type of the underlying function
     * @param nullableMode nullable mode of the underlying function
     * @param invertedIndex index whose properties supply the tokenizer settings; may be
     *        {@code null}, in which case an empty property map is used
     * @param nullable value assigned to this expression's {@code nullable} flag
     * @param analyzer analyzer requested by the caller; may be {@code null} or empty
     */
    public MatchPredicate(Operator op, Expr e1, Expr e2, Type retType,
            NullableMode nullableMode, Index invertedIndex, boolean nullable, String analyzer) {
        super();
        this.op = op;
        children.add(e1);
        children.add(e2);

        Map<String, String> properties =
                invertedIndex == null ? Collections.emptyMap() : invertedIndex.getProperties();
        AnalyzerSelector.Selection selection = AnalyzerSelector.select(properties, analyzer);

        // Set parser and analyzer fields
        this.invertedIndexParser = selection.parser();
        // When table has no inverted index (invertedIndex == null), and analyzer is not explicitly
        // specified, we should pass "__default__" to BE to let it use default tokenization.
        // This enables match functions to work on tables without inverted index.
        // When user explicitly specifies "using analyzer none", or when index is configured with
        // parser=none, we should pass "none" to skip tokenization.
        if (invertedIndex == null && !selection.explicit()
                && InvertedIndexUtil.INVERTED_INDEX_PARSER_NONE.equalsIgnoreCase(selection.analyzer())) {
            // Table has no index, use default tokenization
            this.invertedIndexAnalyzerName = DEFAULT_ANALYZER_NAME;
        } else {
            this.invertedIndexAnalyzerName = selection.analyzer();
            if (Strings.isNullOrEmpty(this.invertedIndexAnalyzerName)) {
                // No analyzer selected: fall back to the parser name.
                this.invertedIndexAnalyzerName = this.invertedIndexParser;
            }
        }

        // Extract additional index properties for thrift serialization
        this.invertedIndexParserMode = InvertedIndexUtil.getInvertedIndexParserMode(properties);
        this.invertedIndexCharFilter = InvertedIndexUtil.getInvertedIndexCharFilter(properties);
        this.invertedIndexParserLowercase = InvertedIndexUtil.getInvertedIndexParserLowercase(properties);
        this.invertedIndexParserStopwords = InvertedIndexUtil.getInvertedIndexParserStopwords(properties);

        // Remember the caller-supplied analyzer (trimmed) so it can be echoed in generated SQL.
        if (!Strings.isNullOrEmpty(analyzer)) {
            this.explicitAnalyzer = analyzer.trim();
        }

        fn = new Function(new FunctionName(op.getName()), Lists.newArrayList(e1.getType(), e2.getType()), retType,
                false, true, nullableMode);
        this.nullable = nullable;
    }

    /** Returns a copy of this predicate created through the copy constructor. */
    @Override
    public Expr clone() {
        return new MatchPredicate(this);
    }

    /** Returns the match operator of this predicate. */
    public Operator getOp() {
        return this.op;
    }

    /**
     * Two match predicates are equal when the superclass considers them equal and they agree on
     * operator, explicit analyzer, analyzer name and parser.
     */
    @Override
    public boolean equals(Object obj) {
        if (!super.equals(obj)) {
            return false;
        }
        // Defensive guard so the cast below can never throw, whatever super.equals accepts.
        if (!(obj instanceof MatchPredicate)) {
            return false;
        }
        MatchPredicate other = (MatchPredicate) obj;
        return other.op == op
                && Objects.equals(explicitAnalyzer, other.explicitAnalyzer)
                && Objects.equals(invertedIndexAnalyzerName, other.invertedIndexAnalyzerName)
                && Objects.equals(invertedIndexParser, other.invertedIndexParser);
    }

    /** Renders {@code <child0> <OP> <child1>} followed by the optional analyzer clause. */
    @Override
    public String toSqlImpl() {
        return getChild(0).toSql() + " " + op.toString() + " " + getChild(1).toSql()
                + analyzerSqlFragment();
    }

    /**
     * Same rendering as {@link #toSqlImpl()}, forwarding the given options to both children.
     */
    @Override
    public String toSqlImpl(boolean disableTableName, boolean needExternalSql, TableType tableType,
            TableIf table) {
        return getChild(0).toSql(disableTableName, needExternalSql, tableType, table) + " " + op.toString() + " "
                + getChild(1).toSql(disableTableName, needExternalSql, tableType, table)
                + analyzerSqlFragment();
    }

    /**
     * Fills {@code msg} as a {@code MATCH_PRED} node carrying this predicate's opcode and
     * tokenizer settings.
     */
    @Override
    protected void toThrift(TExprNode msg) {
        msg.node_type = TExprNodeType.MATCH_PRED;
        msg.setOpcode(op.getOpcode());
        // Use new TMatchPredicate constructor with required fields
        msg.match_predicate = new TMatchPredicate(invertedIndexParser, invertedIndexParserMode);
        msg.match_predicate.setCharFilterMap(invertedIndexCharFilter);
        msg.match_predicate.setParserLowercase(invertedIndexParserLowercase);
        msg.match_predicate.setParserStopwords(invertedIndexParserStopwords);
        msg.match_predicate.setAnalyzerName(invertedIndexAnalyzerName);
    }

    /** Hashes the same fields that {@link #equals(Object)} compares. */
    @Override
    public int hashCode() {
        return Objects.hash(super.hashCode(), op, explicitAnalyzer, invertedIndexAnalyzerName, invertedIndexParser);
    }

    /**
     * Builds the trailing {@code USING ANALYZER} clause for generated SQL.
     *
     * @return an empty string when no analyzer was given explicitly; otherwise the clause with
     *         the analyzer written bare if it is a plain identifier, or single-quoted with
     *         embedded single quotes doubled
     */
    private String analyzerSqlFragment() {
        if (Strings.isNullOrEmpty(explicitAnalyzer)) {
            return "";
        }
        if (SIMPLE_IDENTIFIER.matcher(explicitAnalyzer).matches()) {
            return " USING ANALYZER " + explicitAnalyzer;
        }
        return " USING ANALYZER '" + explicitAnalyzer.replace("'", "''") + "'";
    }
}