MatchPredicate.java

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.analysis;

import org.apache.doris.catalog.Function;
import org.apache.doris.catalog.Function.NullableMode;
import org.apache.doris.catalog.Index;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.catalog.TableIf.TableType;
import org.apache.doris.catalog.Type;
import org.apache.doris.thrift.TExprNode;
import org.apache.doris.thrift.TExprNodeType;
import org.apache.doris.thrift.TExprOpcode;
import org.apache.doris.thrift.TMatchPredicate;

import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import com.google.gson.annotations.SerializedName;

import java.util.Collections;
import java.util.Map;
import java.util.Objects;
import java.util.regex.Pattern;

/**
 * Predicate of the form {@code field MATCH_xxx query_str [USING ANALYZER analyzer]}.
 *
 * <p>Besides the operator and its two operands (stored as children), the predicate carries the
 * parser/analyzer settings that are written into {@link TMatchPredicate} by
 * {@link #toThrift(TExprNode)}.
 */
public class MatchPredicate extends Predicate {

    /**
     * Supported match operators. Each operator carries the keyword used when rendering SQL,
     * the function name used to build the {@link Function} descriptor, and the thrift opcode.
     */
    public enum Operator {
        MATCH_ANY("MATCH_ANY", "match_any", TExprOpcode.MATCH_ANY),
        MATCH_ALL("MATCH_ALL", "match_all", TExprOpcode.MATCH_ALL),
        MATCH_PHRASE("MATCH_PHRASE", "match_phrase", TExprOpcode.MATCH_PHRASE),
        MATCH_PHRASE_PREFIX("MATCH_PHRASE_PREFIX", "match_phrase_prefix", TExprOpcode.MATCH_PHRASE_PREFIX),
        MATCH_REGEXP("MATCH_REGEXP", "match_regexp", TExprOpcode.MATCH_REGEXP),
        MATCH_PHRASE_EDGE("MATCH_PHRASE_EDGE", "match_phrase_edge", TExprOpcode.MATCH_PHRASE_EDGE);

        // Keyword returned by toString(); this is what toSqlImpl() places between the operands.
        private final String description;
        // Function name; note this is a distinct field from the built-in Enum#name() method.
        private final String name;
        private final TExprOpcode opcode;

        Operator(String description,
                 String name,
                 TExprOpcode opcode) {
            this.description = description;
            this.name = name;
            this.opcode = opcode;
        }

        /** Returns the SQL keyword of this operator (e.g. {@code MATCH_ANY}). */
        @Override
        public String toString() {
            return description;
        }

        /** Returns the lower-case function name of this operator (e.g. {@code match_any}). */
        public String getName() {
            return name;
        }

        /** Returns the thrift opcode of this operator. */
        public TExprOpcode getOpcode() {
            return opcode;
        }
    }

    // Analyzer names that fully match this pattern are emitted unquoted by analyzerSqlFragment().
    // Compiled once so that SQL generation does not recompile the regex on every call.
    private static final Pattern BARE_ANALYZER_NAME = Pattern.compile("[A-Za-z_][A-Za-z0-9_]*");

    @SerializedName("op")
    private Operator op;
    // Fields for thrift serialization (restored from old version)
    private String invertedIndexParser;
    private String invertedIndexParserMode;
    private Map<String, String> invertedIndexCharFilter;
    private boolean invertedIndexParserLowercase = true;
    private String invertedIndexParserStopwords = "";
    private String invertedIndexAnalyzerName = "";
    // Fields for SQL generation
    private String explicitAnalyzer = "";

    private MatchPredicate() {
        // use for serde only
        invertedIndexParser = InvertedIndexUtil.INVERTED_INDEX_PARSER_UNKNOWN;
        invertedIndexParserMode = InvertedIndexUtil.INVERTED_INDEX_PARSER_FINE_GRANULARITY;
    }

    /**
     * Copy constructor used by {@link #clone()}.
     *
     * <p>All fields are copied by reference/value; the char-filter map is shared with
     * {@code other}, not duplicated.
     *
     * @param other the predicate to copy
     */
    protected MatchPredicate(MatchPredicate other) {
        super(other);
        op = other.op;
        invertedIndexParser = other.invertedIndexParser;
        invertedIndexParserMode = other.invertedIndexParserMode;
        invertedIndexCharFilter = other.invertedIndexCharFilter;
        invertedIndexParserLowercase = other.invertedIndexParserLowercase;
        invertedIndexParserStopwords = other.invertedIndexParserStopwords;
        invertedIndexAnalyzerName = other.invertedIndexAnalyzerName;
        explicitAnalyzer = other.explicitAnalyzer;
    }

    /**
     * use for Nereids ONLY
     *
     * <p>Equivalent to the eight-argument constructor with a {@code null} analyzer.
     */
    public MatchPredicate(Operator op, Expr e1, Expr e2, Type retType,
            NullableMode nullableMode, Index invertedIndex, boolean nullable) {
        this(op, e1, e2, retType, nullableMode, invertedIndex, nullable, null);
    }

    /**
     * Builds a match predicate over {@code e1} and {@code e2}.
     *
     * @param op the match operator
     * @param e1 first operand, added as child 0
     * @param e2 second operand, added as child 1
     * @param retType type passed to the {@link Function} descriptor built for this predicate
     * @param nullableMode nullable mode passed to the {@link Function} descriptor
     * @param invertedIndex index whose properties supply the parser settings; may be
     *         {@code null}, in which case an empty property map is used
     * @param nullable value assigned to this expression's {@code nullable} flag
     * @param analyzer explicitly requested analyzer; may be {@code null} or empty. A non-empty
     *         value is trimmed and later rendered as a {@code USING ANALYZER} clause
     */
    public MatchPredicate(Operator op, Expr e1, Expr e2, Type retType,
            NullableMode nullableMode, Index invertedIndex, boolean nullable, String analyzer) {
        super();
        this.op = op;
        children.add(e1);
        children.add(e2);
        Map<String, String> properties = invertedIndex == null ? Collections.emptyMap() : invertedIndex.getProperties();
        AnalyzerSelector.Selection selection = AnalyzerSelector.select(properties, analyzer);

        // Set parser and analyzer fields
        this.invertedIndexParser = selection.parser();
        // When table has no inverted index (invertedIndex == null), and analyzer is not explicitly
        // specified, we should pass "__default__" to BE to let it use default tokenization.
        // This enables match functions to work on tables without inverted index.
        // When user explicitly specifies "using analyzer none", or when index is configured with
        // parser=none, we should pass "none" to skip tokenization.
        if (invertedIndex == null && !selection.explicit()
                && InvertedIndexUtil.INVERTED_INDEX_PARSER_NONE.equalsIgnoreCase(selection.analyzer())) {
            // Table has no index, use default tokenization
            this.invertedIndexAnalyzerName = "__default__";
        } else {
            this.invertedIndexAnalyzerName = selection.analyzer();
            // Fall back to the parser name when the selection yields no analyzer name.
            if (Strings.isNullOrEmpty(this.invertedIndexAnalyzerName)) {
                this.invertedIndexAnalyzerName = this.invertedIndexParser;
            }
        }

        // Extract additional index properties for thrift serialization
        this.invertedIndexParserMode = InvertedIndexUtil.getInvertedIndexParserMode(properties);
        this.invertedIndexCharFilter = InvertedIndexUtil.getInvertedIndexCharFilter(properties);
        this.invertedIndexParserLowercase = InvertedIndexUtil.getInvertedIndexParserLowercase(properties);
        this.invertedIndexParserStopwords = InvertedIndexUtil.getInvertedIndexParserStopwords(properties);

        // Only a caller-supplied analyzer is remembered for SQL generation; otherwise the
        // field keeps its "" default and no USING ANALYZER clause is rendered.
        if (!Strings.isNullOrEmpty(analyzer)) {
            this.explicitAnalyzer = analyzer.trim();
        }
        fn = new Function(new FunctionName(op.getName()), Lists.newArrayList(e1.getType(), e2.getType()), retType,
                false, true, nullableMode);
        this.nullable = nullable;
    }

    /** Returns a copy of this predicate created through the copy constructor. */
    @Override
    public Expr clone() {
        return new MatchPredicate(this);
    }

    /** Returns the match operator of this predicate. */
    public Operator getOp() {
        return this.op;
    }

    /**
     * Two match predicates are equal when the superclass considers them equal and they agree on
     * operator, explicit analyzer, analyzer name and parser.
     */
    @Override
    public boolean equals(Object obj) {
        if (!super.equals(obj)) {
            return false;
        }
        // NOTE(review): the cast relies on super.equals() having rejected null and objects of
        // other classes - confirm against the superclass implementation.
        MatchPredicate other = (MatchPredicate) obj;
        return other.op == op
                && Objects.equals(explicitAnalyzer, other.explicitAnalyzer)
                && Objects.equals(invertedIndexAnalyzerName, other.invertedIndexAnalyzerName)
                && Objects.equals(invertedIndexParser, other.invertedIndexParser);
    }

    /** Renders {@code <child0> <OP> <child1>} followed by the optional analyzer clause. */
    @Override
    public String toSqlImpl() {
        return getChild(0).toSql() + " " + op.toString() + " " + getChild(1).toSql()
                + analyzerSqlFragment();
    }

    /**
     * Same rendering as {@link #toSqlImpl()}, forwarding the given options to both children's
     * {@code toSql}.
     */
    @Override
    public String toSqlImpl(boolean disableTableName, boolean needExternalSql, TableType tableType,
            TableIf table) {
        return getChild(0).toSql(disableTableName, needExternalSql, tableType, table) + " " + op.toString() + " "
                + getChild(1).toSql(disableTableName, needExternalSql, tableType, table)
                + analyzerSqlFragment();
    }

    /**
     * Fills {@code msg} as a {@code MATCH_PRED} node: sets the opcode of the operator and a
     * {@link TMatchPredicate} carrying parser, parser mode, char filter, lowercase flag,
     * stopwords and analyzer name.
     */
    @Override
    protected void toThrift(TExprNode msg) {
        msg.node_type = TExprNodeType.MATCH_PRED;
        msg.setOpcode(op.getOpcode());
        // Use new TMatchPredicate constructor with required fields
        msg.match_predicate = new TMatchPredicate(invertedIndexParser, invertedIndexParserMode);
        msg.match_predicate.setCharFilterMap(invertedIndexCharFilter);
        msg.match_predicate.setParserLowercase(invertedIndexParserLowercase);
        msg.match_predicate.setParserStopwords(invertedIndexParserStopwords);
        msg.match_predicate.setAnalyzerName(invertedIndexAnalyzerName);
    }

    /** Hashes the same fields that {@link #equals(Object)} compares. */
    @Override
    public int hashCode() {
        return Objects.hash(super.hashCode(), op, explicitAnalyzer, invertedIndexAnalyzerName, invertedIndexParser);
    }

    /**
     * Builds the trailing {@code USING ANALYZER} clause for SQL output.
     *
     * @return an empty string when no explicit analyzer was given; the bare name when it looks
     *         like a plain identifier; otherwise the name as a single-quoted literal with
     *         embedded single quotes doubled
     */
    private String analyzerSqlFragment() {
        if (explicitAnalyzer == null || explicitAnalyzer.isEmpty()) {
            return "";
        }
        // Full-match against the precompiled pattern; same semantics as String#matches.
        if (BARE_ANALYZER_NAME.matcher(explicitAnalyzer).matches()) {
            return " USING ANALYZER " + explicitAnalyzer;
        }
        return " USING ANALYZER '" + explicitAnalyzer.replace("'", "''") + "'";
    }
}