// MatchPredicate.java

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.analysis;

import org.apache.doris.catalog.ArrayType;
import org.apache.doris.catalog.Function;
import org.apache.doris.catalog.Function.NullableMode;
import org.apache.doris.catalog.FunctionSet;
import org.apache.doris.catalog.Index;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.ScalarFunction;
import org.apache.doris.catalog.Type;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.thrift.TExprNode;
import org.apache.doris.thrift.TExprNodeType;
import org.apache.doris.thrift.TExprOpcode;
import org.apache.doris.thrift.TMatchPredicate;

import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.gson.annotations.SerializedName;

import java.util.List;
import java.util.Map;
import java.util.Objects;

/**
 * Predicate of the form {@code field MATCH_xxx query_str}.
 *
 * <p>Child 0 is the expression being searched and child 1 is the query string.
 * The predicate also carries the parser settings of the inverted index that is
 * found for the left-hand column (when there is one); these are sent to the
 * backend through {@link #toThrift(TExprNode)}.
 */
public class MatchPredicate extends Predicate {

    /**
     * The supported MATCH operators. Each one carries the text used when
     * rendering SQL, the builtin function name it is registered under, and the
     * thrift opcode.
     */
    public enum Operator {
        MATCH_ANY("MATCH_ANY", "match_any", TExprOpcode.MATCH_ANY),
        MATCH_ALL("MATCH_ALL", "match_all", TExprOpcode.MATCH_ALL),
        MATCH_PHRASE("MATCH_PHRASE", "match_phrase", TExprOpcode.MATCH_PHRASE),
        MATCH_PHRASE_PREFIX("MATCH_PHRASE_PREFIX", "match_phrase_prefix", TExprOpcode.MATCH_PHRASE_PREFIX),
        MATCH_REGEXP("MATCH_REGEXP", "match_regexp", TExprOpcode.MATCH_REGEXP),
        MATCH_PHRASE_EDGE("MATCH_PHRASE_EDGE", "match_phrase_edge", TExprOpcode.MATCH_PHRASE_EDGE);

        private final String description;
        private final String name;
        private final TExprOpcode opcode;

        Operator(String description,
                 String name,
                 TExprOpcode opcode) {
            this.description = description;
            this.name = name;
            this.opcode = opcode;
        }

        /**
         * @return the operator text as it appears in SQL, e.g. {@code MATCH_ANY}
         */
        @Override
        public String toString() {
            return description;
        }

        /**
         * @return the builtin function name, e.g. {@code match_any}
         */
        public String getName() {
            return name;
        }

        /**
         * @return the thrift opcode of this operator
         */
        public TExprOpcode getOpcode() {
            return opcode;
        }
    }

    /**
     * Registers the builtin signatures of every MATCH operator.
     *
     * <p>For each string type {@code t} and each operator two signatures are
     * added, {@code (t, t)} and {@code (ARRAY<t>, t)}, both returning BOOLEAN.
     * Operators are registered in enum declaration order.
     *
     * @param functionSet the function set that receives the builtins
     */
    public static void initBuiltins(FunctionSet functionSet) {
        String symbolNotUsed = "symbol_not_used";

        for (Type t : Type.getStringTypes()) {
            for (Operator operator : Operator.values()) {
                registerBuiltin(functionSet, operator, symbolNotUsed, t, t);
                registerBuiltin(functionSet, operator, symbolNotUsed, new ArrayType(t), t);
            }
        }
    }

    /**
     * Registers one {@code (leftType, rightType) -> BOOLEAN} signature for the given operator.
     */
    private static void registerBuiltin(FunctionSet functionSet, Operator operator, String symbol,
            Type leftType, Type rightType) {
        functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
                operator.getName(),
                symbol,
                Lists.<Type>newArrayList(leftType, rightType),
                Type.BOOLEAN));
    }

    @SerializedName("op")
    private Operator op;
    // Parser settings taken from the inverted index of the left-hand column.
    // They keep the defaults assigned below / in the constructors when no index is found.
    private String invertedIndexParser;
    private String invertedIndexParserMode;
    private Map<String, String> invertedIndexCharFilter;
    private boolean invertedIndexParserLowercase = true;
    private String invertedIndexParserStopwords = "";

    private MatchPredicate() {
        // use for serde only
        invertedIndexParser = InvertedIndexUtil.INVERTED_INDEX_PARSER_UNKNOWN;
        invertedIndexParserMode = InvertedIndexUtil.INVERTED_INDEX_PARSER_FINE_GRANULARITY;
    }

    /**
     * Creates a match predicate {@code e1 op e2} with default (unknown) parser settings.
     *
     * @param op the match operator
     * @param e1 left operand, must not be null
     * @param e2 right operand (query string), must not be null
     */
    public MatchPredicate(Operator op, Expr e1, Expr e2) {
        super();
        this.op = op;
        Preconditions.checkNotNull(e1);
        children.add(e1);
        Preconditions.checkNotNull(e2);
        children.add(e2);
        // TODO: Calculate selectivity
        selectivity = Expr.DEFAULT_SELECTIVITY;
        invertedIndexParser = InvertedIndexUtil.INVERTED_INDEX_PARSER_UNKNOWN;
        invertedIndexParserMode = InvertedIndexUtil.INVERTED_INDEX_PARSER_FINE_GRANULARITY;
    }

    /**
     * Copy constructor used by {@link #clone()}. The char-filter map is shared
     * with {@code other}, not copied.
     */
    protected MatchPredicate(MatchPredicate other) {
        super(other);
        op = other.op;
        invertedIndexParser = other.invertedIndexParser;
        invertedIndexParserMode = other.invertedIndexParserMode;
        invertedIndexCharFilter = other.invertedIndexCharFilter;
        invertedIndexParserLowercase = other.invertedIndexParserLowercase;
        invertedIndexParserStopwords = other.invertedIndexParserStopwords;
    }

    /**
     * use for Nereids ONLY
     *
     * @param op the match operator
     * @param e1 left operand
     * @param e2 right operand (query string)
     * @param retType return type of the generated function
     * @param nullableMode nullable mode of the generated function
     * @param invertedIndex index whose parser settings are copied; may be null,
     *                      in which case the defaults are kept
     */
    public MatchPredicate(Operator op, Expr e1, Expr e2, Type retType,
            NullableMode nullableMode, Index invertedIndex) {
        this(op, e1, e2);
        if (invertedIndex != null) {
            applyInvertedIndexProperties(invertedIndex);
        }
        fn = new Function(new FunctionName(op.getName()), Lists.newArrayList(e1.getType(), e2.getType()), retType,
                false, true, nullableMode);
    }

    /**
     * Copies the parser-related settings of {@code index} into this predicate.
     */
    private void applyInvertedIndexProperties(Index index) {
        invertedIndexParser = index.getInvertedIndexParser();
        invertedIndexParserMode = index.getInvertedIndexParserMode();
        invertedIndexCharFilter = index.getInvertedIndexCharFilter();
        invertedIndexParserLowercase = index.getInvertedIndexParserLowercase();
        invertedIndexParserStopwords = index.getInvertedIndexParserStopwords();
    }

    @Override
    public Expr clone() {
        return new MatchPredicate(this);
    }

    public Operator getOp() {
        return this.op;
    }

    /**
     * Two match predicates are equal when the superclass considers them equal
     * and they use the same operator. Parser settings are not compared.
     */
    @Override
    public boolean equals(Object obj) {
        if (!super.equals(obj)) {
            return false;
        }
        return ((MatchPredicate) obj).op == op;
    }

    @Override
    public String toSqlImpl() {
        return getChild(0).toSql() + " " + op.toString() + " " + getChild(1).toSql();
    }

    /**
     * Fills the thrift node with the opcode and the inverted index parser settings.
     */
    @Override
    protected void toThrift(TExprNode msg) {
        msg.node_type = TExprNodeType.MATCH_PRED;
        msg.setOpcode(op.getOpcode());
        msg.match_predicate = new TMatchPredicate(invertedIndexParser, invertedIndexParserMode);
        msg.match_predicate.setCharFilterMap(invertedIndexCharFilter);
        msg.match_predicate.setParserLowercase(invertedIndexParserLowercase);
        msg.match_predicate.setParserStopwords(invertedIndexParserStopwords);
    }

    /**
     * Validates the left operand type, resolves the builtin function, casts a
     * VARIANT left operand to the type of the right operand, and picks up the
     * parser settings of the inverted index defined on the left-hand column.
     *
     * @throws AnalysisException if the left operand type is not supported, no
     *         builtin function matches, or the scanned slot is not backed by an
     *         OLAP table
     */
    @Override
    public void analyzeImpl(Analyzer analyzer) throws AnalysisException {
        super.analyzeImpl(analyzer);

        Type leftType = getChild(0).getType();
        if (leftType.isObjectStored()) {
            throw new AnalysisException(
                    "left operand of " + op.toString() + " must not be Bitmap or HLL: " + toSql());
        }

        if (!leftType.isStringType() && !leftType.isArrayType() && !leftType.isVariantType()) {
            throw new AnalysisException(
                    "left operand of " + op.toString() + " must be of type STRING, ARRAY or VARIANT: " + toSql());
        }

        fn = getBuiltinFunction(op.toString(),
                collectChildReturnTypes(), Function.CompareMode.IS_NONSTRICT_SUPERTYPE_OF);
        if (fn == null) {
            throw new AnalysisException(
                    "no function found for " + op.toString() + "," + toSql());
        }
        Expr e1 = getChild(0);
        Expr e2 = getChild(1);

        // CAST variant to right expr type
        if (e1.type.isVariantType()) {
            setChild(0, e1.castTo(e2.getType()));
        }

        // The index lookup uses the original (un-cast) left operand.
        if (e1 instanceof SlotRef) {
            resolveInvertedIndex((SlotRef) e1);
        }
    }

    /**
     * Looks for an inverted index whose first column is the column referenced by
     * {@code slotRef} and, if one exists, copies its parser settings. Only the
     * first matching index is used. Does nothing when the slot has no descriptor
     * or is not a scan slot.
     *
     * @throws AnalysisException if the slot's table is missing or is not an OLAP table
     */
    private void resolveInvertedIndex(SlotRef slotRef) throws AnalysisException {
        SlotDescriptor slotDesc = slotRef.getDesc();
        if (slotDesc == null || !slotDesc.isScanSlot()) {
            return;
        }

        TupleDescriptor slotParent = slotDesc.getParent();
        // instanceof is false for null, so this rejects both a missing table and a
        // non-OLAP table with an AnalysisException instead of an NPE / ClassCastException.
        if (!(slotParent.getTable() instanceof OlapTable)) {
            throw new AnalysisException("slotRef in matchExpression failed to get OlapTable");
        }
        OlapTable olapTbl = (OlapTable) slotParent.getTable();

        for (Index index : olapTbl.getIndexes()) {
            if (index.getIndexType() != IndexDef.IndexType.INVERTED) {
                continue;
            }
            List<String> columns = index.getColumns();
            // Skip an index without columns rather than failing on columns.get(0).
            if (columns == null || columns.isEmpty()) {
                continue;
            }
            if (slotRef.getColumnName().equals(columns.get(0))) {
                applyInvertedIndexProperties(index);
                break;
            }
        }
    }

    @Override
    public int hashCode() {
        return 31 * super.hashCode() + Objects.hashCode(op);
    }
}