InPredicate.java
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This file is copied from
// https://github.com/apache/impala/blob/branch-2.9.0/fe/src/main/java/org/apache/impala/InPredicate.java
// and modified by Doris
package org.apache.doris.analysis;
import org.apache.doris.catalog.Function;
import org.apache.doris.catalog.Function.NullableMode;
import org.apache.doris.catalog.FunctionSet;
import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.catalog.ScalarFunction;
import org.apache.doris.catalog.Type;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.Reference;
import org.apache.doris.thrift.TExprNode;
import org.apache.doris.thrift.TExprNodeType;
import org.apache.doris.thrift.TExprOpcode;
import org.apache.doris.thrift.TInPredicate;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.gson.annotations.SerializedName;
import java.util.ArrayList;
import java.util.List;
/**
* Class representing a [NOT] IN predicate. It determines if a specified value
* (first child) matches any value in a subquery (second child) or a list
* of values (remaining children).
*/
public class InPredicate extends Predicate {
private static final String IN_SET_LOOKUP = "in_set_lookup";
private static final String NOT_IN_SET_LOOKUP = "not_in_set_lookup";
private static final String IN_ITERATE = "in_iterate";
private static final String NOT_IN_ITERATE = "not_in_iterate";
@SerializedName("ini")
private boolean isNotIn;
private static final NullLiteral NULL_LITERAL = new NullLiteral();
public static void initBuiltins(FunctionSet functionSet) {
for (Type t : Type.getSupportedTypes()) {
if (t.isNull()) {
continue;
}
// TODO we do not support codegen for CHAR and the In predicate must be
// codegened
// because it has variable number of arguments. This will force CHARs to be
// cast up to strings; meaning that "in" comparisons will not have CHAR
// comparison
// semantics.
if (t.getPrimitiveType() == PrimitiveType.CHAR) {
continue;
}
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltin(IN_ITERATE,
Type.BOOLEAN, Lists.newArrayList(t, t), true,
null, null, null, false));
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltin(NOT_IN_ITERATE,
Type.BOOLEAN, Lists.newArrayList(t, t), true,
null, null, null, false));
functionSet.addBuiltin(ScalarFunction.createBuiltin(IN_SET_LOOKUP,
Type.BOOLEAN, Lists.newArrayList(t, t), true,
null, null, null, false));
functionSet.addBuiltin(ScalarFunction.createBuiltin(NOT_IN_SET_LOOKUP,
Type.BOOLEAN, Lists.newArrayList(t, t), true,
null, null, null, false));
}
}
private InPredicate() {
// use for serde only
}
// First child is the comparison expr for which we
// should check membership in the inList (the remaining children).
public InPredicate(Expr compareExpr, List<Expr> inList, boolean isNotIn) {
children.add(compareExpr);
children.addAll(inList);
this.isNotIn = isNotIn;
}
/**
* use for Nereids ONLY
*/
public InPredicate(Expr compareExpr, List<Expr> inList, boolean isNotIn, boolean allConstant) {
this(compareExpr, inList, isNotIn);
type = Type.BOOLEAN;
if (allConstant) {
opcode = isNotIn ? TExprOpcode.FILTER_NOT_IN : TExprOpcode.FILTER_IN;
} else {
opcode = isNotIn ? TExprOpcode.FILTER_NEW_NOT_IN : TExprOpcode.FILTER_NEW_IN;
fn = new Function(new FunctionName(isNotIn ? NOT_IN_ITERATE : IN_ITERATE),
Lists.newArrayList(getChild(0).getType(), getChild(1).getType()), Type.BOOLEAN,
true, true, NullableMode.DEPEND_ON_ARGUMENT);
}
}
protected InPredicate(InPredicate other) {
super(other);
isNotIn = other.isNotIn();
}
public int getInElementNum() {
// the first child is compare expr
return getChildren().size() - 1;
}
@Override
public InPredicate clone() {
return new InPredicate(this);
}
// C'tor for initializing an [NOT] IN predicate with a subquery child.
public InPredicate(Expr compareExpr, Expr subquery, boolean isNotIn) {
Preconditions.checkNotNull(compareExpr);
Preconditions.checkNotNull(subquery);
children.add(compareExpr);
children.add(subquery);
this.isNotIn = isNotIn;
}
/**
* Negates an InPredicate.
*/
@Override
public Expr negate() {
return new InPredicate(getChild(0), children.subList(1, children.size()), !isNotIn);
}
public List<Expr> getListChildren() {
return children.subList(1, children.size());
}
public boolean isNotIn() {
return isNotIn;
}
public boolean isLiteralChildren() {
for (int i = 1; i < children.size(); ++i) {
if (!(children.get(i) instanceof LiteralExpr)) {
return false;
}
}
return true;
}
@Override
public void analyzeImpl(Analyzer analyzer) throws AnalysisException {
super.analyzeImpl(analyzer);
this.checkIncludeBitmap();
if (contains(Subquery.class)) {
// An [NOT] IN predicate with a subquery must contain two children, the second
// of
// which is a Subquery.
if (children.size() != 2 || !(getChild(1) instanceof Subquery)) {
throw new AnalysisException("Unsupported IN predicate with a subquery: " + toSql());
}
Subquery subquery = (Subquery) getChild(1);
if (!subquery.returnsScalarColumn()) {
throw new AnalysisException("Subquery must return a single column: " + subquery.toSql());
}
// Ensure that the column in the lhs of the IN predicate and the result of
// the subquery are type compatible. No need to perform any
// casting at this point. Any casting needed will be performed when the
// subquery is unnested.
ArrayList<Expr> subqueryExprs = subquery.getStatement().getResultExprs();
Expr compareExpr = children.get(0);
Expr subqueryExpr = subqueryExprs.get(0);
if (subqueryExpr.getType().isBitmapType()) {
if (!compareExpr.getType().isIntegerType()) {
throw new AnalysisException(
String.format("Incompatible return types '%s' and '%s' of exprs '%s' and '%s'.",
compareExpr.getType().toSql(), subqueryExpr.getType().toSql(), compareExpr.toSql(),
subqueryExpr.toSql()));
}
if (!compareExpr.getType().isBigIntType()) {
children.set(0, compareExpr.castTo(Type.BIGINT));
}
} else {
analyzer.getCompatibleType(compareExpr.getType(), compareExpr, subqueryExpr);
}
} else {
analyzer.castAllToCompatibleType(children);
}
boolean allConstant = true;
for (int i = 1; i < children.size(); ++i) {
if (!children.get(i).isConstant()) {
allConstant = false;
break;
}
}
boolean useSetLookup = allConstant;
// Only lookup fn_ if all subqueries have been rewritten. If the second child is
// a
// subquery, it will have type ArrayType, which cannot be resolved to a builtin
// function and will fail analysis.
Type[] argTypes = { getChild(0).type, getChild(1).type };
if (useSetLookup) {
// fn = getBuiltinFunction(analyzer, isNotIn ? NOT_IN_SET_LOOKUP :
// IN_SET_LOOKUP,
// argTypes, Function.CompareMode.IS_NONSTRICT_SUPERTYPE_OF);
opcode = isNotIn ? TExprOpcode.FILTER_NOT_IN : TExprOpcode.FILTER_IN;
} else {
fn = getBuiltinFunction(isNotIn ? NOT_IN_ITERATE : IN_ITERATE,
argTypes, Function.CompareMode.IS_NONSTRICT_SUPERTYPE_OF);
opcode = isNotIn ? TExprOpcode.FILTER_NEW_NOT_IN : TExprOpcode.FILTER_NEW_IN;
}
Reference<SlotRef> slotRefRef = new Reference<SlotRef>();
Reference<Integer> idxRef = new Reference<Integer>();
if (isSingleColumnPredicate(slotRefRef, idxRef)
&& idxRef.getRef() == 0 && slotRefRef.getRef().getNumDistinctValues() > 0) {
selectivity = (double) (getChildren().size() - 1) / (double) slotRefRef.getRef()
.getNumDistinctValues();
selectivity = Math.max(0.0, Math.min(1.0, selectivity));
} else {
selectivity = Expr.DEFAULT_SELECTIVITY;
}
}
public InPredicate union(InPredicate inPredicate) {
Preconditions.checkState(inPredicate.isLiteralChildren());
Preconditions.checkState(this.isLiteralChildren());
Preconditions.checkState(getChild(0).equals(inPredicate.getChild(0)));
List<Expr> unionChildren = new ArrayList<>(getListChildren());
unionChildren.removeAll(inPredicate.getListChildren());
unionChildren.addAll(inPredicate.getListChildren());
InPredicate union = new InPredicate(getChild(0), unionChildren, isNotIn);
return union;
}
public InPredicate intersection(InPredicate inPredicate) {
Preconditions.checkState(inPredicate.isLiteralChildren());
Preconditions.checkState(this.isLiteralChildren());
Preconditions.checkState(getChild(0).equals(inPredicate.getChild(0)));
List<Expr> intersectChildren = new ArrayList<>(getListChildren());
intersectChildren.retainAll(inPredicate.getListChildren());
InPredicate intersection = new InPredicate(getChild(0), intersectChildren, isNotIn);
return intersection;
}
@Override
protected void toThrift(TExprNode msg) {
// Can't serialize a predicate with a subquery
Preconditions.checkState(!contains(Subquery.class));
msg.in_predicate = new TInPredicate(isNotIn);
msg.node_type = TExprNodeType.IN_PRED;
msg.setOpcode(opcode);
}
@Override
public String toSqlImpl() {
StringBuilder strBuilder = new StringBuilder();
String notStr = (isNotIn) ? "NOT " : "";
strBuilder.append(getChild(0).toSql() + " " + notStr + "IN (");
for (int i = 1; i < children.size(); ++i) {
strBuilder.append(getChild(i).toSql());
strBuilder.append((i + 1 != children.size()) ? ", " : "");
}
strBuilder.append(")");
return strBuilder.toString();
}
@Override
public String toDigestImpl() {
StringBuilder strBuilder = new StringBuilder();
String notStr = (isNotIn) ? "NOT " : "";
strBuilder.append(getChild(0).toDigest() + " " + notStr + "IN (");
for (int i = 1; i < children.size(); ++i) {
strBuilder.append(getChild(i).toDigest());
strBuilder.append((i + 1 != children.size()) ? ", " : "");
}
strBuilder.append(")");
return strBuilder.toString();
}
@Override
public String toString() {
return toSql();
}
@Override
public Expr getResultValue(boolean forPushDownPredicatesToView) throws AnalysisException {
recursiveResetChildrenResult(forPushDownPredicatesToView);
final Expr leftChildValue = getChild(0);
if (!(leftChildValue instanceof LiteralExpr) || !isLiteralChildren()) {
return this;
}
if (leftChildValue instanceof NullLiteral) {
return leftChildValue;
}
List<Expr> inListChildren = children.subList(1, children.size());
boolean containsLeftChild = inListChildren.contains(leftChildValue);
// See QueryPlanTest.java testConstantInPredicate() for examples.
// This logic should be same as logic in in_predicate.cpp: get_boolean_val()
if (containsLeftChild) {
return new BoolLiteral(!isNotIn);
}
if (inListChildren.contains(NULL_LITERAL)) {
return new NullLiteral();
}
return new BoolLiteral(isNotIn);
}
@Override
public boolean equals(Object obj) {
if (super.equals(obj)) {
InPredicate expr = (InPredicate) obj;
if (isNotIn == expr.isNotIn) {
return true;
}
}
return false;
}
@Override
public boolean isNullable() {
return hasNullableChild();
}
}