Analyzer.java
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This file is copied from
// https://github.com/apache/impala/blob/branch-2.9.0/fe/src/main/java/org/apache/impala/Analyzer.java
// and modified by Doris
package org.apache.doris.analysis;
import org.apache.doris.catalog.Env;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.ErrorCode;
import org.apache.doris.common.ErrorReport;
import org.apache.doris.common.IdGenerator;
import org.apache.doris.common.Pair;
import org.apache.doris.qe.ConnectContext;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Multimap;
import com.google.common.collect.Sets;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
* Repository of analysis state for single select block.
* <p/>
* All conjuncts are assigned a unique id when initially registered, and all
* registered conjuncts are referenced by their id (ie, there are no containers
* other than the one holding the referenced conjuncts), to make substitute()
* simple.
*/
public class Analyzer {
private static final Logger LOG = LogManager.getLogger(Analyzer.class);
// NOTE: Alias of table is case sensitive
// UniqueAlias used to check whether the table ref or the alias is unique
// table/view used db.table, inline use alias
private final Set<String> uniqueTableAliasSet = Sets.newHashSet();
private final Multimap<String, TupleDescriptor> tupleByAlias = ArrayListMultimap.create();
// map from tuple id to list of conjuncts referencing tuple
private final Map<TupleId, List<ExprId>> tuplePredicates = Maps.newHashMap();
// map from slot id to list of conjuncts referencing slot
private final Map<SlotId, List<ExprId>> slotPredicates = Maps.newHashMap();
// Current depth of nested analyze() calls. Used for enforcing a
// maximum expr-tree depth. Needs to be manually maintained by the user
// of this Analyzer with incrementCallDepth() and decrementCallDepth().
private int callDepth = 0;
// Flag indicating if this analyzer instance belongs to a subquery.
private boolean isSubquery = false;
private static class InferPredicateState {
// map from two table tuple ids to JoinOperator between two tables.
// NOTE: first tupleId's position in front of the second tupleId.
public final Map<Pair<TupleId, TupleId>, JoinOperator> anyTwoTalesJoinOperator = Maps.newHashMap();
// slotEqSlotExpr: Record existing and infer equivalent connections
private final List<Expr> onSlotEqSlotExpr = new ArrayList<>();
// slotEqSlotDeDuplication: De-Duplication for slotEqSlotExpr
private final Set<Pair<Expr, Expr>> onSlotEqSlotDeDuplication = Sets.newHashSet();
// slotToLiteralExpr: Record existing and infer expr which slot and literal are equal
private final List<Expr> onSlotToLiteralExpr = new ArrayList<>();
// slotToLiteralDeDuplication: De-Duplication for slotToLiteralExpr
private final Set<Pair<Expr, Expr>> onSlotToLiteralDeDuplication = Sets.newHashSet();
// inExpr: Recoud existing and infer expr which in predicate
private final List<Expr> onInExpr = new ArrayList<>();
// inExprDeDuplication: De-Duplication for inExpr
private final Set<Expr> onInDeDuplication = Sets.newHashSet();
// isNullExpr: Record existing and infer not null predicate
private final List<Expr> onIsNullExpr = new ArrayList<>();
//isNullDeDuplication: De-Duplication for isNullExpr
private final Set<Expr> onIsNullDeDuplication = Sets.newHashSet();
// slotToLiteralDeDuplication: De-Duplication for slotToLiteralExpr. Contain on and where.
private final Set<Pair<Expr, Expr>> globalSlotToLiteralDeDuplication = Sets.newHashSet();
// inExprDeDuplication: De-Duplication for inExpr. Contain on and where
private final Set<Expr> globalInDeDuplication = Sets.newHashSet();
public InferPredicateState() {
}
public InferPredicateState(InferPredicateState that) {
anyTwoTalesJoinOperator.putAll(that.anyTwoTalesJoinOperator);
onSlotEqSlotExpr.addAll(that.onSlotEqSlotExpr);
onSlotEqSlotDeDuplication.addAll(that.onSlotEqSlotDeDuplication);
onSlotToLiteralExpr.addAll(that.onSlotToLiteralExpr);
onSlotToLiteralDeDuplication.addAll(that.onSlotToLiteralDeDuplication);
onInExpr.addAll(that.onInExpr);
onInDeDuplication.addAll(that.onInDeDuplication);
onIsNullExpr.addAll(that.onIsNullExpr);
onIsNullDeDuplication.addAll(that.onIsNullDeDuplication);
globalSlotToLiteralDeDuplication.addAll(that.globalSlotToLiteralDeDuplication);
globalInDeDuplication.addAll(that.globalInDeDuplication);
}
}
// state shared between all objects of an Analyzer tree
// TODO: Many maps here contain properties about tuples, e.g., whether
// a tuple is outer/semi joined, etc. Remove the maps in favor of making
// them properties of the tuple descriptor itself.
private static class GlobalState {
private final DescriptorTable descTbl = new DescriptorTable();
private final Env env;
private final IdGenerator<ExprId> conjunctIdGenerator = ExprId.createGenerator();
private final ConnectContext context;
// Record the statement clazz that the analyzer would to analyze,
// this give an opportunity to control analyzing behavior according to the statement type.
public Class<? extends StatementBase> rootStatementClazz;
// all registered conjuncts (map from id to Predicate)
private final Map<ExprId, Expr> conjuncts = Maps.newHashMap();
// eqJoinConjuncts[tid] contains all conjuncts of the form
// "<lhs> = <rhs>" in which either lhs or rhs is fully bound by tid
// and the other side is not bound by tid (ie, predicates that express equi-join
// conditions between two tablerefs).
// A predicate such as "t1.a = t2.b" has two entries, one for 't1' and
// another one for 't2'.
private final Map<TupleId, List<ExprId>> eqJoinConjuncts = Maps.newHashMap();
// map from outer-joined tuple id, ie, one that is nullable in this select block,
// to the last Join clause (represented by its rhs table ref) that outer-joined it
private final Map<TupleId, TableRef> outerJoinedTupleIds = Maps.newHashMap();
// Map of registered conjunct to the last full outer join (represented by its
// rhs table ref) that outer joined it.
public final Map<ExprId, TableRef> fullOuterJoinedConjuncts = Maps.newHashMap();
// Map of full-outer-joined tuple id to the last full outer join that outer-joined it
public final Map<TupleId, TableRef> fullOuterJoinedTupleIds = Maps.newHashMap();
// map from slot id to the analyzer/block in which it was registered
private final Map<SlotId, Analyzer> blockBySlot = Maps.newHashMap();
private final Map<String, TupleDescriptor> markTuples = Maps.newHashMap();
private final Map<TableRef, TupleId> markTupleIdByInnerRef = Maps.newHashMap();
private final Set<TupleId> markTupleIdsNotProcessed = Sets.newHashSet();
public GlobalState(Env env, ConnectContext context) {
this.env = env;
this.context = context;
}
}
private final GlobalState globalState;
private final InferPredicateState inferPredicateState;
// An analyzer stores analysis state for a single select block. A select block can be
// a top level select statement, or an inline view select block.
// ancestors contains the Analyzers of the enclosing select blocks of 'this'
// (ancestors[0] contains the immediate parent, etc.).
private final ArrayList<Analyzer> ancestors;
// Map from lowercase table alias to descriptor. Tables without an explicit alias
// are assigned two implicit aliases: the unqualified and fully-qualified table name.
// Such tables have two entries pointing to the same descriptor. If an alias is
// ambiguous, then this map retains the first entry with that alias to simplify error
// checking (duplicate vs. ambiguous alias).
private final Map<String, TupleDescriptor> aliasMap = Maps.newHashMap();
// Map from tuple id to its corresponding table ref.
private final Map<TupleId, TableRef> tableRefMap = Maps.newHashMap();
// Set of lowercase ambiguous implicit table aliases.
private final Set<String> ambiguousAliases = Sets.newHashSet();
public Analyzer(Env env, ConnectContext context) {
ancestors = Lists.newArrayList();
globalState = new GlobalState(env, context);
inferPredicateState = new InferPredicateState();
}
/**
* Analyzer constructor for nested select block. Catalog and DescriptorTable
* is inherited from the parentAnalyzer.
*
* @param parentAnalyzer the analyzer of the enclosing select block
*/
public Analyzer(Analyzer parentAnalyzer) {
this(parentAnalyzer, parentAnalyzer.globalState, parentAnalyzer.inferPredicateState);
if (parentAnalyzer.isSubquery) {
this.isSubquery = true;
}
}
/**
* Analyzer constructor for nested select block with the specified global state.
*/
private Analyzer(Analyzer parentAnalyzer, GlobalState globalState, InferPredicateState inferPredicateState) {
ancestors = Lists.newArrayList(parentAnalyzer);
ancestors.addAll(parentAnalyzer.ancestors);
this.globalState = globalState;
this.inferPredicateState = new InferPredicateState(inferPredicateState);
}
public void setRootStatementClazz(Class<? extends StatementBase> statementClazz) {
globalState.rootStatementClazz = statementClazz;
}
public Class<? extends StatementBase> getRootStatementClazz() {
return globalState.rootStatementClazz;
}
public int incrementCallDepth() {
return ++callDepth;
}
public int decrementCallDepth() {
return --callDepth;
}
public int getCallDepth() {
return callDepth;
}
/**
* Creates an returns an empty TupleDescriptor for the given table ref and registers
* it against all its legal aliases. For tables refs with an explicit alias, only the
* explicit alias is legal. For tables refs with no explicit alias, the fully-qualified
* and unqualified table names are legal aliases. Column references against unqualified
* implicit aliases can be ambiguous, therefore, we register such ambiguous aliases
* here. Requires that all views have been substituted.
* Throws if an existing explicit alias or implicit fully-qualified alias
* has already been registered for another table ref.
*/
public TupleDescriptor registerTableRef(TableRef ref) throws AnalysisException {
String uniqueAlias = ref.getUniqueAlias();
if (uniqueTableAliasSet.contains(uniqueAlias)) {
ErrorReport.reportAnalysisException(ErrorCode.ERR_NONUNIQ_TABLE, uniqueAlias);
}
uniqueTableAliasSet.add(uniqueAlias);
// If ref has no explicit alias, then the unqualified and the fully-qualified table
// names are legal implicit aliases. Column references against unqualified implicit
// aliases can be ambiguous, therefore, we register such ambiguous aliases here.
String unqualifiedAlias = null;
String[] aliases = ref.getAliases();
if (aliases.length > 1) {
unqualifiedAlias = aliases[1];
TupleDescriptor tupleDesc = aliasMap.get(unqualifiedAlias);
if (tupleDesc != null) {
if (tupleDesc.hasExplicitAlias()) {
ErrorReport.reportAnalysisException(ErrorCode.ERR_NONUNIQ_TABLE, uniqueAlias);
} else {
ambiguousAliases.add(unqualifiedAlias);
}
}
}
// Delegate creation of the tuple descriptor to the concrete table ref.
TupleDescriptor result = ref.createTupleDescriptor(this);
result.setRef(ref);
result.setAliases(aliases, ref.hasExplicitAlias());
// Register all legal aliases.
for (String alias : aliases) {
// TODO(zc)
// aliasMap_.put(alias, result);
tupleByAlias.put(alias, result);
}
tableRefMap.put(result.getId(), ref);
// for mark join, init three context
// 1. markTuples to records all tuples belong to mark slot
// 2. markTupleIdByInnerRef to records relationship between inner table of mark join and the mark tuple
// 3. markTupleIdsNotProcessed to records un-process mark tuple id. if an expr contains slot belong to
// the un-process mark tuple, it should not assign to current join node and should pop up its
// mark slot until all mark tuples in this expr has been processed.
if (ref.getJoinOp() != null && ref.isMark()) {
TupleDescriptor markTuple = getDescTbl().createTupleDescriptor();
markTuple.setAliases(new String[]{ref.getMarkTupleName()}, true);
globalState.markTuples.put(ref.getMarkTupleName(), markTuple);
globalState.markTupleIdByInnerRef.put(ref, markTuple.getId());
globalState.markTupleIdsNotProcessed.add(markTuple.getId());
}
return result;
}
/**
* Creates a new slot descriptor and related state in globalState.
*/
public SlotDescriptor addSlotDescriptor(TupleDescriptor tupleDesc) {
SlotDescriptor result = globalState.descTbl.addSlotDescriptor(tupleDesc);
globalState.blockBySlot.put(result.getId(), this);
return result;
}
/**
* Register conjuncts that are outer joined by a full outer join. For a given
* predicate, we record the last full outer join that outer-joined any of its
* tuple ids. We need this additional information because full-outer joins obey
* different rules with respect to predicate pushdown compared to left and right
* outer joins.
*/
public void registerFullOuterJoinedConjunct(Expr e) {
Preconditions.checkState(
!globalState.fullOuterJoinedConjuncts.containsKey(e.getId()));
List<TupleId> tids = Lists.newArrayList();
e.getIds(tids, null);
for (TupleId tid : tids) {
if (!globalState.fullOuterJoinedTupleIds.containsKey(tid)) {
continue;
}
TableRef currentOuterJoin = globalState.fullOuterJoinedTupleIds.get(tid);
globalState.fullOuterJoinedConjuncts.put(e.getId(), currentOuterJoin);
break;
}
if (LOG.isDebugEnabled()) {
LOG.debug("registerFullOuterJoinedConjunct: " + globalState.fullOuterJoinedConjuncts);
}
}
/**
* Register individual conjunct with all tuple and slot ids it references
* and with the global conjunct list.
*/
private void registerConjunct(Expr e) {
// this conjunct would already have an id assigned if it is being re-registered
// in a subquery analyzer
e.setId(globalState.conjunctIdGenerator.getNextId());
globalState.conjuncts.put(e.getId(), e);
// LOG.info("registered conjunct " + p.getId().toString() + ": " + p.toSql());
ArrayList<TupleId> tupleIds = Lists.newArrayList();
ArrayList<SlotId> slotIds = Lists.newArrayList();
e.getIds(tupleIds, slotIds);
// register full join conjuncts
registerFullOuterJoinedConjunct(e);
// update tuplePredicates
for (TupleId id : tupleIds) {
if (!tuplePredicates.containsKey(id)) {
List<ExprId> conjunctIds = Lists.newArrayList();
conjunctIds.add(e.getId());
tuplePredicates.put(id, conjunctIds);
} else {
tuplePredicates.get(id).add(e.getId());
}
}
// update slotPredicates
for (SlotId id : slotIds) {
if (!slotPredicates.containsKey(id)) {
List<ExprId> conjunctIds = Lists.newArrayList();
conjunctIds.add(e.getId());
slotPredicates.put(id, conjunctIds);
} else {
slotPredicates.get(id).add(e.getId());
}
}
// check whether this is an equi-join predicate, ie, something of the
// form <expr1> = <expr2> where at least one of the exprs is bound by
// exactly one tuple id
if (!(e instanceof BinaryPredicate)) {
return;
}
BinaryPredicate binaryPred = (BinaryPredicate) e;
if (!binaryPred.getOp().isEquivalence()) {
return;
}
if (tupleIds.size() < 2) {
return;
}
// examine children and update eqJoinConjuncts
for (int i = 0; i < 2; ++i) {
List<TupleId> lhsTupleIds = Lists.newArrayList();
binaryPred.getChild(i).getIds(lhsTupleIds, null);
if (lhsTupleIds.size() == 1) {
if (!globalState.eqJoinConjuncts.containsKey(lhsTupleIds.get(0))) {
List<ExprId> conjunctIds = Lists.newArrayList();
conjunctIds.add(e.getId());
globalState.eqJoinConjuncts.put(lhsTupleIds.get(0), conjunctIds);
} else {
globalState.eqJoinConjuncts.get(lhsTupleIds.get(0)).add(e.getId());
}
binaryPred.setIsEqJoinConjunct(true);
}
}
}
/**
* Create and register an auxiliary predicate to express an equivalence
* between two exprs (BinaryPredicate with EQ); this predicate does not need
* to be assigned, but it's used for equivalence class computation. Does
* nothing if the lhs or rhs expr are NULL. Registering an equivalence with
* NULL would be incorrect, because <expr> = NULL is false (even NULL =
* NULL).
*/
public void createAuxEquivPredicate(Expr lhs, Expr rhs) {
// Check the expr type as well as the class because NullLiteral could
// have been
// implicitly cast to a type different than NULL.
if (lhs instanceof NullLiteral || rhs instanceof NullLiteral
|| lhs.getType().isNull() || rhs.getType().isNull()) {
return;
}
// create an eq predicate between lhs and rhs
BinaryPredicate p = new BinaryPredicate(BinaryPredicate.Operator.EQ, lhs, rhs);
p.setIsAuxExpr();
if (LOG.isDebugEnabled()) {
LOG.debug("register equiv predicate: " + p.toSql() + " " + p.debugString());
}
registerConjunct(p);
}
/**
* Returns the fully-qualified table name of tableName. If tableName
* is already fully qualified, returns tableName.
*/
public TableName getFqTableName(TableName tableName) {
if (Strings.isNullOrEmpty(tableName.getCtl())) {
tableName.setCtl(getDefaultCatalog());
}
if (Strings.isNullOrEmpty(tableName.getDb())) {
tableName.setDb(getDefaultDb());
}
return tableName;
}
public DescriptorTable getDescTbl() {
return globalState.descTbl;
}
public Env getEnv() {
return globalState.env;
}
public Set<String> getAliases() {
return uniqueTableAliasSet;
}
/**
* return equal conjuncts, used by OlapScanNode.normalizePredicate and SelectStmt.reorderTable
*/
public List<Expr> getEqJoinConjuncts(TupleId id) {
final List<ExprId> conjunctIds = globalState.eqJoinConjuncts.get(id);
if (conjunctIds == null) {
return Lists.newArrayList();
}
final List<Expr> result = Lists.newArrayList();
for (ExprId conjunctId : conjunctIds) {
final Expr e = globalState.conjuncts.get(conjunctId);
Preconditions.checkState(e != null);
result.add(e);
}
return result;
}
public String getDefaultCatalog() {
return globalState.context.getDefaultCatalog();
}
public String getDefaultDb() {
return globalState.context.getDatabase();
}
public String getQualifiedUser() {
return globalState.context.getQualifiedUser();
}
// TODO: `globalState.context` could be null, refactor return value type to
// `Optional<ConnectContext>`.
public ConnectContext getContext() {
return globalState.context;
}
/**
* Change all outer joined slots to nullable
* Returns the slots actually be changed from not nullable to nullable
*/
public List<SlotDescriptor> changeSlotToNullableOfOuterJoinedTuples() {
List<SlotDescriptor> result = new ArrayList<>();
for (TupleId id : globalState.outerJoinedTupleIds.keySet()) {
TupleDescriptor tupleDescriptor = globalState.descTbl.getTupleDesc(id);
if (tupleDescriptor != null) {
for (SlotDescriptor desc : tupleDescriptor.getSlots()) {
if (!desc.getIsNullable()) {
desc.setIsNullable(true);
result.add(desc);
}
}
}
}
return result;
}
public void changeSlotsToNotNullable(List<SlotDescriptor> slots) {
for (SlotDescriptor slot : slots) {
slot.setIsNullable(false);
}
}
}