TableFunctionNode.java

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.planner;

import org.apache.doris.analysis.Analyzer;
import org.apache.doris.analysis.Expr;
import org.apache.doris.analysis.LateralViewRef;
import org.apache.doris.analysis.SelectStmt;
import org.apache.doris.analysis.SlotId;
import org.apache.doris.analysis.SlotRef;
import org.apache.doris.analysis.TupleId;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.UserException;
import org.apache.doris.statistics.StatisticalType;
import org.apache.doris.statistics.StatsRecursiveDerive;
import org.apache.doris.thrift.TExplainLevel;
import org.apache.doris.thrift.TPlanNode;
import org.apache.doris.thrift.TPlanNodeType;
import org.apache.doris.thrift.TTableFunctionNode;

import com.google.common.collect.Lists;
import com.google.common.collect.Sets;

import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

public class TableFunctionNode extends PlanNode {
    private List<LateralViewRef> lateralViewRefs;
    private ArrayList<Expr> fnCallExprList;
    private List<TupleId> lateralViewTupleIds;

    // The output slot ids of TableFunctionNode
    // Only the slot whose id is in this list will be output by TableFunctionNode
    private List<SlotId> outputSlotIds = Lists.newArrayList();

    public TableFunctionNode(PlanNodeId id, PlanNode inputNode, TupleId lateralViewTupleId,
            ArrayList<Expr> fnCallExprList, List<SlotId> outputSlotIds) {
        super(id, "TABLE FUNCTION NODE", StatisticalType.TABLE_FUNCTION_NODE);
        if (inputNode.outputTupleDesc != null) {
            tupleIds.add(inputNode.outputTupleDesc.getId());
        } else {
            List<TupleId> childOutputTupleIds = inputNode.getOutputTupleIds();
            if (childOutputTupleIds != null && !childOutputTupleIds.isEmpty()) {
                tupleIds.addAll(childOutputTupleIds);
            } else {
                tupleIds.addAll(inputNode.getTupleIds());
            }
        }
        tupleIds.add(lateralViewTupleId);
        this.lateralViewTupleIds = Lists.newArrayList(lateralViewTupleId);
        this.fnCallExprList = fnCallExprList;
        this.outputSlotIds = outputSlotIds;
        this.children.add(inputNode);
    }

    protected TableFunctionNode(PlanNodeId id, PlanNode inputNode, List<LateralViewRef> lateralViewRefs) {
        super(id, "TABLE FUNCTION NODE", StatisticalType.TABLE_FUNCTION_NODE);
        tupleIds.addAll(inputNode.getOutputTupleIds());
        tblRefIds.addAll(inputNode.getTupleIds());
        tblRefIds.addAll(inputNode.getTblRefIds());
        lateralViewTupleIds = lateralViewRefs.stream().map(e -> e.getDesc().getId())
                .collect(Collectors.toList());
        tupleIds.addAll(lateralViewTupleIds);
        tblRefIds.addAll(lateralViewTupleIds);
        children.add(inputNode);
        this.lateralViewRefs = lateralViewRefs;
    }

    public void setOutputSlotIds(List<SlotId> outputSlotIds) {
        this.outputSlotIds = outputSlotIds;
    }

    /**
     * This function is mainly used to calculate @outputSlotIds.
     * After the PlanNode executes the @fnCallExpr,
     * it needs to perform projection operation.
     * This function is used to calculate which columns should be projected.
     * The slot belongs to outputSlotIds should be retained after the projection is completed.
     * Slots in selectItems and unassigned predicates should be projected.
     * <p>
     * Case1: The slot belongs to selectItems. The outputSlotIds should include it.
     * For example:
     * Query: select k1, v1 from table lateral view explode_split(v1, ",") t1 as c1;
     * The outputSlots: [k1, v1, c1]
     * <p>
     * Case2: The slot belongs to where clause and the predicate has not been assigned.
     * Query: select k1 from table a lateral view explode_split(v1, ",") t1 as c1, table b where a.v1=b.v1;
     * The outputSlots: [a.k1, a.v1, t1.c1]
     * <p>
     * Case3: The slot neither is part of the unassigned predicate, nor appears in the selectItems.
     * Query: select k1 from table a lateral view explode_split(v1, ",") t1 as c1;
     * The outputSlots: [k1, c1]
     */
    // TODO(ml): Unified to projectplanner
    public void projectSlots(Analyzer analyzer, SelectStmt selectStmt) throws AnalysisException {
        // TODO(ml): Support project calculations that include aggregation and sorting in select stmt
        if ((selectStmt.hasAggInfo() || selectStmt.getSortInfo() != null || selectStmt.hasAnalyticInfo())
                && selectStmt.hasInlineView()) {
            // The query must be rewritten like TableFunctionPlanTest.aggColumnInOuterQuery()
            throw new AnalysisException("Please treat the query containing the lateral view as a inline view"
                    + "and extract your aggregation/sort/window functions to the outer query."
                    + "For example select sum(a) from (select a from table lateral view xxx) tmp1");
        }
        Set<SlotRef> outputSlotRef = Sets.newHashSet();
        // case1
        List<Expr> baseTblResultExprs = Expr.substituteList(selectStmt.getResultExprs(),
                outputSmap, analyzer, false);
        for (Expr resultExpr : baseTblResultExprs) {
            // find all slotRef bound by tupleIds in resultExpr
            resultExpr.getSlotRefsBoundByTupleIds(tupleIds, outputSlotRef);

            // For vec engine while lateral view involves subquery
            Expr dst = outputSmap.get(resultExpr);
            if (dst != null) {
                dst.getSlotRefsBoundByTupleIds(tupleIds, outputSlotRef);
            }
        }
        // case2
        List<Expr> remainConjuncts = analyzer.getRemainConjuncts(tupleIds);
        for (Expr expr : remainConjuncts) {
            expr.getSlotRefsBoundByTupleIds(tupleIds, outputSlotRef);

            // For vec engine while lateral view involves subquery
            Expr dst = outputSmap.get(expr);
            if (dst != null) {
                dst.getSlotRefsBoundByTupleIds(tupleIds, outputSlotRef);
            }
        }
        // set output slot ids
        for (SlotRef slotRef : outputSlotRef) {
            outputSlotIds.add(slotRef.getSlotId());
        }
    }

    @Override
    public void init(Analyzer analyzer) throws UserException {
        super.init(analyzer);
        fnCallExprList = new ArrayList<>(lateralViewRefs.stream().map(e -> e.getFnExpr()).collect(Collectors.toList()));
        Set<SlotRef> outputSlotRef = Sets.newHashSet();
        for (Expr expr : conjuncts) {
            expr.getSlotRefsBoundByTupleIds(tupleIds, outputSlotRef);
            Expr dst = outputSmap.get(expr);
            if (dst != null) {
                dst.getSlotRefsBoundByTupleIds(tupleIds, outputSlotRef);
            }
        }
        for (SlotRef slotRef : outputSlotRef) {
            outputSlotIds.add(slotRef.getSlotId());
        }
        /*
        When the expression of the lateral view involves the column of the subquery,
        the column needs to be rewritten as the real column in the subquery through childrenSmap.
        Example:
          select e1 from (select a from t1) tmp1 lateral view explode_split(a, ",") tmp2 as e1
          Slot 'a' is originally linked to tuple 'tmp1'. <tmp1.a>
          But tmp1 is just a virtual and unreal inline view tuple.
          So we need to push down 'a' and hang it on the real tuple 't1'. <t1.a>
         */
        outputSmap = getCombinedChildSmap();
        fnCallExprList = Expr.substituteList(fnCallExprList, outputSmap, analyzer, false);
        // end

        computeStats(analyzer);
    }

    @Override
    protected void computeStats(Analyzer analyzer) throws UserException {
        super.computeStats(analyzer);

        StatsRecursiveDerive.getStatsRecursiveDerive().statsRecursiveDerive(this);
        cardinality = (long) statsDeriveResult.getRowCount();
    }

    @Override
    public String getNodeExplainString(String prefix, TExplainLevel detailLevel) {
        StringBuilder output = new StringBuilder();
        output.append(prefix).append("table function: ");
        for (Expr fnExpr : fnCallExprList) {
            output.append(fnExpr.toSql()).append(" ");
        }
        output.append("\n");

        output.append(prefix).append("lateral view tuple id: ");
        for (TupleId tupleId : lateralViewTupleIds) {
            output.append(tupleId.asInt()).append(" ");
        }
        output.append("\n");

        if (detailLevel == TExplainLevel.BRIEF) {
            output.append(prefix).append(String.format("cardinality=%,d", cardinality)).append("\n");
            return output.toString();
        }

        output.append(prefix).append("output slot id: ");
        for (SlotId slotId : outputSlotIds) {
            output.append(slotId.asInt()).append(" ");
        }
        output.append("\n");

        if (!conjuncts.isEmpty()) {
            output.append(prefix).append("PREDICATES: ").append(
                    getExplainString(conjuncts)).append("\n");
        }
        output.append(prefix).append(String.format("cardinality=%,d", cardinality)).append("\n");
        return output.toString();
    }

    @Override
    protected void toThrift(TPlanNode msg) {
        msg.node_type = TPlanNodeType.TABLE_FUNCTION_NODE;
        msg.table_function_node = new TTableFunctionNode();
        msg.table_function_node.setFnCallExprList(Expr.treesToThrift(fnCallExprList));
        for (SlotId slotId : outputSlotIds) {
            msg.table_function_node.addToOutputSlotIds(slotId.asInt());
        }
    }
}