SortInfo.java

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
// This file is copied from
// https://github.com/apache/impala/blob/branch-2.9.0/fe/src/main/java/org/apache/impala/SortInfo.java
// and modified by Doris

package org.apache.doris.analysis;

import org.apache.doris.common.TreeNode;
import org.apache.doris.thrift.TSortInfo;

import com.google.common.base.Preconditions;
import com.google.common.base.Predicates;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.util.List;
import java.util.Set;

/**
 * Encapsulates all the information needed to compute ORDER BY
 * This doesn't contain aliases or positional exprs.
 * TODO: reorganize this completely, this doesn't really encapsulate anything; this
 * should move into planner/ and encapsulate the implementation of the sort of a
 * particular input row (materialize all row slots)
 */
public class SortInfo {
    private static final Logger LOG = LogManager.getLogger(SortInfo.class);
    // All ordering exprs with cost greater than this will be materialized. Since we don't
    // currently have any information about actual function costs, this value is intended to
    // ensure that all expensive functions will be materialized while still leaving simple
    // operations unmaterialized, for example 'SlotRef + SlotRef' should have a cost below
    // this threshold.
    // TODO: rethink this when we have a better cost model.
    private static final float SORT_MATERIALIZATION_COST_THRESHOLD = Expr.FUNCTION_CALL_COST;

    private List<Expr> orderingExprs;
    private List<Expr> origOrderingExprs;
    private final List<Boolean> isAscOrder;
    // True if "NULLS FIRST", false if "NULLS LAST", null if not specified.
    private final List<Boolean> nullsFirstParams;
    // Subset of ordering exprs that are materialized. Populated in
    // createMaterializedOrderExprs(), used for EXPLAIN output.
    private List<Expr> materializedOrderingExprs;
    // The single tuple that is materialized, sorted, and output by a sort operator
    // (i.e. SortNode or TopNNode)
    private TupleDescriptor sortTupleDesc;
    // Input expressions materialized into sortTupleDesc_. One expr per slot in
    // sortTupleDesc_.
    private List<Expr> sortTupleSlotExprs;
    private boolean useTwoPhaseRead = false;

    public SortInfo(List<Expr> orderingExprs, List<Boolean> isAscOrder,
                    List<Boolean> nullsFirstParams) {
        Preconditions.checkArgument(orderingExprs.size() == isAscOrder.size());
        Preconditions.checkArgument(orderingExprs.size() == nullsFirstParams.size());
        this.orderingExprs = orderingExprs;
        this.isAscOrder = isAscOrder;
        this.nullsFirstParams = nullsFirstParams;
        materializedOrderingExprs = Lists.newArrayList();
    }

    /**
     * Used by new optimizer.
     */
    public SortInfo(List<Expr> orderingExprs,
                    List<Boolean> isAscOrder,
                    List<Boolean> nullsFirstParams,
                    TupleDescriptor sortTupleDesc) {
        this.orderingExprs = orderingExprs;
        this.isAscOrder = isAscOrder;
        this.nullsFirstParams = nullsFirstParams;
        this.sortTupleDesc = sortTupleDesc;
    }

    /**
     * C'tor for cloning.
     */
    private SortInfo(SortInfo other) {
        orderingExprs = Expr.cloneList(other.orderingExprs);
        isAscOrder = Lists.newArrayList(other.isAscOrder);
        nullsFirstParams = Lists.newArrayList(other.nullsFirstParams);
        materializedOrderingExprs = Expr.cloneList(other.materializedOrderingExprs);
        sortTupleDesc = other.sortTupleDesc;
        if (other.sortTupleSlotExprs != null) {
            sortTupleSlotExprs = Expr.cloneList(other.sortTupleSlotExprs);
        }
    }

    /**
     * Sets sortTupleDesc_, which is the internal row representation to be materialized and
     * sorted. The source exprs of the slots in sortTupleDesc_ are changed to those in
     * tupleSlotExprs.
     */
    public void setMaterializedTupleInfo(
            TupleDescriptor tupleDesc, List<Expr> tupleSlotExprs) {
        Preconditions.checkState(tupleDesc.getSlots().size() == tupleSlotExprs.size());
        sortTupleDesc = tupleDesc;
        sortTupleSlotExprs = tupleSlotExprs;
        for (int i = 0; i < sortTupleDesc.getSlots().size(); ++i) {
            SlotDescriptor slotDesc = sortTupleDesc.getSlots().get(i);
            slotDesc.setSourceExpr(sortTupleSlotExprs.get(i));
        }
    }

    public List<Expr> getOrderingExprs() {
        return orderingExprs;
    }

    public List<Expr> getOrigOrderingExprs() {
        return origOrderingExprs;
    }

    public List<Boolean> getIsAscOrder() {
        return isAscOrder;
    }

    public List<Boolean> getNullsFirstParams() {
        return nullsFirstParams;
    }

    public List<Expr> getMaterializedOrderingExprs() {
        return materializedOrderingExprs;
    }

    public void addMaterializedOrderingExpr(Expr expr) {
        if (materializedOrderingExprs == null) {
            materializedOrderingExprs = Lists.newArrayList();
        }
        materializedOrderingExprs.add(expr);
    }

    public List<Expr> getSortTupleSlotExprs() {
        return sortTupleSlotExprs;
    }

    public void setSortTupleSlotExprs(List<Expr> sortTupleSlotExprs) {
        this.sortTupleSlotExprs = sortTupleSlotExprs;
    }

    public void setSortTupleDesc(TupleDescriptor tupleDesc) {
        sortTupleDesc = tupleDesc;
    }

    public void setUseTwoPhaseRead() {
        useTwoPhaseRead = true;
    }

    public boolean useTwoPhaseRead() {
        return useTwoPhaseRead;
    }

    public TupleDescriptor getSortTupleDescriptor() {
        return sortTupleDesc;
    }

    /**
     * Gets the list of booleans indicating whether nulls come first or last, independent
     * of asc/desc.
     */
    public List<Boolean> getNullsFirst() {
        Preconditions.checkState(orderingExprs.size() == nullsFirstParams.size());
        List<Boolean> nullsFirst = Lists.newArrayList();
        for (int i = 0; i < orderingExprs.size(); ++i) {
            nullsFirst.add(OrderByElement.nullsFirst(nullsFirstParams.get(i),
                    isAscOrder.get(i)));
        }
        return nullsFirst;
    }

    /**
     * Materializes the slots in sortTupleDesc_ referenced in the ordering exprs.
     * Materializes the slots referenced by the corresponding sortTupleSlotExpr after
     * applying the 'smap'.
     */
    public void materializeRequiredSlots(Analyzer analyzer, ExprSubstitutionMap smap) {
        Preconditions.checkNotNull(sortTupleDesc);
        Preconditions.checkNotNull(sortTupleSlotExprs);
        Preconditions.checkState(sortTupleDesc.isMaterialized());
        analyzer.materializeSlots(orderingExprs);
        List<SlotDescriptor> sortTupleSlotDescs = sortTupleDesc.getSlots();
        List<Expr> materializedExprs = Lists.newArrayList();
        for (int i = 0; i < sortTupleSlotDescs.size(); ++i) {
            if (sortTupleSlotDescs.get(i).isMaterialized()) {
                materializedExprs.add(sortTupleSlotExprs.get(i));
            }
        }
        List<Expr> substMaterializedExprs =
                Expr.substituteList(materializedExprs, smap, analyzer, false);
        analyzer.materializeSlots(substMaterializedExprs);
    }

    public void substituteOrderingExprs(ExprSubstitutionMap smap, Analyzer analyzer) {
        orderingExprs = Expr.substituteList(orderingExprs, smap, analyzer, false);
    }

    /**
     * Asserts that all ordering exprs are bound by the sort tuple.
     */
    public void checkConsistency() {
        for (Expr orderingExpr : orderingExprs) {
            Preconditions.checkState(orderingExpr.isBound(sortTupleDesc.getId()));
        }
    }

    @Override
    public SortInfo clone() {
        return new SortInfo(this);
    }

    /**
     * Create a tuple descriptor for the single tuple that is materialized, sorted, and
     * output by the sort node. Materializes slots required by 'resultExprs' as well as
     * non-deterministic and expensive order by exprs. The materialized exprs are
     * substituted with slot refs into the new tuple. This simplifies the sorting logic for
     * total and top-n sorts. The substitution map is returned.
     */
    public ExprSubstitutionMap createSortTupleInfo(
            List<Expr> resultExprs, Analyzer analyzer) {
        // The descriptor for the tuples on which the sort operates.
        TupleDescriptor sortTupleDesc = analyzer.getDescTbl().createTupleDescriptor("sort");
        sortTupleDesc.setIsMaterialized(true);
        List<Expr> sortTupleExprs = Lists.newArrayList();

        // substOrderBy is a mapping from exprs evaluated on the sort input that get
        // materialized into the sort tuple to their corresponding SlotRefs in the sort tuple.
        // The following exprs are materialized:
        // 1. Ordering exprs that we chose to materialize
        // 2. SlotRefs against the sort input contained in the result and ordering exprs after
        // substituting the materialized ordering exprs.

        // Case 1:
        ExprSubstitutionMap substOrderBy =
                createMaterializedOrderExprs(sortTupleDesc, analyzer);
        sortTupleExprs.addAll(substOrderBy.getLhs());

        // Case 2: SlotRefs in the result and ordering exprs after substituting the
        // materialized ordering exprs.
        Set<SlotRef> sourceSlots = Sets.newHashSet();
        TreeNode.collect(Expr.substituteList(resultExprs, substOrderBy, analyzer, false),
                Predicates.instanceOf(SlotRef.class), sourceSlots);
        TreeNode.collect(Expr.substituteList(orderingExprs, substOrderBy, analyzer, false),
                Predicates.instanceOf(SlotRef.class), sourceSlots);
        for (SlotRef origSlotRef : sourceSlots) {
            // Don't rematerialize slots that are already in the sort tuple.
            if (origSlotRef.getDesc().getParent().getId() != sortTupleDesc.getId()) {
                SlotDescriptor origSlotDesc = origSlotRef.getDesc();
                SlotDescriptor materializedDesc =
                        analyzer.copySlotDescriptor(origSlotDesc, sortTupleDesc);
                // set to nullable if the origSlot is outer joined
                if (analyzer.isOuterJoined(origSlotDesc.getParent().getId())) {
                    materializedDesc.setIsNullable(true);
                }
                SlotRef cloneRef = new SlotRef(materializedDesc);
                substOrderBy.put(origSlotRef, cloneRef);
                sortTupleExprs.add(origSlotRef);
            }
        }

        // backup before substitute orderingExprs
        origOrderingExprs = orderingExprs;

        // The ordering exprs are evaluated against the sort tuple, so they must reflect the
        // materialization decision above.
        substituteOrderingExprs(substOrderBy, analyzer);

        // Update the tuple descriptor used to materialize the input of the sort.
        setMaterializedTupleInfo(sortTupleDesc, sortTupleExprs);
        if (LOG.isDebugEnabled()) {
            LOG.debug("sortTupleDesc {}", sortTupleDesc);
        }

        return substOrderBy;
    }

    /**
     * Materialize ordering exprs by creating slots for them in 'sortTupleDesc' if they:
     * - contain a non-deterministic expr
     * - contain a UDF (since we don't know if they're deterministic)
     * - are more expensive than a cost threshold
     * - don't have a cost set
     *
     * Populates 'materializedOrderingExprs_' and returns a mapping from the original
     * ordering exprs to the new SlotRefs. It is expected that this smap will be passed into
     * substituteOrderingExprs() by the caller.
     */
    public ExprSubstitutionMap createMaterializedOrderExprs(
            TupleDescriptor sortTupleDesc, Analyzer analyzer) {
        // the sort node exprs may come from the child outer join node
        // we need change the slots to nullable from all outer join nullable side temporarily
        // then the sort node expr would have correct nullable info
        // after create the output tuple we need revert the change by call analyzer.changeSlotsToNotNullable(slots)
        List<SlotDescriptor> slots = analyzer.changeSlotToNullableOfOuterJoinedTuples();
        ExprSubstitutionMap substOrderBy = new ExprSubstitutionMap();
        for (Expr origOrderingExpr : orderingExprs) {
            SlotDescriptor materializedDesc = analyzer.addSlotDescriptor(sortTupleDesc);
            materializedDesc.initFromExpr(origOrderingExpr);
            materializedDesc.setIsMaterialized(true);
            SlotRef origSlotRef = origOrderingExpr.getSrcSlotRef();
            if (LOG.isDebugEnabled()) {
                LOG.debug("origOrderingExpr {}", origOrderingExpr);
            }
            if (origSlotRef != null) {
                // need do this for two phase read of topn query optimization
                // check https://github.com/apache/doris/pull/15642 for detail
                materializedDesc.setSrcColumn(origSlotRef.getColumn());
            }
            SlotRef materializedRef = new SlotRef(materializedDesc);
            substOrderBy.put(origOrderingExpr, materializedRef);
            materializedOrderingExprs.add(origOrderingExpr);
        }
        analyzer.changeSlotsToNotNullable(slots);
        return substOrderBy;
    }

    /**
     * Convert the sort info to TSortInfo.
     */
    public TSortInfo toThrift() {
        TSortInfo sortInfo = new TSortInfo(
                Expr.treesToThrift(orderingExprs),
                isAscOrder,
                nullsFirstParams);
        if (useTwoPhaseRead) {
            sortInfo.setUseTwoPhaseRead(true);
        }
        return sortInfo;
    }
}