LogicalAggregate.java

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.nereids.trees.plans.logical;

import org.apache.doris.nereids.memo.GroupExpression;
import org.apache.doris.nereids.properties.DataTrait;
import org.apache.doris.nereids.properties.LogicalProperties;
import org.apache.doris.nereids.trees.expressions.AggregateExpression;
import org.apache.doris.nereids.trees.expressions.Alias;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.NamedExpression;
import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.expressions.functions.ExpressionTrait;
import org.apache.doris.nereids.trees.expressions.functions.agg.AggregateFunction;
import org.apache.doris.nereids.trees.expressions.functions.agg.AggregatePhase;
import org.apache.doris.nereids.trees.expressions.functions.agg.Count;
import org.apache.doris.nereids.trees.expressions.functions.agg.Ndv;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.PlanType;
import org.apache.doris.nereids.trees.plans.algebra.Aggregate;
import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor;
import org.apache.doris.nereids.util.ExpressionUtils;
import org.apache.doris.nereids.util.Utils;

import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;

import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;

/**
 * Logical Aggregate plan.
 * <p>
 * For example SQL:
 * <p>
 * select a, sum(b), c from table group by a, c;
 * <p>
 * groupByExpressions: Column field after group by. eg: a, c;
 * outputExpressions: Column field after select. eg: a, sum(b), c;
 * <p>
 * Each agg node only contains the select-list fields of its own query layer;
 * the fields of a subquery are held by the agg nodes inside that subquery.
 * Note: In general, the output of agg is a subset of the group by columns plus the aggregate columns.
 * In special cases this relationship does not hold, for example:
 * select k1+1, sum(v1) from table group by k1.
 */
public class LogicalAggregate<CHILD_TYPE extends Plan>
        extends LogicalUnary<CHILD_TYPE>
        implements Aggregate<CHILD_TYPE> {

    // Expressions of the GROUP BY clause; stored as an immutable copy.
    private final List<Expression> groupByExpressions;
    // Expressions of the SELECT list; stored as an immutable copy. Each one becomes an output slot.
    private final List<NamedExpression> outputExpressions;

    // When there are grouping sets/rollup/cube, LogicalAgg is generated by LogicalRepeat.
    private final Optional<LogicalRepeat<?>> sourceRepeat;

    // Set to true by withNormalized(); every other copy method carries the current value over.
    private final boolean normalized;
    private final boolean ordinalIsResolved;
    private final boolean generated;
    private final boolean hasPushed;

    /**
     * Desc: Constructor for LogicalAggregate.
     * Creates a non-normalized aggregate without a source repeat.
     */
    public LogicalAggregate(
            List<Expression> groupByExpressions,
            List<NamedExpression> outputExpressions,
            CHILD_TYPE child) {
        this(groupByExpressions, outputExpressions,
                false, Optional.empty(), child);
    }

    /**
     * Distinct Agg.
     * The given expressions are used both as the group by keys and as the output;
     * ordinalIsResolved is set to true and hasPushed to false.
     */
    public LogicalAggregate(List<NamedExpression> namedExpressions, boolean generated, CHILD_TYPE child) {
        this(ImmutableList.copyOf(namedExpressions), namedExpressions, false, true, generated, false, Optional.empty(),
                Optional.empty(), Optional.empty(), child);
    }

    /**
     * Same as the distinct agg constructor, but the hasPushed flag is supplied by the caller.
     */
    public LogicalAggregate(List<NamedExpression> namedExpressions, boolean generated, boolean hasPushed,
            CHILD_TYPE child) {
        this(ImmutableList.copyOf(namedExpressions), namedExpressions, false, true, generated, hasPushed,
                Optional.empty(), Optional.empty(), Optional.empty(), child);
    }

    /**
     * Constructor with an explicit ordinalIsResolved flag; all other flags are false
     * and there is no source repeat.
     */
    public LogicalAggregate(List<Expression> groupByExpressions,
            List<NamedExpression> outputExpressions, boolean ordinalIsResolved, CHILD_TYPE child) {
        this(groupByExpressions, outputExpressions, false, ordinalIsResolved, false, false, Optional.empty(),
                Optional.empty(), Optional.empty(), child);
    }

    /**
     * Desc: Constructor for LogicalAggregate.
     * Generated from LogicalRepeat.
     */
    public LogicalAggregate(
            List<Expression> groupByExpressions,
            List<NamedExpression> outputExpressions,
            Optional<LogicalRepeat<?>> sourceRepeat,
            CHILD_TYPE child) {
        this(groupByExpressions, outputExpressions, false, sourceRepeat, child);
    }

    /**
     * Constructor with explicit normalized flag and source repeat; the remaining flags are false.
     */
    public LogicalAggregate(
            List<Expression> groupByExpressions,
            List<NamedExpression> outputExpressions,
            boolean normalized,
            Optional<LogicalRepeat<?>> sourceRepeat,
            CHILD_TYPE child) {
        this(groupByExpressions, outputExpressions, normalized, false, false, false, sourceRepeat,
                Optional.empty(), Optional.empty(), child);
    }

    /**
     * Whole parameters constructor for LogicalAggregate.
     * Both expression lists are defensively copied into immutable lists.
     *
     * @throws NullPointerException if sourceRepeat is null
     */
    private LogicalAggregate(
            List<Expression> groupByExpressions,
            List<NamedExpression> outputExpressions,
            boolean normalized,
            boolean ordinalIsResolved,
            boolean generated,
            boolean hasPushed,
            Optional<LogicalRepeat<?>> sourceRepeat,
            Optional<GroupExpression> groupExpression,
            Optional<LogicalProperties> logicalProperties,
            CHILD_TYPE child) {
        super(PlanType.LOGICAL_AGGREGATE, groupExpression, logicalProperties, child);
        this.groupByExpressions = ImmutableList.copyOf(groupByExpressions);
        this.outputExpressions = ImmutableList.copyOf(outputExpressions);
        this.normalized = normalized;
        this.ordinalIsResolved = ordinalIsResolved;
        this.generated = generated;
        this.hasPushed = hasPushed;
        this.sourceRepeat = Objects.requireNonNull(sourceRepeat, "sourceRepeat cannot be null");
    }

    @Override
    public List<Expression> getGroupByExpressions() {
        return groupByExpressions;
    }

    @Override
    public List<NamedExpression> getOutputExpressions() {
        return outputExpressions;
    }

    public NamedExpression getOutputExpression(int index) {
        return outputExpressions.get(index);
    }

    /** Returns the SQL text of all output expressions, joined by ", ". */
    public String getOutputExprsSql() {
        return outputExpressions.stream().map(ExpressionTrait::toSql).collect(Collectors.joining(", "));
    }

    public Optional<LogicalRepeat<?>> getSourceRepeat() {
        return sourceRepeat;
    }

    public boolean isGenerated() {
        return generated;
    }

    public boolean hasPushed() {
        return hasPushed;
    }

    @Override
    public String toString() {
        return Utils.toSqlString("LogicalAggregate[" + id.asInt() + "]",
                "groupByExpr", groupByExpressions,
                "outputExpr", outputExpressions,
                "hasRepeat", sourceRepeat.isPresent()
        );
    }

    /**
     * Builds the fingerprint string of this aggregate: the group by expressions followed by,
     * for each output expression, either the aggregate function name (when the output is an
     * alias directly over an aggregate expression/function) or the expression's string form.
     */
    @Override
    public String getFingerprint() {
        StringBuilder builder = new StringBuilder();
        // logical agg is mapped to physical GLOBAL
        String aggPhase = "Aggregate" + "(GLOBAL)";
        List<Object> groupByExpressionsArgs = Lists.newArrayList(
                "groupByExpr", groupByExpressions);
        builder.append(Utils.toSqlString(aggPhase, groupByExpressionsArgs.toArray()));

        builder.append("outputExpr=");
        for (NamedExpression expr : outputExpressions) {
            if (!(expr instanceof Alias)) {
                builder.append(Utils.toStringOrNull(expr));
                continue;
            }
            Expression aliased = expr.child(0);
            if (aliased instanceof AggregateExpression) {
                builder.append(((AggregateExpression) aliased).getFunction().getName());
            } else if (aliased instanceof AggregateFunction) {
                builder.append(((AggregateFunction) aliased).getName());
            } else {
                builder.append(Utils.toStringOrNull(expr));
            }
        }

        return builder.toString();
    }

    /** Converts every output expression to its slot, preserving order. */
    @Override
    public List<Slot> computeOutput() {
        ImmutableList.Builder<Slot> outputSlots = ImmutableList.builderWithExpectedSize(outputExpressions.size());
        for (NamedExpression outputExpression : outputExpressions) {
            outputSlots.add(outputExpression.toSlot());
        }
        return outputSlots.build();
    }

    @Override
    public <R, C> R accept(PlanVisitor<R, C> visitor, C context) {
        return visitor.visitLogicalAggregate(this, context);
    }

    /** Returns the output expressions only; group by expressions are not included. */
    @Override
    public List<? extends Expression> getExpressions() {
        return new ImmutableList.Builder<Expression>()
                .addAll(outputExpressions)
                .build();
    }

    public boolean isNormalized() {
        return normalized;
    }

    public boolean isOrdinalIsResolved() {
        return ordinalIsResolved;
    }

    /**
     * Determine the equality with another plan.
     * Compares group by expressions, output expressions, the normalized / ordinalIsResolved /
     * generated flags and the source repeat.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        LogicalAggregate<?> that = (LogicalAggregate<?>) o;
        // NOTE(review): hasPushed is not compared here; presumably intentional -- confirm.
        return Objects.equals(groupByExpressions, that.groupByExpressions)
                && Objects.equals(outputExpressions, that.outputExpressions)
                && normalized == that.normalized
                && ordinalIsResolved == that.ordinalIsResolved
                && generated == that.generated
                && Objects.equals(sourceRepeat, that.sourceRepeat);
    }

    @Override
    public int hashCode() {
        // hash exactly the fields that equals() compares
        return Objects.hash(groupByExpressions, outputExpressions, normalized, ordinalIsResolved, generated,
                sourceRepeat);
    }

    /**
     * Copies this aggregate onto a new single child; group expression and logical properties are reset.
     */
    @Override
    public LogicalAggregate<Plan> withChildren(List<Plan> children) {
        Preconditions.checkArgument(children.size() == 1);
        return new LogicalAggregate<>(groupByExpressions, outputExpressions, normalized, ordinalIsResolved, generated,
                hasPushed, sourceRepeat, Optional.empty(), Optional.empty(), children.get(0));
    }

    /**
     * Copies this aggregate with the given group expression, keeping the current logical properties and child.
     */
    @Override
    public LogicalAggregate<Plan> withGroupExpression(Optional<GroupExpression> groupExpression) {
        return new LogicalAggregate<>(groupByExpressions, outputExpressions, normalized, ordinalIsResolved, generated,
                hasPushed, sourceRepeat, groupExpression, Optional.of(getLogicalProperties()), children.get(0));
    }

    /**
     * Copies this aggregate with the given group expression, logical properties and single child.
     */
    @Override
    public Plan withGroupExprLogicalPropChildren(Optional<GroupExpression> groupExpression,
            Optional<LogicalProperties> logicalProperties, List<Plan> children) {
        Preconditions.checkArgument(children.size() == 1);
        // forward the caller supplied logical properties instead of this node's own ones
        return new LogicalAggregate<>(groupByExpressions, outputExpressions, normalized, ordinalIsResolved, generated,
                hasPushed, sourceRepeat, groupExpression, logicalProperties, children.get(0));
    }

    /** Copies this aggregate with new group by and output expressions on the current child. */
    public LogicalAggregate<Plan> withGroupByAndOutput(List<Expression> groupByExprList,
            List<NamedExpression> outputExpressionList) {
        return new LogicalAggregate<>(groupByExprList, outputExpressionList, normalized, ordinalIsResolved, generated,
                hasPushed, sourceRepeat, Optional.empty(), Optional.empty(), child());
    }

    /** Copies this aggregate with new group by expressions on the current child. */
    public LogicalAggregate<Plan> withGroupBy(List<Expression> groupByExprList) {
        return new LogicalAggregate<>(groupByExprList, outputExpressions, normalized, ordinalIsResolved, generated,
                hasPushed, sourceRepeat, Optional.empty(), Optional.empty(), child());
    }

    /** Copies this aggregate with new group by expressions, output expressions and child. */
    public LogicalAggregate<Plan> withChildGroupByAndOutput(List<Expression> groupByExprList,
            List<NamedExpression> outputExpressionList, Plan newChild) {
        return new LogicalAggregate<>(groupByExprList, outputExpressionList, normalized, ordinalIsResolved, generated,
                hasPushed, sourceRepeat, Optional.empty(), Optional.empty(), newChild);
    }

    /** Copies this aggregate with a new child and new output expressions. */
    public LogicalAggregate<Plan> withChildAndOutput(CHILD_TYPE child,
                                                       List<NamedExpression> outputExpressionList) {
        return new LogicalAggregate<>(groupByExpressions, outputExpressionList, normalized, ordinalIsResolved,
                generated, hasPushed, sourceRepeat, Optional.empty(),
                Optional.empty(), child);
    }

    @Override
    public List<NamedExpression> getOutputs() {
        return outputExpressions;
    }

    /** Copies this aggregate with new output expressions on the current child. */
    @Override
    public LogicalAggregate<CHILD_TYPE> withAggOutput(List<NamedExpression> newOutput) {
        return new LogicalAggregate<>(groupByExpressions, newOutput, normalized, ordinalIsResolved, generated,
                hasPushed, sourceRepeat, Optional.empty(), Optional.empty(), child());
    }

    /** Copies this aggregate with new output expressions and a new child. */
    public LogicalAggregate<Plan> withAggOutputChild(List<NamedExpression> newOutput, Plan newChild) {
        return new LogicalAggregate<>(groupByExpressions, newOutput, normalized, ordinalIsResolved, generated,
                hasPushed, sourceRepeat, Optional.empty(), Optional.empty(), newChild);
    }

    /** Copies this aggregate with the given group by, output and child, marking the result as normalized. */
    public LogicalAggregate<Plan> withNormalized(List<Expression> normalizedGroupBy,
            List<NamedExpression> normalizedOutput, Plan normalizedChild) {
        return new LogicalAggregate<>(normalizedGroupBy, normalizedOutput, true, ordinalIsResolved, generated,
                hasPushed, sourceRepeat, Optional.empty(), Optional.empty(), normalizedChild);
    }

    /**
     * Collects the group by expressions as a set of slots.
     * Every group by expression is cast to Slot, so a non-slot expression raises ClassCastException.
     * NOTE(review): presumably only reached once the group by keys are plain slots -- confirm.
     */
    private ImmutableSet<Slot> groupByKeySlots() {
        return groupByExpressions.stream()
                .map(s -> (Slot) s)
                .collect(ImmutableSet.toImmutableSet());
    }

    // True when the output wraps exactly one child that is an injective aggregate
    // whose input slots are unique and not null in the child plan.
    private boolean isUniqueGroupByUnique(NamedExpression namedExpression) {
        if (namedExpression.children().size() != 1) {
            return false;
        }
        Expression agg = namedExpression.child(0);
        return ExpressionUtils.isInjectiveAgg(agg)
                && child().getLogicalProperties().getTrait().isUniqueAndNotNull(agg.getInputSlots());
    }

    // True when the output wraps exactly one child that is a Count or Ndv.
    private boolean isUniformGroupByUnique(NamedExpression namedExpression) {
        if (namedExpression.children().size() != 1) {
            return false;
        }
        Expression agg = namedExpression.child(0);
        return agg instanceof Count || agg instanceof Ndv;
    }

    /**
     * Adds the unique slots of this aggregate to the builder.
     * Nothing is added when the aggregate comes from a repeat.
     */
    @Override
    public void computeUnique(DataTrait.Builder builder) {
        if (this.sourceRepeat.isPresent()) {
            // roll up may generate new data
            return;
        }
        DataTrait childFd = child(0).getLogicalProperties().getTrait();
        ImmutableSet<Slot> groupByKeys = groupByKeySlots();
        // when grouping all tuples together, the result only has one row
        if (groupByExpressions.isEmpty() || childFd.isUniformAndNotNull(groupByKeys)) {
            getOutput().forEach(builder::addUniqueSlot);
            return;
        }

        // propagate all unique slots
        builder.addUniqueSlot(childFd);

        // group by keys are unique
        builder.addUniqueSlot(groupByKeys);

        // grouping by unique keys may yield unique aggregate results
        if (childFd.isUniqueAndNotNull(groupByKeys)) {
            for (NamedExpression namedExpression : getOutputExpressions()) {
                if (isUniqueGroupByUnique(namedExpression)) {
                    builder.addUniqueSlot(namedExpression.toSlot());
                }
            }
        }
    }

    /**
     * Adds the uniform slots of this aggregate to the builder.
     * The child's uniform slots are always propagated, even when the aggregate comes from a repeat.
     */
    @Override
    public void computeUniform(DataTrait.Builder builder) {
        // always propagate uniform
        DataTrait childFd = child(0).getLogicalProperties().getTrait();
        builder.addUniformSlot(childFd);

        if (this.sourceRepeat.isPresent()) {
            // roll up may generate new data
            return;
        }
        ImmutableSet<Slot> groupByKeys = groupByKeySlots();
        // when grouping all tuples together, the result only has one row
        if (groupByExpressions.isEmpty() || childFd.isUniformAndNotNull(groupByKeys)) {
            getOutput().forEach(builder::addUniformSlot);
            return;
        }

        if (childFd.isUniqueAndNotNull(groupByKeys)) {
            for (NamedExpression namedExpression : getOutputExpressions()) {
                if (isUniformGroupByUnique(namedExpression)) {
                    builder.addUniformSlot(namedExpression.toSlot());
                }
            }
        }
    }

    /** Propagates the child's equal sets. */
    @Override
    public void computeEqualSet(DataTrait.Builder builder) {
        builder.addEqualSet(child().getLogicalProperties().getTrait());
    }

    /** Propagates the child's functional dependencies. */
    @Override
    public void computeFd(DataTrait.Builder builder) {
        builder.addFuncDepsDG(child().getLogicalProperties().getTrait());
    }

    /** supportAggregatePhase: true only if every aggregate function supports the given phase. */
    public boolean supportAggregatePhase(AggregatePhase aggregatePhase) {
        for (AggregateFunction aggregateFunction : getAggregateFunctions()) {
            if (!aggregateFunction.supportAggregatePhase(aggregatePhase)) {
                return false;
            }
        }
        return true;
    }
}