// PhysicalOlapTableSink.java

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.nereids.trees.plans.physical;

import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Database;
import org.apache.doris.catalog.DistributionInfo;
import org.apache.doris.catalog.HashDistributionInfo;
import org.apache.doris.catalog.KeysType;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.RandomDistributionInfo;
import org.apache.doris.common.Config;
import org.apache.doris.nereids.exceptions.AnalysisException;
import org.apache.doris.nereids.memo.GroupExpression;
import org.apache.doris.nereids.properties.LogicalProperties;
import org.apache.doris.nereids.properties.PhysicalProperties;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.NamedExpression;
import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.PlanType;
import org.apache.doris.nereids.trees.plans.algebra.Sink;
import org.apache.doris.nereids.trees.plans.commands.info.DMLCommandType;
import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor;
import org.apache.doris.nereids.util.Utils;
import org.apache.doris.statistics.Statistics;

import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;

import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;

/**
 * Physical olap table sink for the insert command.
 *
 * <p>The node has exactly one child. Its fields are assigned once in the constructor;
 * every {@code with*} / {@code reset*} method returns a new instance instead of
 * modifying this one.
 */
public class PhysicalOlapTableSink<CHILD_TYPE extends Plan> extends PhysicalTableSink<CHILD_TYPE> implements Sink {

    private final Database database;
    private final OlapTable targetTable;
    private final List<Column> cols;
    private final List<Long> partitionIds;
    private final boolean singleReplicaLoad;
    private final boolean isPartialUpdate;
    private final DMLCommandType dmlCommandType;
    // The three fields below are stored exactly as passed in: they are neither
    // null-checked nor copied by the constructor.
    private final List<Expression> partitionExprList;
    private final Map<Long, Expression> syncMvWhereClauses;
    private final List<Slot> targetTableSlots;

    /**
     * Constructor without explicit physical properties and statistics.
     *
     * <p>Delegates to the full constructor with {@link PhysicalProperties#GATHER} as the
     * physical properties and {@code null} statistics.
     */
    public PhysicalOlapTableSink(Database database, OlapTable targetTable, List<Column> cols,
            List<Long> partitionIds, List<NamedExpression> outputExprs, boolean singleReplicaLoad,
            boolean isPartialUpdate, DMLCommandType dmlCommandType,
            List<Expression> partitionExprList, Map<Long, Expression> syncMvWhereClauses,
            List<Slot> targetTableSlots, Optional<GroupExpression> groupExpression,
            LogicalProperties logicalProperties, CHILD_TYPE child) {
        this(database, targetTable, cols, partitionIds, outputExprs, singleReplicaLoad,
                isPartialUpdate, dmlCommandType, partitionExprList, syncMvWhereClauses,
                targetTableSlots, groupExpression, logicalProperties, PhysicalProperties.GATHER,
                null, child);
    }

    /**
     * Full constructor.
     *
     * <p>{@code database} and {@code targetTable} must be non-null. {@code cols} and
     * {@code partitionIds} are passed through {@code Utils.copyRequiredList}; the remaining
     * collection arguments are kept by reference.
     *
     * @throws NullPointerException if {@code database} or {@code targetTable} is null
     */
    public PhysicalOlapTableSink(Database database, OlapTable targetTable, List<Column> cols,
            List<Long> partitionIds, List<NamedExpression> outputExprs, boolean singleReplicaLoad,
            boolean isPartialUpdate, DMLCommandType dmlCommandType,
            List<Expression> partitionExprList, Map<Long, Expression> syncMvWhereClauses,
            List<Slot> targetTableSlots, Optional<GroupExpression> groupExpression,
            LogicalProperties logicalProperties, PhysicalProperties physicalProperties,
            Statistics statistics, CHILD_TYPE child) {
        super(PlanType.PHYSICAL_OLAP_TABLE_SINK, outputExprs, groupExpression,
                logicalProperties, physicalProperties, statistics, child);
        this.database = Objects.requireNonNull(database, "database != null in PhysicalOlapTableSink");
        this.targetTable = Objects.requireNonNull(targetTable, "targetTable != null in PhysicalOlapTableSink");
        this.cols = Utils.copyRequiredList(cols);
        this.partitionIds = Utils.copyRequiredList(partitionIds);
        this.singleReplicaLoad = singleReplicaLoad;
        this.isPartialUpdate = isPartialUpdate;
        this.dmlCommandType = dmlCommandType;
        this.partitionExprList = partitionExprList;
        this.syncMvWhereClauses = syncMvWhereClauses;
        this.targetTableSlots = targetTableSlots;
    }

    public Database getDatabase() {
        return database;
    }

    @Override
    public OlapTable getTargetTable() {
        return targetTable;
    }

    public List<Column> getCols() {
        return cols;
    }

    public List<Long> getPartitionIds() {
        return partitionIds;
    }

    public boolean isSingleReplicaLoad() {
        return singleReplicaLoad;
    }

    public boolean isPartialUpdate() {
        return isPartialUpdate;
    }

    public DMLCommandType getDmlCommandType() {
        return dmlCommandType;
    }

    public List<Expression> getPartitionExprList() {
        return partitionExprList;
    }

    public Map<Long, Expression> getSyncMvWhereClauses() {
        return syncMvWhereClauses;
    }

    public List<Slot> getTargetTableSlots() {
        return targetTableSlots;
    }

    /**
     * Returns a copy of this sink whose child is {@code children.get(0)}; group expression,
     * logical properties, physical properties and statistics are carried over.
     *
     * @throws IllegalArgumentException if {@code children} does not contain exactly one plan
     */
    @Override
    public Plan withChildren(List<Plan> children) {
        Preconditions.checkArgument(children.size() == 1, "PhysicalOlapTableSink only accepts one child");
        return new PhysicalOlapTableSink<>(database, targetTable, cols, partitionIds, outputExprs,
                singleReplicaLoad, isPartialUpdate, dmlCommandType, partitionExprList,
                syncMvWhereClauses, targetTableSlots, groupExpression, getLogicalProperties(),
                physicalProperties, statistics, children.get(0));
    }

    /**
     * Two sinks are equal when they are of the same class and all sink-specific fields match.
     * The partition expressions, sync-MV where clauses and target table slots take part in the
     * comparison so that sinks differing only in those are not treated as interchangeable.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        PhysicalOlapTableSink<?> that = (PhysicalOlapTableSink<?>) o;
        return singleReplicaLoad == that.singleReplicaLoad
                && isPartialUpdate == that.isPartialUpdate
                && dmlCommandType == that.dmlCommandType
                && Objects.equals(database, that.database)
                && Objects.equals(targetTable, that.targetTable)
                && Objects.equals(cols, that.cols)
                && Objects.equals(partitionIds, that.partitionIds)
                && Objects.equals(partitionExprList, that.partitionExprList)
                && Objects.equals(syncMvWhereClauses, that.syncMvWhereClauses)
                && Objects.equals(targetTableSlots, that.targetTableSlots);
    }

    /**
     * Hashes exactly the fields compared by {@link #equals(Object)}.
     */
    @Override
    public int hashCode() {
        return Objects.hash(database, targetTable, cols, partitionIds, singleReplicaLoad,
                isPartialUpdate, dmlCommandType, partitionExprList, syncMvWhereClauses,
                targetTableSlots);
    }

    @Override
    public String toString() {
        return Utils.toSqlString("PhysicalOlapTableSink[" + id.asInt() + "]",
                "outputExprs", outputExprs,
                "database", database.getFullName(),
                "targetTable", targetTable.getName(),
                "cols", cols,
                "partitionIds", partitionIds,
                "singleReplicaLoad", singleReplicaLoad,
                "isPartialUpdate", isPartialUpdate,
                "dmlCommandType", dmlCommandType
        );
    }

    @Override
    public <R, C> R accept(PlanVisitor<R, C> visitor, C context) {
        return visitor.visitPhysicalOlapTableSink(this, context);
    }

    /**
     * This node reports no expressions of its own; always returns an empty list.
     */
    @Override
    public List<? extends Expression> getExpressions() {
        return ImmutableList.of();
    }

    /**
     * Returns a copy of this sink bound to {@code groupExpression}.
     *
     * <p>Uses the short constructor, so the copy carries {@link PhysicalProperties#GATHER}
     * and {@code null} statistics rather than this node's current values.
     */
    @Override
    public Plan withGroupExpression(Optional<GroupExpression> groupExpression) {
        return new PhysicalOlapTableSink<>(database, targetTable, cols, partitionIds, outputExprs,
                singleReplicaLoad, isPartialUpdate, dmlCommandType, partitionExprList,
                syncMvWhereClauses, targetTableSlots, groupExpression, getLogicalProperties(),
                child());
    }

    /**
     * Returns a copy of this sink with the given group expression, logical properties and child.
     *
     * <p>Uses the short constructor, so the copy carries {@link PhysicalProperties#GATHER}
     * and {@code null} statistics.
     *
     * @throws IllegalArgumentException if {@code children} does not contain exactly one plan
     * @throws java.util.NoSuchElementException if {@code logicalProperties} is empty
     */
    @Override
    public Plan withGroupExprLogicalPropChildren(Optional<GroupExpression> groupExpression,
            Optional<LogicalProperties> logicalProperties, List<Plan> children) {
        // Same arity contract as withChildren: fail fast instead of silently taking the first child.
        Preconditions.checkArgument(children.size() == 1, "PhysicalOlapTableSink only accepts one child");
        return new PhysicalOlapTableSink<>(database, targetTable, cols, partitionIds, outputExprs,
                singleReplicaLoad, isPartialUpdate, dmlCommandType, partitionExprList,
                syncMvWhereClauses, targetTableSlots, groupExpression, logicalProperties.get(),
                children.get(0));
    }

    /**
     * Returns a copy of this sink with the given physical properties and statistics;
     * everything else, including the child, is carried over.
     */
    @Override
    public PhysicalPlan withPhysicalPropertiesAndStats(PhysicalProperties physicalProperties,
            Statistics statistics) {
        return new PhysicalOlapTableSink<>(database, targetTable, cols, partitionIds, outputExprs,
                singleReplicaLoad, isPartialUpdate, dmlCommandType, partitionExprList,
                syncMvWhereClauses, targetTableSlots, groupExpression, getLogicalProperties(),
                physicalProperties, statistics, child());
    }

    /**
     * Get the physical properties this sink requires, derived from the target table's
     * default distribution.
     *
     * <ul>
     *   <li>table is not partition-distributed: {@link PhysicalProperties#GATHER}</li>
     *   <li>hash distribution: {@link PhysicalProperties#TABLET_ID_SHUFFLE}, except for a
     *       duplicate-key table whose tablet count is below
     *       {@code Config.min_tablets_for_dup_table_shuffle}, which gets
     *       {@link PhysicalProperties#ANY}</li>
     *   <li>random distribution: {@link PhysicalProperties#ANY}</li>
     * </ul>
     *
     * @throws AnalysisException if the distribution info is neither hash nor random
     */
    @Override
    public PhysicalProperties getRequirePhysicalProperties() {
        if (targetTable.isPartitionDistributed()) {
            DistributionInfo distributionInfo = targetTable.getDefaultDistributionInfo();
            if (distributionInfo instanceof HashDistributionInfo) {
                // Do not enable shuffle for duplicate key tables when its tablet num is less than threshold.
                if (targetTable.getKeysType() == KeysType.DUP_KEYS) {
                    // Count at least one partition so the bucket count is never multiplied by zero.
                    final long partitionNums = Math.max(targetTable.getPartitionInfo().getAllPartitions().size(), 1);
                    final long tabletNums = partitionNums * distributionInfo.getBucketNum();
                    if (tabletNums < Config.min_tablets_for_dup_table_shuffle) {
                        return PhysicalProperties.ANY;
                    }
                }
                return PhysicalProperties.TABLET_ID_SHUFFLE;
            } else if (distributionInfo instanceof RandomDistributionInfo) {
                return PhysicalProperties.ANY;
            } else {
                throw new AnalysisException("Unknown distributionInfo for Nereids to calculate physical properties");
            }
        } else {
            return PhysicalProperties.GATHER;
        }
    }

    /**
     * Returns a copy of this sink constructed with {@code null} logical properties;
     * physical properties, statistics, group expression and child are carried over.
     */
    @Override
    public PhysicalOlapTableSink<Plan> resetLogicalProperties() {
        return new PhysicalOlapTableSink<>(database, targetTable, cols, partitionIds, outputExprs,
                singleReplicaLoad, isPartialUpdate, dmlCommandType, partitionExprList,
                syncMvWhereClauses, targetTableSlots, groupExpression, null, physicalProperties,
                statistics, child());
    }
}