PhysicalConnectorTableSink.java

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.nereids.trees.plans.physical;

import org.apache.doris.catalog.Column;
import org.apache.doris.datasource.ExternalDatabase;
import org.apache.doris.datasource.ExternalTable;
import org.apache.doris.datasource.PluginDrivenExternalTable;
import org.apache.doris.nereids.memo.GroupExpression;
import org.apache.doris.nereids.properties.DistributionSpecHiveTableSinkHashPartitioned;
import org.apache.doris.nereids.properties.LogicalProperties;
import org.apache.doris.nereids.properties.MustLocalSortOrderSpec;
import org.apache.doris.nereids.properties.OrderKey;
import org.apache.doris.nereids.properties.PhysicalProperties;
import org.apache.doris.nereids.trees.expressions.ExprId;
import org.apache.doris.nereids.trees.expressions.NamedExpression;
import org.apache.doris.nereids.trees.plans.AbstractPlan;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.PlanType;
import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor;
import org.apache.doris.statistics.Statistics;

import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * Physical table sink for plugin-driven connector catalogs.
 *
 * <p>The distribution and ordering this sink demands from its child are computed by
 * {@link #getRequirePhysicalProperties()}.</p>
 */
public class PhysicalConnectorTableSink<CHILD_TYPE extends Plan> extends PhysicalBaseExternalTableSink<CHILD_TYPE> {

    /**
     * Constructor without explicit physical properties / statistics: delegates to the full
     * constructor with {@link PhysicalProperties#GATHER} and {@code null} statistics.
     */
    public PhysicalConnectorTableSink(ExternalDatabase database,
                                      ExternalTable targetTable,
                                      List<Column> cols,
                                      List<NamedExpression> outputExprs,
                                      Optional<GroupExpression> groupExpression,
                                      LogicalProperties logicalProperties,
                                      CHILD_TYPE child) {
        this(database, targetTable, cols, outputExprs, groupExpression, logicalProperties,
                PhysicalProperties.GATHER, null, child);
    }

    /**
     * Full constructor: forwards every argument to the base external table sink, tagged with
     * {@link PlanType#PHYSICAL_CONNECTOR_TABLE_SINK}.
     */
    public PhysicalConnectorTableSink(ExternalDatabase database,
                                      ExternalTable targetTable,
                                      List<Column> cols,
                                      List<NamedExpression> outputExprs,
                                      Optional<GroupExpression> groupExpression,
                                      LogicalProperties logicalProperties,
                                      PhysicalProperties physicalProperties,
                                      Statistics statistics,
                                      CHILD_TYPE child) {
        super(PlanType.PHYSICAL_CONNECTOR_TABLE_SINK, database, targetTable, cols, outputExprs, groupExpression,
                logicalProperties, physicalProperties, statistics, child);
    }

    /**
     * Copies this sink onto {@code children.get(0)}, keeping the current group expression,
     * logical properties, physical properties and statistics.
     */
    @Override
    public Plan withChildren(List<Plan> children) {
        return AbstractPlan.copyWithSameId(this,
                () -> new PhysicalConnectorTableSink<>(
                        (ExternalDatabase) database,
                        (ExternalTable) targetTable,
                        cols,
                        outputExprs,
                        groupExpression,
                        getLogicalProperties(),
                        physicalProperties,
                        statistics,
                        children.get(0)));
    }

    @Override
    public <R, C> R accept(PlanVisitor<R, C> visitor, C context) {
        return visitor.visitPhysicalConnectorTableSink(this, context);
    }

    /**
     * Copies this sink with a different group expression. The copy is built through the short
     * constructor, so its physical properties are {@link PhysicalProperties#GATHER} and its
     * statistics are {@code null}.
     */
    @Override
    public Plan withGroupExpression(Optional<GroupExpression> groupExpression) {
        return AbstractPlan.copyWithSameId(this,
                () -> new PhysicalConnectorTableSink<>(
                        (ExternalDatabase) database,
                        (ExternalTable) targetTable,
                        cols,
                        outputExprs,
                        groupExpression,
                        getLogicalProperties(),
                        child()));
    }

    /**
     * Copies this sink with a different group expression, logical properties and child. The
     * copy is built through the short constructor, so its physical properties are
     * {@link PhysicalProperties#GATHER} and its statistics are {@code null}.
     * {@code logicalProperties} is unwrapped with {@code get()} and therefore must be present.
     */
    @Override
    public Plan withGroupExprLogicalPropChildren(Optional<GroupExpression> groupExpression,
                                                 Optional<LogicalProperties> logicalProperties, List<Plan> children) {
        return AbstractPlan.copyWithSameId(this,
                () -> new PhysicalConnectorTableSink<>(
                        (ExternalDatabase) database,
                        (ExternalTable) targetTable,
                        cols,
                        outputExprs,
                        groupExpression,
                        logicalProperties.get(),
                        children.get(0)));
    }

    /**
     * Copies this sink with the given physical properties and statistics, keeping the current
     * group expression, logical properties and child.
     */
    @Override
    public PhysicalPlan withPhysicalPropertiesAndStats(PhysicalProperties physicalProperties, Statistics statistics) {
        return AbstractPlan.copyWithSameId(this,
                () -> new PhysicalConnectorTableSink<>(
                        (ExternalDatabase) database,
                        (ExternalTable) targetTable,
                        cols,
                        outputExprs,
                        groupExpression,
                        getLogicalProperties(),
                        physicalProperties,
                        statistics,
                        child()));
    }

    /**
     * Computes the physical properties this sink requires from its child. This generalizes the
     * legacy {@code PhysicalMaxComputeTableSink.getRequirePhysicalProperties()} 3-branch behavior,
     * gated by connector capabilities so that non-partitioned connectors (JDBC, ES) keep the
     * GATHER default:
     *
     * <ul>
     *   <li><b>Dynamic-partition write</b> (at least one partition column appears in {@code cols})
     *       on a connector declaring {@code SINK_REQUIRE_PARTITION_LOCAL_SORT}: hash-distribute by
     *       the partition columns and demand a mandatory local sort on them. Streaming partition
     *       writers (MaxCompute Storage API) close the previous partition writer as soon as a
     *       different partition value shows up, so un-grouped rows fail with
     *       "writer has been closed".</li>
     *   <li><b>Non-partitioned / all-static-partition write</b> on a connector declaring
     *       {@code SUPPORTS_PARALLEL_WRITE}: {@code SINK_RANDOM_PARTITIONED} (parallel writers).</li>
     *   <li><b>Anything else</b> (e.g. JDBC, ES), including a target table that is not a
     *       {@link PluginDrivenExternalTable}: {@code GATHER} (single writer) for transactional
     *       safety.</li>
     * </ul>
     *
     * <p><b>Partition columns are located by full-schema position, not by {@code cols}.</b> For a
     * positional-write connector (one declaring {@code SINK_REQUIRE_FULL_SCHEMA_ORDER}, e.g.
     * MaxCompute), {@code BindSink.bindConnectorTableSink} projects the child to
     * <em>full-schema</em> order (unmentioned / static-partition columns filled in), just like the
     * legacy {@code bindMaxComputeTableSink}, because the BE writer strips the trailing partition
     * columns by position. Hence {@code child().getOutput()} lines up 1:1 with
     * {@code targetTable.getFullSchema()}, whereas {@code cols} omits the static partition columns
     * and may follow a different (user-specified) order. (An earlier revision indexed by
     * {@code cols}, which mislocated the dynamic column whenever the order of {@code cols} diverged
     * from the full schema - the partial-static {@code PARTITION(p1='x') SELECT ..., p2} and
     * reordered-explicit-list cases.)</p>
     */
    @Override
    public PhysicalProperties getRequirePhysicalProperties() {
        if (!(targetTable instanceof PluginDrivenExternalTable)) {
            return PhysicalProperties.GATHER;
        }
        PluginDrivenExternalTable pluginTable = (PluginDrivenExternalTable) targetTable;

        if (pluginTable.requirePartitionLocalSortOnWrite()) {
            Set<String> partitionColumnNames = pluginTable.getPartitionColumns().stream()
                    .map(Column::getName)
                    .collect(Collectors.toSet());
            // No partition columns, or partition columns that are all statically specified,
            // need neither shuffle nor sort: fall through to the parallel / gather decision.
            if (!partitionColumnNames.isEmpty() && hasDynamicPartitionColumn(partitionColumnNames)) {
                return shuffleAndLocalSortByPartition(partitionColumnNames);
            }
        }

        return pluginTable.supportsParallelWrite()
                ? PhysicalProperties.SINK_RANDOM_PARTITIONED
                : PhysicalProperties.GATHER;
    }

    /**
     * Tells whether any partition column is listed in {@code cols}. Static partition columns are
     * excluded from {@code cols} by {@code BindSink.bindConnectorTableSink}, so a partition column
     * that is still present takes its value from the query, i.e. this is a dynamic or
     * partial-static partition write.
     */
    private boolean hasDynamicPartitionColumn(Set<String> partitionColumnNames) {
        Set<String> sinkColumnNames = cols.stream()
                .map(Column::getName)
                .collect(Collectors.toSet());
        for (String partitionColumnName : partitionColumnNames) {
            if (sinkColumnNames.contains(partitionColumnName)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Builds the "hash-distribute by partition columns + mandatory local sort on them"
     * requirement. Every full-schema column whose name is in {@code partitionColumnNames} is
     * mapped to the child output slot at the same position; see
     * {@link #getRequirePhysicalProperties()} for why the full-schema position is used.
     * Mirrors legacy {@code PhysicalMaxComputeTableSink}.
     */
    private PhysicalProperties shuffleAndLocalSortByPartition(Set<String> partitionColumnNames) {
        // Positions of the partition columns inside the full schema.
        List<Integer> partitionPositions = new ArrayList<>();
        List<Column> fullSchema = targetTable.getFullSchema();
        for (int position = 0; position < fullSchema.size(); position++) {
            String columnName = fullSchema.get(position).getName();
            if (partitionColumnNames.contains(columnName)) {
                partitionPositions.add(position);
            }
        }

        // Shuffle key: expression ids of the child output slots at those positions.
        List<ExprId> shuffleExprIds = new ArrayList<>();
        for (int position : partitionPositions) {
            shuffleExprIds.add(child().getOutput().get(position).getExprId());
        }
        DistributionSpecHiveTableSinkHashPartitioned hashSpec = new DistributionSpecHiveTableSinkHashPartitioned();
        hashSpec.setOutputColExprIds(shuffleExprIds);

        // Local sort on the same slots so that rows of one partition arrive grouped together at
        // the streaming partition writer (MaxCompute Storage API closes a partition writer once
        // a different partition value appears).
        List<OrderKey> sortKeys = new ArrayList<>();
        for (int position : partitionPositions) {
            sortKeys.add(new OrderKey(child().getOutput().get(position), true, false));
        }

        PhysicalProperties hashDistributed = new PhysicalProperties(hashSpec);
        return hashDistributed.withOrderSpec(new MustLocalSortOrderSpec(sortKeys));
    }
}