CopyFromDesc.java

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.nereids.trees.plans.commands.info;

import org.apache.doris.analysis.CopyFromParam;
import org.apache.doris.analysis.SlotRef;
import org.apache.doris.analysis.StageAndPattern;
import org.apache.doris.analysis.TableName;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Database;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.KeysType;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.ErrorCode;
import org.apache.doris.common.ErrorReport;
import org.apache.doris.nereids.analyzer.UnboundSlot;
import org.apache.doris.nereids.analyzer.UnboundStar;
import org.apache.doris.nereids.trees.expressions.EqualTo;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.NamedExpression;
import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.util.ExpressionUtils;

import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * copy from desc ==> copy from param
 */
public class CopyFromDesc {
    private static final Logger LOG = LogManager.getLogger(CopyFromParam.class);
    private static final String DOLLAR = "$";
    private StageAndPattern stageAndPattern;
    private List<NamedExpression> exprList;
    private Optional<Expression> fileFilterExpr;
    private List<String> fileColumns;
    private List<Expression> columnMappingList;
    private List<String> targetColumns;

    public CopyFromDesc(StageAndPattern stageAndPattern) {
        this.stageAndPattern = stageAndPattern;
    }

    public CopyFromDesc(StageAndPattern stageAndPattern, List<NamedExpression> exprList,
                        Optional<Expression> whereExpr) {
        this.stageAndPattern = stageAndPattern;
        this.exprList = exprList;
        this.fileFilterExpr = whereExpr;
    }

    public void setTargetColumns(List<String> targetColumns) {
        this.targetColumns = targetColumns;
    }

    public StageAndPattern getStageAndPattern() {
        return stageAndPattern;
    }

    public List<Expression> getColumnMappingList() {
        return columnMappingList;
    }

    public List<NamedExpression> getExprList() {
        return exprList;
    }

    public List<String> getFileColumns() {
        return fileColumns;
    }

    public Optional<Expression> getFileFilterExpr() {
        return fileFilterExpr;
    }

    public List<String> getTargetColumns() {
        return targetColumns;
    }

    /**
     * analyze
     */
    public void validate(String fullDbName, TableName tableName, boolean useDeleteSign, String fileType)
            throws AnalysisException {
        if (exprList == null && fileFilterExpr == null && !useDeleteSign) {
            return;
        }
        this.fileColumns = new ArrayList<>();
        this.columnMappingList = new ArrayList<>();

        Database db = Env.getCurrentInternalCatalog().getDbOrAnalysisException(fullDbName);
        OlapTable olapTable = db.getOlapTableOrAnalysisException(tableName.getTbl());

        if (useDeleteSign && olapTable.getKeysType() != KeysType.UNIQUE_KEYS) {
            throw new AnalysisException("copy.use_delete_sign property only support unique table");
        }

        // Analyze table columns mentioned in the statement.
        boolean addDeleteSign = false;
        if (targetColumns == null) {
            targetColumns = new ArrayList<>();
            for (Column col : olapTable.getBaseSchema(false)) {
                targetColumns.add(col.getName());
            }
            if (useDeleteSign) {
                targetColumns.add(getDeleteSignColumn(olapTable).getName());
                addDeleteSign = true;
            }
        } else {
            if (exprList == null || targetColumns.size() != exprList.size()) {
                ErrorReport.reportAnalysisException(ErrorCode.ERR_WRONG_VALUE_COUNT);
            }
            Set<String> mentionedColumns = Sets.newTreeSet(String.CASE_INSENSITIVE_ORDER);
            for (String colName : targetColumns) {
                Column col = olapTable.getColumn(colName);
                if (col == null) {
                    ErrorReport.reportAnalysisException(ErrorCode.ERR_BAD_FIELD_ERROR, colName, olapTable.getName());
                }
                if (!mentionedColumns.add(colName)) {
                    ErrorReport.reportAnalysisException(ErrorCode.ERR_FIELD_SPECIFIED_TWICE, colName);
                }
            }
            if (useDeleteSign && !mentionedColumns.contains(Column.DELETE_SIGN)) {
                targetColumns.add(getDeleteSignColumn(olapTable).getName());
                addDeleteSign = true;
            }
        }

        if (!getFileColumnNames(addDeleteSign)) {
            if (fileType == null || fileType.equalsIgnoreCase("csv")) {
                int maxFileColumnId = getMaxFileColumnId();
                if (useDeleteSign && exprList == null && fileColumns.isEmpty()) {
                    maxFileColumnId = targetColumns.size() > maxFileColumnId ? targetColumns.size() : maxFileColumnId;
                }
                for (int i = 1; i <= maxFileColumnId; i++) {
                    fileColumns.add(DOLLAR + i);
                }
            } else {
                for (Column col : olapTable.getBaseSchema(false)) {
                    fileColumns.add(col.getName());
                }
                if (useDeleteSign) {
                    fileColumns.add(getDeleteSignColumn(olapTable).getName());
                }
            }
        }

        parseColumnNames(fileType, targetColumns);
        parseColumnNames(fileType, fileColumns);

        if (exprList != null) {
            if (!(exprList.size() == 1 && exprList.get(0) instanceof UnboundStar)) {
                if (targetColumns.size() != exprList.size()) {
                    ErrorReport.reportAnalysisException(ErrorCode.ERR_WRONG_VALUE_COUNT);
                }
                for (int i = 0; i < targetColumns.size(); i++) {
                    Expression expr = exprList.get(i);
                    EqualTo binaryPredicate = new EqualTo(new UnboundSlot(targetColumns.get(i)), expr);
                    columnMappingList.add(binaryPredicate);
                }
            }
        } else {
            for (int i = 0; i < targetColumns.size(); i++) {
                // If file column name equals target column name, don't need to add to
                // columnMapping List. Otherwise it will result in invalidation of strict
                // mode. Because if the src data is an expr, strict mode judgment will
                // not be performed.
                if (!fileColumns.get(i).equalsIgnoreCase(targetColumns.get(i))) {
                    EqualTo binaryPredicate = new EqualTo(new UnboundSlot(targetColumns.get(i)),
                            new UnboundSlot(fileColumns.get(i)));
                    columnMappingList.add(binaryPredicate);
                }
            }
        }
    }

    // expr use column name
    private boolean getFileColumnNames(boolean addDeleteSign) throws AnalysisException {
        if (exprList == null) {
            return false;
        }
        List<SlotRef> slotRefs = Lists.newArrayList();
        //        Expr.collectList(exprList, SlotRef.class, slotRefs);
        Set<String> columnSet = Sets.newTreeSet(String.CASE_INSENSITIVE_ORDER);
        for (SlotRef slotRef : slotRefs) {
            String columnName = slotRef.getColumnName();
            if (columnName.startsWith(DOLLAR)) {
                if (fileColumns.size() > 0) {
                    throw new AnalysisException("can not mix column name and dollar sign");
                }
                return false;
            }
            if (columnSet.add(columnName)) {
                fileColumns.add(columnName);
            }
        }
        if (addDeleteSign) {
            //            exprList.add(new SlotRef(null, Column.DELETE_SIGN));
            fileColumns.add(Column.DELETE_SIGN);
        }
        return true;
    }

    private Column getDeleteSignColumn(OlapTable olapTable) throws AnalysisException {
        for (Column column : olapTable.getFullSchema()) {
            if (column.isDeleteSignColumn()) {
                return column;
            }
        }
        throw new AnalysisException("Can not find DeleteSignColumn for unique table");
    }

    private int getMaxFileColumnId() throws AnalysisException {
        int maxId = 0;
        if (exprList != null) {
            int maxFileColumnId = getMaxFileFilterColumnId(
                    exprList.stream().map(expr -> (Expression) expr).collect(Collectors.toList()));
            maxId = maxId > maxFileColumnId ? maxId : maxFileColumnId;
        }
        if (fileFilterExpr != null) {
            int maxFileColumnId = getMaxFileFilterColumnId(Lists.newArrayList(fileFilterExpr.get()));
            maxId = maxId > maxFileColumnId ? maxId : maxFileColumnId;
        }
        return maxId;
    }

    private int getMaxFileFilterColumnId(List<Expression> exprList) throws AnalysisException {
        Set<Slot> slots = ExpressionUtils.getInputSlotSet(exprList);
        int maxId = 0;
        for (Slot slot : slots) {
            int fileColumnId = getFileColumnIdOfSlotRef((UnboundSlot) slot);
            maxId = fileColumnId < maxId ? maxId : fileColumnId;
        }
        return maxId;
    }

    private int getFileColumnIdOfSlotRef(UnboundSlot unboundSlot) throws AnalysisException {
        String columnName = unboundSlot.getName();
        try {
            if (!columnName.startsWith(DOLLAR)) {
                throw new AnalysisException("can not mix column name and dollar sign");
            }
            return Integer.parseInt(columnName.substring(1));
        } catch (NumberFormatException e) {
            throw new AnalysisException("column name: " + columnName + " can not parse to a number");
        }
    }

    private void parseColumnNames(String fileType, List<String> columns) {
        // In Be, parquet and orc column names are case-insensitive, but there is a bug for hidden columns,
        // so handle it temporary.
        if (columns != null && fileType != null && (fileType.equalsIgnoreCase("parquet") || fileType.equalsIgnoreCase(
                "orc"))) {
            for (int i = 0; i < columns.size(); i++) {
                if (columns.get(i).equals(Column.DELETE_SIGN)) {
                    columns.set(i, Column.DELETE_SIGN.toLowerCase());
                }
            }
        }
    }
}