AnalyzeTblStmt.java

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.analysis;

import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Database;
import org.apache.doris.catalog.DatabaseIf;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.catalog.View;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.ErrorCode;
import org.apache.doris.common.ErrorReport;
import org.apache.doris.common.FeNameFormat;
import org.apache.doris.common.UserException;
import org.apache.doris.datasource.CatalogIf;
import org.apache.doris.datasource.hive.HMSExternalTable;
import org.apache.doris.mysql.privilege.PrivPredicate;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.statistics.AnalysisInfo.AnalysisType;
import org.apache.doris.statistics.util.StatisticsUtil;

import com.google.common.collect.Sets;
import org.apache.commons.lang3.StringUtils;

import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * Column Statistics Collection Syntax:
 * ANALYZE [ SYNC ] TABLE table_name
 * [ PARTITIONS (partition_name [, ...])]
 * [ (column_name [, ...]) ]
 * [ [WITH SYNC] | [WITH INCREMENTAL] | [WITH SAMPLE PERCENT | ROWS ] ]
 * [ PROPERTIES ('key' = 'value', ...) ];
 * <p>
 * Column histogram collection syntax:
 * ANALYZE [ SYNC ] TABLE table_name
 * [ PARTITIONS (partition_name [, ...])]
 * [ (column_name [, ...]) ]
 * UPDATE HISTOGRAM
 * [ [ WITH SYNC ][ WITH INCREMENTAL ][ WITH SAMPLE PERCENT | ROWS ][ WITH BUCKETS ] ]
 * [ PROPERTIES ('key' = 'value', ...) ];
 * <p>
 * Illustration:
 * - sync: Collect statistics synchronously; the statement returns after collection finishes.
 * - incremental: Collect statistics incrementally. Incremental collection of histogram statistics is not supported.
 * - sample percent | rows: Collect statistics by sampling, either a percentage of the data or a fixed number of rows.
 * - buckets: Specifies the maximum number of buckets generated when collecting histogram statistics.
 * - table_name: The target table for collecting statistics. Can be of the form `db_name.table_name`.
 * - partition_name: The specified target partitions must exist in `table_name`,
 * and multiple partition names are separated by commas.
 * - column_name: The specified target columns must exist in `table_name`,
 * and multiple column names are separated by commas.
 * - properties: Properties used to configure the statistics task. Currently only the following configurations
 * are supported (equivalent to the WITH clause):
 * - 'sync' = 'true'
 * - 'incremental' = 'true'
 * - 'sample.percent' = '50'
 * - 'sample.rows' = '1000'
 * - 'num.buckets' = '10'
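 * <p>
 * Examples (illustrative only; database, table and column names are placeholders):
 * ANALYZE TABLE db1.example_tbl (col1, col2) WITH SAMPLE ROWS 100000 PROPERTIES ('sync' = 'true');
 * ANALYZE TABLE db1.example_tbl (col1) UPDATE HISTOGRAM WITH BUCKETS 128;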
 */
public class AnalyzeTblStmt extends AnalyzeStmt implements NotFallbackInParser {
    // The properties passed in by the user through "with" or "properties('K', 'V')"

    private final TableName tableName;
    private List<String> columnNames;
    private PartitionNames partitionNames;
    private boolean isAllColumns;

    // after analyzed
    private long catalogId;
    private long dbId;
    private TableIf table;

    public AnalyzeTblStmt(TableName tableName,
            PartitionNames partitionNames,
            List<String> columnNames,
            AnalyzeProperties properties) {
        super(properties);
        this.tableName = tableName;
        this.partitionNames = partitionNames;
        this.columnNames = columnNames;
        this.analyzeProperties = properties;
        this.isAllColumns = columnNames == null;
    }

    public AnalyzeTblStmt(AnalyzeProperties analyzeProperties, TableName tableName, List<String> columnNames, long dbId,
            TableIf table) throws AnalysisException {
        super(analyzeProperties);
        this.tableName = tableName;
        this.columnNames = columnNames;
        this.dbId = dbId;
        this.table = table;
        this.isAllColumns = columnNames == null;
        String catalogName = tableName.getCtl();
        CatalogIf catalog = Env.getCurrentEnv().getCatalogMgr()
                .getCatalogOrAnalysisException(catalogName);
        this.catalogId = catalog.getId();
    }

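    /**
     * Resolves the target catalog, database and table from the table name, records their ids,
     * and then runs the remaining validation in {@link #check()}.
     */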
    @Override
    @SuppressWarnings({"rawtypes"})
    public void analyze(Analyzer analyzer) throws UserException {
        super.analyze(analyzer);
        tableName.analyze(analyzer);

        String catalogName = tableName.getCtl();
        String dbName = tableName.getDb();
        String tblName = tableName.getTbl();
        CatalogIf catalog = analyzer.getEnv().getCatalogMgr()
                .getCatalogOrAnalysisException(catalogName);
        this.catalogId = catalog.getId();
        DatabaseIf db = catalog.getDbOrAnalysisException(dbName);
        dbId = db.getId();
        table = db.getTableOrAnalysisException(tblName);
        isAllColumns = columnNames == null;
        check();
    }

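    /**
     * Validates the statement: rejects views, checks the required table privilege, expands or
     * verifies the column list against the table schema, and checks that the analyze properties
     * (sync, automatic, period, sample, full) are combined consistently.
     */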
    public void check() throws AnalysisException {
        if (table instanceof View) {
            throw new AnalysisException("Analyze view is not allowed");
        }
        checkAnalyzePriv(tableName.getCtl(), tableName.getDb(), tableName.getTbl());
        if (columnNames == null) {
            columnNames = table.getSchemaAllIndexes(false).stream()
                // Filter unsupported type columns.
                .filter(c -> !StatisticsUtil.isUnsupportedType(c.getType()))
                .map(Column::getName)
                .collect(Collectors.toList());
        } else {
            table.readLock();
            try {
                List<String> baseSchema = table.getSchemaAllIndexes(false)
                        .stream().map(Column::getName).collect(Collectors.toList());
                Optional<String> optional = columnNames.stream()
                        .filter(entity -> !baseSchema.contains(entity)).findFirst();
                if (optional.isPresent()) {
                    String columnName = optional.get();
                    ErrorReport.reportAnalysisException(ErrorCode.ERR_BAD_FIELD_ERROR, columnName, tableName.getTbl());
                }
                checkColumn();
            } finally {
                table.readUnlock();
            }
        }
        analyzeProperties.check();

        if (analyzeProperties.isSync()
                && (analyzeProperties.isAutomatic() || analyzeProperties.getPeriodTimeInMs() != 0)) {
            throw new AnalysisException("Automatic/Period statistics collection "
                    + "and synchronous statistics collection cannot be set at same time");
        }
        if (analyzeProperties.isAutomatic() && analyzeProperties.getPeriodTimeInMs() != 0) {
            throw new AnalysisException("Automatic collection "
                    + "and period statistics collection cannot be set at same time");
        }
        if (analyzeProperties.isSample() && analyzeProperties.forceFull()) {
            throw new AnalysisException("Impossible to analyze with sample and full simultaneously");
        }
    }

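    /**
     * Verifies that every requested column exists in the table and handles columns of
     * unsupported types, which are either filtered out or rejected depending on the
     * session configuration.
     */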
    private void checkColumn() throws AnalysisException {
        boolean containsUnsupportedType = false;
        for (String colName : columnNames) {
            Column column = table instanceof OlapTable
                    ? ((OlapTable) table).getVisibleColumn(colName)
                    : table.getColumn(colName);
            if (column == null) {
                ErrorReport.reportAnalysisException(ErrorCode.ERR_WRONG_COLUMN_NAME,
                        colName, FeNameFormat.getColumnNameRegex());
            }
            if (StatisticsUtil.isUnsupportedType(column.getType())) {
                containsUnsupportedType = true;
            }
        }
        if (containsUnsupportedType) {
            if (ConnectContext.get() == null
                    || !ConnectContext.get().getSessionVariable().enableAnalyzeComplexTypeColumn) {
                columnNames = columnNames.stream()
                        .filter(c -> !StatisticsUtil.isUnsupportedType(table instanceof OlapTable
                            ? ((OlapTable) table).getVisibleColumn(c).getType()
                            : table.getColumn(c).getType()))
                        .collect(Collectors.toList());
            } else {
                throw new AnalysisException(
                        "Contains unsupported column type. "
                                + "If you want to ignore them and analyze the rest of the "
                                + "columns, please set session variable "
                                + "`ignore_column_with_complex_type` to true");
            }
        }
    }

    public String getCatalogName() {
        return tableName.getCtl();
    }

    public long getDbId() {
        return dbId;
    }

    public String getDBName() {
        return tableName.getDb();
    }

    public TableIf getTable() {
        return table;
    }

    public TableName getTblName() {
        return tableName;
    }

    public Set<String> getColumnNames() {
        return Sets.newHashSet(columnNames);
    }

    public Set<String> getPartitionNames() {
        if (partitionNames == null || partitionNames.getPartitionNames() == null || partitionNames.isStar()) {
            return Collections.emptySet();
        }
        Set<String> partitions = Sets.newHashSet();
        partitions.addAll(partitionNames.getPartitionNames());
        return partitions;
    }

    /**
     * @return true if a partition clause was specified and it is the star (*) clause,
     *         meaning all partitions of the table are targeted.
     */
    public boolean isStarPartition() {
        if (partitionNames == null) {
            return false;
        }
        return partitionNames.isStar();
    }

    public long getPartitionCount() {
        if (partitionNames == null) {
            return 0;
        }
        return partitionNames.getCount();
    }

    public boolean isPartitionOnly() {
        return partitionNames != null;
    }

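    /**
     * Whether partitions should be sampled for an HMS external table: true only when no partition
     * clause was given and the table has more partitions than the session's external table
     * analyze partition limit (a limit of -1 disables partition sampling).
     */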
    public boolean isSamplingPartition() {
        if (!(table instanceof HMSExternalTable) || partitionNames != null) {
            return false;
        }
        int partNum = ConnectContext.get().getSessionVariable().getExternalTableAnalyzePartNum();
        if (partNum == -1) {
            return false;
        }
        return table.getPartitionNames().size() > partNum;
    }

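    // Requires SELECT privilege on the target table; a null ConnectContext (system-triggered
    // analyze) is exempt from the check.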
    private void checkAnalyzePriv(String ctlName, String dbName, String tblName) throws AnalysisException {
        ConnectContext ctx = ConnectContext.get();
        // A null context means this is a system-triggered analyze.
        if (ctx == null) {
            return;
        }
        if (!Env.getCurrentEnv().getAccessManager()
                .checkTblPriv(ctx, ctlName, dbName, tblName, PrivPredicate.SELECT)) {
            ErrorReport.reportAnalysisException(
                    ErrorCode.ERR_TABLEACCESS_DENIED_ERROR,
                    "ANALYZE",
                    ctx.getQualifiedUser(),
                    ctx.getRemoteIP(),
                    dbName + ": " + tblName);
        }
        }
    }

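    /**
     * Rebuilds the statement as SQL. Only the table name, column list, UPDATE HISTOGRAM keyword
     * and properties are rendered; the partition clause is not reproduced here.
     */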
    @Override
    public String toSql() {
        StringBuilder sb = new StringBuilder();
        sb.append("ANALYZE TABLE ");

        if (tableName != null) {
            sb.append(" ");
            sb.append(tableName.toSql());
        }

        if (columnNames != null) {
            sb.append("(");
            sb.append(StringUtils.join(columnNames, ","));
            sb.append(")");
        }

        if (getAnalysisType().equals(AnalysisType.HISTOGRAM)) {
            sb.append(" ");
            sb.append("UPDATE HISTOGRAM");
        }

        if (analyzeProperties != null) {
            sb.append(" ");
            sb.append(analyzeProperties.toSQL());
        }

        return sb.toString();
    }

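    // Resolves the database by id from the internal catalog.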
    public Database getDb() throws AnalysisException {
        return analyzer.getEnv().getInternalCatalog().getDbOrAnalysisException(dbId);
    }

    public boolean isAllColumns() {
        return isAllColumns;
    }

    public long getCatalogId() {
        return catalogId;
    }
}