AnalyzeTblStmt.java
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.analysis;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Database;
import org.apache.doris.catalog.DatabaseIf;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.catalog.View;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.ErrorCode;
import org.apache.doris.common.ErrorReport;
import org.apache.doris.common.FeNameFormat;
import org.apache.doris.common.UserException;
import org.apache.doris.datasource.CatalogIf;
import org.apache.doris.datasource.hive.HMSExternalTable;
import org.apache.doris.mysql.privilege.PrivPredicate;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.statistics.AnalysisInfo.AnalysisType;
import org.apache.doris.statistics.util.StatisticsUtil;
import com.google.common.collect.Sets;
import org.apache.commons.lang3.StringUtils;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
/**
* Column Statistics Collection Syntax:
* ANALYZE [ SYNC ] TABLE table_name
* [ PARTITIONS (partition_name [, ...])]
* [ (column_name [, ...]) ]
* [ [WITH SYNC] | [WITH INCREMENTAL] | [WITH SAMPLE PERCENT | ROWS ] ]
* [ PROPERTIES ('key' = 'value', ...) ];
* <p>
* Column histogram collection syntax:
* ANALYZE [ SYNC ] TABLE table_name
* [ partitions (partition_name [, ...])]
* [ (column_name [, ...]) ]
* UPDATE HISTOGRAM
* [ [ WITH SYNC ][ WITH INCREMENTAL ][ WITH SAMPLE PERCENT | ROWS ][ WITH BUCKETS ] ]
* [ PROPERTIES ('key' = 'value', ...) ];
* <p>
* Explanation:
* - sync: Collect statistics synchronously. Return after collecting.
* - incremental: Collect statistics incrementally. Incremental collection of histogram statistics is not supported.
* - sample percent | rows: Collect statistics by sampling. Either a percentage or a number of rows can be sampled.
* - buckets: Specifies the maximum number of buckets generated when collecting histogram statistics.
* - table_name: The target table for collecting statistics. Can be of the form `db_name.table_name`.
* - partition_name: The specified destination partition must be a partition that exists in `table_name`,
* and multiple partition names are separated by commas.
* - column_name: The specified destination column must be a column that exists in `table_name`,
* and multiple column names are separated by commas.
* - properties: Properties used to set statistics tasks. Currently only the following configurations
* are supported (equivalent to the with statement)
* - 'sync' = 'true'
* - 'incremental' = 'true'
* - 'sample.percent' = '50'
* - 'sample.rows' = '1000'
* - 'num.buckets' = '10'
*/
public class AnalyzeTblStmt extends AnalyzeStmt implements NotFallbackInParser {
    // Name of the table to analyze, as given to the constructor.
    private final TableName tableName;
    // Columns to analyze. May be null until check() runs, which fills it with
    // every supported column when no explicit column list was given.
    private List<String> columnNames;
    // PARTITIONS clause of the statement; null when none was given.
    private PartitionNames partitionNames;
    // True when no explicit column list was given.
    private boolean isAllColumns;

    // Resolved by analyze() (or supplied through the second constructor).
    private long catalogId;
    private long dbId;
    private TableIf table;

    /**
     * Creates a statement whose catalog, database and table are resolved later by
     * {@link #analyze(Analyzer)}.
     *
     * @param tableName      table to analyze
     * @param partitionNames partitions clause, may be null
     * @param columnNames    columns to analyze, null meaning all columns
     * @param properties     analyze properties, passed to the super constructor
     */
    public AnalyzeTblStmt(TableName tableName,
            PartitionNames partitionNames,
            List<String> columnNames,
            AnalyzeProperties properties) {
        super(properties);
        this.tableName = tableName;
        this.partitionNames = partitionNames;
        this.columnNames = columnNames;
        this.analyzeProperties = properties;
        this.isAllColumns = columnNames == null;
    }

    /**
     * Creates a statement for a table that is already resolved. Only the catalog id is
     * looked up here, from the catalog name carried by {@code tableName}.
     *
     * @param analyzeProperties analyze properties, passed to the super constructor
     * @param tableName         table to analyze
     * @param columnNames       columns to analyze, null meaning all columns
     * @param dbId              id of the database owning the table
     * @param table             the resolved table
     * @throws AnalysisException if the catalog named by {@code tableName} cannot be found
     */
    public AnalyzeTblStmt(AnalyzeProperties analyzeProperties, TableName tableName, List<String> columnNames,
            long dbId, TableIf table) throws AnalysisException {
        super(analyzeProperties);
        this.tableName = tableName;
        this.columnNames = columnNames;
        this.dbId = dbId;
        this.table = table;
        this.isAllColumns = columnNames == null;
        String catalogName = tableName.getCtl();
        CatalogIf catalog = Env.getCurrentEnv().getCatalogMgr()
                .getCatalogOrAnalysisException(catalogName);
        this.catalogId = catalog.getId();
    }

    /**
     * Resolves the catalog, database and table named by the statement, records their ids
     * and then validates the statement through {@link #check()}.
     */
    @Override
    @SuppressWarnings({"rawtypes"})
    public void analyze(Analyzer analyzer) throws UserException {
        super.analyze(analyzer);
        tableName.analyze(analyzer);
        String catalogName = tableName.getCtl();
        String dbName = tableName.getDb();
        String tblName = tableName.getTbl();
        CatalogIf catalog = analyzer.getEnv().getCatalogMgr()
                .getCatalogOrAnalysisException(catalogName);
        this.catalogId = catalog.getId();
        DatabaseIf db = catalog.getDbOrAnalysisException(dbName);
        dbId = db.getId();
        table = db.getTableOrAnalysisException(tblName);
        // Must be computed before check(), which replaces a null column list.
        isAllColumns = columnNames == null;
        check();
    }

    /**
     * Validates the statement against the resolved table: rejects views, checks the
     * privilege of the current connection, resolves or validates the column list and
     * rejects conflicting analyze properties.
     *
     * @throws AnalysisException if any of the checks fails
     */
    public void check() throws AnalysisException {
        if (table instanceof View) {
            throw new AnalysisException("Analyze view is not allowed");
        }
        checkAnalyzePriv(tableName.getCtl(), tableName.getDb(), tableName.getTbl());
        if (columnNames == null) {
            // No explicit column list: take every column whose type is supported.
            columnNames = table.getSchemaAllIndexes(false).stream()
                    .filter(c -> !StatisticsUtil.isUnsupportedType(c.getType()))
                    .map(Column::getName)
                    .collect(Collectors.toList());
        } else {
            table.readLock();
            try {
                // A set gives constant-time membership tests for the requested columns.
                Set<String> schemaColumnNames = table.getSchemaAllIndexes(false).stream()
                        .map(Column::getName)
                        .collect(Collectors.toSet());
                Optional<String> unknownColumn = columnNames.stream()
                        .filter(name -> !schemaColumnNames.contains(name))
                        .findFirst();
                if (unknownColumn.isPresent()) {
                    ErrorReport.reportAnalysisException(ErrorCode.ERR_BAD_FIELD_ERROR,
                            unknownColumn.get(), tableName.getTbl());
                }
                checkColumn();
            } finally {
                table.readUnlock();
            }
        }
        analyzeProperties.check();
        boolean periodic = analyzeProperties.getPeriodTimeInMs() != 0;
        if (analyzeProperties.isSync() && (analyzeProperties.isAutomatic() || periodic)) {
            throw new AnalysisException("Automatic/Period statistics collection "
                    + "and synchronous statistics collection cannot be set at same time");
        }
        if (analyzeProperties.isAutomatic() && periodic) {
            throw new AnalysisException("Automatic collection "
                    + "and period statistics collection cannot be set at same time");
        }
        if (analyzeProperties.isSample() && analyzeProperties.forceFull()) {
            throw new AnalysisException("Impossible to analyze with sample and full simultaneously");
        }
    }

    // Looks a column up by name: visible columns only for an OlapTable, any column otherwise.
    private Column resolveColumn(String colName) {
        return table instanceof OlapTable
                ? ((OlapTable) table).getVisibleColumn(colName)
                : table.getColumn(colName);
    }

    /**
     * Validates the explicitly requested columns. Every name must resolve to a column.
     * When some columns have an unsupported type they are either dropped from the list
     * or the statement is rejected, depending on the session of the current connection.
     *
     * @throws AnalysisException if a column cannot be resolved or unsupported columns are rejected
     */
    private void checkColumn() throws AnalysisException {
        boolean containsUnsupportedType = false;
        for (String colName : columnNames) {
            Column column = resolveColumn(colName);
            if (column == null) {
                ErrorReport.reportAnalysisException(ErrorCode.ERR_WRONG_COLUMN_NAME,
                        colName, FeNameFormat.getColumnNameRegex());
            }
            if (StatisticsUtil.isUnsupportedType(column.getType())) {
                containsUnsupportedType = true;
            }
        }
        if (!containsUnsupportedType) {
            return;
        }
        ConnectContext ctx = ConnectContext.get();
        // NOTE(review): unsupported columns are silently dropped when there is no connection
        // context or `enableAnalyzeComplexTypeColumn` is off, and rejected when it is on, while
        // the message below names `ignore_column_with_complex_type` - confirm the intended flag.
        if (ctx == null || !ctx.getSessionVariable().enableAnalyzeComplexTypeColumn) {
            columnNames = columnNames.stream()
                    .filter(c -> !StatisticsUtil.isUnsupportedType(resolveColumn(c).getType()))
                    .collect(Collectors.toList());
        } else {
            throw new AnalysisException(
                    "Contains unsupported column type, "
                            + "if you want to ignore them and analyze rest "
                            + "columns, please set session variable "
                            + "`ignore_column_with_complex_type` to true");
        }
    }

    /** @return the catalog name carried by the table name */
    public String getCatalogName() {
        return tableName.getCtl();
    }

    /** @return the id of the database owning the table */
    public long getDbId() {
        return dbId;
    }

    /** @return the database name carried by the table name */
    public String getDBName() {
        return tableName.getDb();
    }

    /** @return the resolved table */
    public TableIf getTable() {
        return table;
    }

    /** @return the table name given to the constructor */
    public TableName getTblName() {
        return tableName;
    }

    /** @return a new set holding the names of the columns to analyze */
    public Set<String> getColumnNames() {
        return Sets.newHashSet(columnNames);
    }

    /**
     * @return a new set with the explicitly named partitions, or an empty set when the
     *         statement has no partitions clause, no partition names, or a star clause
     */
    public Set<String> getPartitionNames() {
        if (partitionNames == null || partitionNames.getPartitionNames() == null || partitionNames.isStar()) {
            return Collections.emptySet();
        }
        return Sets.newHashSet(partitionNames.getPartitionNames());
    }

    /**
     * @return for OLAP table, only in overwrite situation, overwrite auto detect partition;
     *         for External table, all partitions. Always false without a partitions clause.
     */
    public boolean isStarPartition() {
        return partitionNames != null && partitionNames.isStar();
    }

    /** @return the count reported by the partitions clause, or 0 when there is none */
    public long getPartitionCount() {
        return partitionNames == null ? 0 : partitionNames.getCount();
    }

    /** @return true when the statement carries a partitions clause */
    public boolean isPartitionOnly() {
        return partitionNames != null;
    }

    /**
     * @return true only for an HMS external table analyzed without a partitions clause whose
     *         partition count exceeds the session's external-table analyze partition number;
     *         a session value of -1 always yields false
     */
    public boolean isSamplingPartition() {
        if (!(table instanceof HMSExternalTable) || partitionNames != null) {
            return false;
        }
        // NOTE(review): unlike checkAnalyzePriv, this assumes a ConnectContext is present.
        int partNum = ConnectContext.get().getSessionVariable().getExternalTableAnalyzePartNum();
        if (partNum == -1) {
            return false;
        }
        return table.getPartitionNames().size() > partNum;
    }

    /**
     * Requires the SELECT privilege on the table for the current connection. Nothing is
     * checked when there is no connection context.
     *
     * @throws AnalysisException if the privilege is missing
     */
    private void checkAnalyzePriv(String ctlName, String dbName, String tblName) throws AnalysisException {
        ConnectContext ctx = ConnectContext.get();
        // means it a system analyze
        if (ctx == null) {
            return;
        }
        boolean allowed = Env.getCurrentEnv().getAccessManager()
                .checkTblPriv(ctx, ctlName, dbName, tblName, PrivPredicate.SELECT);
        if (!allowed) {
            ErrorReport.reportAnalysisException(
                    ErrorCode.ERR_TABLEACCESS_DENIED_ERROR,
                    "ANALYZE",
                    ctx.getQualifiedUser(),
                    ctx.getRemoteIP(),
                    dbName + ": " + tblName);
        }
    }

    /**
     * Renders the statement as SQL: table name, column list, the histogram clause when the
     * analysis type is HISTOGRAM, and the analyze properties. The partitions clause is not
     * rendered.
     */
    @Override
    public String toSql() {
        // The literal already ends with the separator before the table name.
        StringBuilder sb = new StringBuilder("ANALYZE TABLE ");
        if (tableName != null) {
            sb.append(tableName.toSql());
        }
        if (columnNames != null) {
            sb.append("(");
            sb.append(StringUtils.join(columnNames, ","));
            sb.append(")");
        }
        // Constant on the left so a missing analysis type cannot raise a NullPointerException.
        if (AnalysisType.HISTOGRAM.equals(getAnalysisType())) {
            sb.append(" ");
            sb.append("UPDATE HISTOGRAM");
        }
        if (analyzeProperties != null) {
            sb.append(" ");
            sb.append(analyzeProperties.toSQL());
        }
        return sb.toString();
    }

    /**
     * Looks the database up by {@code dbId} in the internal catalog of the analyzer's Env.
     * NOTE(review): the lookup always uses the internal catalog and the inherited
     * {@code analyzer} - confirm callers only use it for internal tables after analyze().
     *
     * @throws AnalysisException if the database cannot be found
     */
    public Database getDb() throws AnalysisException {
        return analyzer.getEnv().getInternalCatalog().getDbOrAnalysisException(dbId);
    }

    /** @return true when no explicit column list was given */
    public boolean isAllColumns() {
        return isAllColumns;
    }

    /** @return the id of the catalog owning the table */
    public long getCatalogId() {
        return catalogId;
    }
}