// ShowColumnStatsCommand.java
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.nereids.trees.plans.commands;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.DatabaseIf;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.Partition;
import org.apache.doris.catalog.ScalarType;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.ErrorCode;
import org.apache.doris.common.ErrorReport;
import org.apache.doris.common.Pair;
import org.apache.doris.common.util.Util;
import org.apache.doris.datasource.CatalogIf;
import org.apache.doris.mysql.privilege.PrivPredicate;
import org.apache.doris.nereids.trees.plans.PlanType;
import org.apache.doris.nereids.trees.plans.commands.info.PartitionNamesInfo;
import org.apache.doris.nereids.trees.plans.commands.info.TableNameInfo;
import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.qe.ShowResultSet;
import org.apache.doris.qe.ShowResultSetMetaData;
import org.apache.doris.qe.StmtExecutor;
import org.apache.doris.statistics.AnalysisManager;
import org.apache.doris.statistics.ColStatsMeta;
import org.apache.doris.statistics.ColumnStatistic;
import org.apache.doris.statistics.PartitionColumnStatistic;
import org.apache.doris.statistics.PartitionColumnStatisticCacheKey;
import org.apache.doris.statistics.ResultRow;
import org.apache.doris.statistics.StatisticsRepository;
import org.apache.doris.statistics.TableStatsMeta;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
/**
* Represents the command for SHOW COLUMN STATS.
*/
public class ShowColumnStatsCommand extends ShowCommand {
private static final Logger LOG = LogManager.getLogger(ShowColumnStatsCommand.class);

// Result-set column titles used when the statement carries no partition spec.
private static final ImmutableList<String> TABLE_COLUMN_TITLE_NAMES = ImmutableList.of(
        "column_name", "index_name", "count", "ndv", "num_null", "data_size",
        "avg_size_byte", "min", "max", "method", "type", "trigger",
        "query_times", "updated_time", "update_rows", "last_analyze_row_count",
        "last_analyze_version", "hot_values");

// Result-set column titles used when the statement carries a partition spec.
private static final ImmutableList<String> PARTITION_COLUMN_TITLE_NAMES = ImmutableList.of(
        "column_name", "partition_name", "index_name", "count", "ndv", "num_null",
        "min", "max", "data_size", "updated_time", "update_rows", "trigger");

// Name of the table whose column statistics are shown; never null (checked in the constructor).
private final TableNameInfo tableName;
// Columns named in the statement; null means every column of the table.
private final List<String> columnNames;
// Partition spec of the statement; null selects the table-level (non-partition) output.
private final PartitionNamesInfo partitionNames;
// When true, statistics are read from the statistics cache instead of the statistics repository.
private final boolean isCached;
// Target table; assigned by validate() once catalog, database and table have been looked up.
private TableIf table;
/**
 * Creates a SHOW COLUMN STATS command.
 *
 * @param tableName name of the target table; must not be null
 * @param columnNames columns to show, or null to show all columns of the table
 * @param partitionSpec partition spec, or null to show table-level statistics
 * @param isCached whether statistics are read from the statistics cache
 * @throws NullPointerException if {@code tableName} is null
 */
public ShowColumnStatsCommand(TableNameInfo tableName, List<String> columnNames,
        PartitionNamesInfo partitionSpec, boolean isCached) {
    super(PlanType.SHOW_COLUMN_STATS_COMMAND);
    Objects.requireNonNull(tableName, "Table name cannot be null");
    this.tableName = tableName;
    this.isCached = isCached;
    this.partitionNames = partitionSpec;
    this.columnNames = columnNames;
}
/**
 * Analyzes the statement and resolves the target table into {@link #table}.
 *
 * <p>Checks, in order: the table name and partition spec, existence of the catalog, database and
 * table, the SHOW privilege of the current user on the table, and existence of every explicitly
 * named column. Each failure is reported through {@code ErrorReport.reportAnalysisException};
 * the code that follows each check dereferences the checked value, so that call is expected to
 * throw rather than return.
 *
 * <p>The supplied {@code ctx} is used for every step (name analysis, privilege check and the
 * user/host shown in the access-denied message) so that the whole validation is performed on
 * behalf of one and the same connection.
 *
 * @param ctx connection context the command is executed in
 * @throws Exception if any of the checks above fails
 */
private void validate(ConnectContext ctx) throws Exception {
    tableName.analyze(ctx);
    if (partitionNames != null) {
        partitionNames.validate();
    }
    CatalogIf<DatabaseIf> catalog = Env.getCurrentEnv().getCatalogMgr().getCatalog(tableName.getCtl());
    if (catalog == null) {
        ErrorReport.reportAnalysisException("Catalog: %s not exists", tableName.getCtl());
    }
    DatabaseIf<TableIf> db = catalog.getDb(tableName.getDb()).orElse(null);
    if (db == null) {
        ErrorReport.reportAnalysisException("DB: %s not exists", tableName.getDb());
    }
    table = db.getTable(tableName.getTbl()).orElse(null);
    if (table == null) {
        ErrorReport.reportAnalysisException("Table: %s not exists", tableName.getTbl());
    }
    // Use the context handed to this method rather than the thread-local one.
    if (!Env.getCurrentEnv().getAccessManager()
            .checkTblPriv(ctx, tableName.getCtl(), tableName.getDb(), tableName.getTbl(),
                    PrivPredicate.SHOW)) {
        ErrorReport.reportAnalysisException(ErrorCode.ERR_TABLEACCESS_DENIED_ERROR, "Permission denied",
                ctx.getQualifiedUser(), ctx.getRemoteIP(),
                tableName.getDb() + ": " + tableName.getTbl());
    }
    if (columnNames != null) {
        for (String name : columnNames) {
            if (table.getColumn(name) == null) {
                ErrorReport.reportAnalysisException("Column: %s not exists", name);
            }
        }
    }
}
/**
 * Reads partition-level column statistics from the statistics cache.
 *
 * <p>For every requested column, every index id that applies to it and every requested partition,
 * one cache lookup is made and stored under a key built from the same identifiers. Index ids whose
 * name cannot be resolved are skipped.
 *
 * @param columnNames columns to look up
 * @param partitionNames partitions to look up
 * @param tableIf table the statistics belong to
 * @param ctx connection context forwarded to the cache lookup
 * @return map from cache key to the value returned by the cache for that key
 */
private Map<PartitionColumnStatisticCacheKey, PartitionColumnStatistic> getCachedPartitionColumnStats(
        Set<String> columnNames, List<String> partitionNames, TableIf tableIf, ConnectContext ctx) {
    final long catalogId = tableIf.getDatabase().getCatalog().getId();
    final long dbId = tableIf.getDatabase().getId();
    final long tableId = tableIf.getId();
    final OlapTable olapTable = tableIf instanceof OlapTable ? (OlapTable) tableIf : null;

    Map<PartitionColumnStatisticCacheKey, PartitionColumnStatistic> cached = new HashMap<>();
    for (String column : columnNames) {
        // Olap base index use -1 as index id; non-olap tables only have that single pseudo index.
        List<Long> indexIds = olapTable != null
                ? olapTable.getMvColumnIndexIds(column)
                : Lists.newArrayList(-1L);
        for (long indexId : indexIds) {
            String indexName;
            if (olapTable == null) {
                indexName = tableIf.getName();
            } else {
                long lookupId = indexId == -1 ? olapTable.getBaseIndexId() : indexId;
                indexName = olapTable.getIndexNameById(lookupId);
            }
            if (indexName == null) {
                continue;
            }
            for (String partition : partitionNames) {
                PartitionColumnStatistic stat = Env.getCurrentEnv().getStatisticsCache()
                        .getPartitionColumnStatistics(catalogId, dbId, tableId, indexId, partition, column, ctx);
                PartitionColumnStatisticCacheKey key = new PartitionColumnStatisticCacheKey(
                        catalogId, dbId, tableId, indexId, partition, column);
                cached.put(key, stat);
            }
        }
    }
    return cached;
}
/**
 * Loads the statistics of every column of the table from the statistics repository and appends
 * them to {@code columnStatistics} as {@code Pair<Pair<indexName, columnName>, ColumnStatistic>}.
 *
 * <p>Processing is best effort per row: a row that cannot be parsed or deserialized (including a
 * malformed index id) is logged and skipped so that the remaining rows are still shown. Rows whose
 * index name cannot be resolved are skipped silently.
 *
 * @param columnStatistics output list the statistics are appended to
 * @param tableIf table whose column statistics are queried
 */
private void getStatsForAllColumns(List<Pair<Pair<String, String>, ColumnStatistic>> columnStatistics,
        TableIf tableIf) {
    List<ResultRow> resultRows = StatisticsRepository.queryColumnStatisticsForTable(
            tableIf.getDatabase().getCatalog().getId(), tableIf.getDatabase().getId(), tableIf.getId());
    // row[4] is index id, row[5] is column name.
    for (ResultRow row : resultRows) {
        try {
            String indexName = tableIf.getName();
            long indexId = Long.parseLong(row.get(4));
            if (tableIf instanceof OlapTable) {
                OlapTable olapTable = (OlapTable) tableIf;
                // Olap base index use -1 as index id.
                indexName = olapTable.getIndexNameById(indexId == -1 ? olapTable.getBaseIndexId() : indexId);
            }
            if (indexName == null) {
                continue;
            }
            columnStatistics.add(Pair.of(Pair.of(indexName, row.get(5)), ColumnStatistic.fromResultRow(row)));
        } catch (Exception e) {
            LOG.warn("Failed to deserialize column statistics. reason: [{}]. Row [{}]", e.getMessage(), row);
            if (LOG.isDebugEnabled()) {
                // Pass the exception as the throwable argument so its stack trace is attached to the event.
                LOG.debug("Failed to deserialize column statistics.", e);
            }
        }
    }
}
/**
 * Collects the statistics of the given columns and appends them to {@code columnStatistics} as
 * {@code Pair<Pair<indexName, columnName>, ColumnStatistic>}.
 *
 * <p>One entry is produced per column and per index id that applies to it; index ids whose name
 * cannot be resolved are skipped.
 *
 * @param columnStatistics output list the statistics are appended to
 * @param columnNames columns to look up
 * @param tableIf table the columns belong to
 * @param showCache true to read from the statistics cache, false to query the statistics repository
 * @param ctx connection context forwarded to the cache lookup
 * @throws AnalysisException propagated from the statistics lookup
 */
private void getStatsForSpecifiedColumns(List<Pair<Pair<String, String>, ColumnStatistic>> columnStatistics,
        Set<String> columnNames, TableIf tableIf, boolean showCache, ConnectContext ctx)
        throws AnalysisException {
    final boolean isOlap = tableIf instanceof OlapTable;
    for (String column : columnNames) {
        // Olap base index use -1 as index id.
        List<Long> indexIds;
        if (isOlap) {
            indexIds = ((OlapTable) tableIf).getMvColumnIndexIds(column);
        } else {
            indexIds = Lists.newArrayList();
            indexIds.add(-1L);
        }
        for (long indexId : indexIds) {
            String indexName;
            if (isOlap) {
                OlapTable olap = (OlapTable) tableIf;
                long lookupId = indexId == -1 ? olap.getBaseIndexId() : indexId;
                indexName = olap.getIndexNameById(lookupId);
            } else {
                indexName = tableIf.getName();
            }
            if (indexName == null) {
                continue;
            }
            long catalogId = tableIf.getDatabase().getCatalog().getId();
            long dbId = tableIf.getDatabase().getId();
            long tableId = tableIf.getId();
            // Either the cached statistic or the one persisted in the statistics repository.
            ColumnStatistic statistic = showCache
                    ? Env.getCurrentEnv().getStatisticsCache()
                            .getColumnStatistics(catalogId, dbId, tableId, indexId, column, ctx)
                    : StatisticsRepository.queryColumnStatisticsByName(catalogId, dbId, tableId, indexId, column);
            columnStatistics.add(Pair.of(Pair.of(indexName, column), statistic));
        }
    }
}
/**
 * Builds the result-set metadata: the partition titles when a partition spec is present, the
 * table-level titles otherwise. Every column is created with {@code ScalarType.createVarchar(30)}.
 *
 * @return metadata describing the columns of the result set
 */
@Override
public ShowResultSetMetaData getMetaData() {
    final ImmutableList<String> titles;
    if (partitionNames != null) {
        titles = PARTITION_COLUMN_TITLE_NAMES;
    } else {
        titles = TABLE_COLUMN_TITLE_NAMES;
    }
    ShowResultSetMetaData.Builder metaBuilder = ShowResultSetMetaData.builder();
    titles.forEach(title -> metaBuilder.addColumn(new Column(title, ScalarType.createVarchar(30))));
    return metaBuilder.build();
}
/**
 * Turns table-level column statistics into the rows of the result set.
 *
 * <p>Entries whose statistic is flagged {@code isUnKnown} are left out. Values that come from the
 * column's {@link ColStatsMeta} (and the hot values) are shown as "N/A" when no meta is found.
 *
 * @param columnStatistics entries shaped as {@code Pair<Pair<indexName, columnName>, ColumnStatistic>}
 * @return result set with one row per known statistic, in input order
 */
private ShowResultSet constructResultSet(List<Pair<Pair<String, String>, ColumnStatistic>> columnStatistics) {
    final String notAvailable = "N/A";
    AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager();
    List<List<String>> rows = Lists.newArrayList();
    for (Pair<Pair<String, String>, ColumnStatistic> entry : columnStatistics) {
        ColumnStatistic stat = entry.second;
        if (stat.isUnKnown) {
            continue;
        }
        String indexName = entry.first.first;
        String columnName = entry.first.second;

        List<String> row = Lists.newArrayList();
        row.add(columnName);
        row.add(Util.getTempTableDisplayName(indexName));
        row.add(String.valueOf(stat.count));
        row.add(String.valueOf(stat.ndv));
        row.add(String.valueOf(stat.numNulls));
        row.add(String.valueOf(stat.dataSize));
        row.add(String.valueOf(stat.avgSizeByte));
        row.add(stat.minExpr == null ? notAvailable : String.valueOf(stat.minExpr.toSql()));
        row.add(stat.maxExpr == null ? notAvailable : String.valueOf(stat.maxExpr.toSql()));

        ColStatsMeta meta = analysisManager.findColStatsMeta(table.getId(), indexName, columnName);
        boolean hasMeta = meta != null;
        row.add(hasMeta ? String.valueOf(meta.analysisMethod) : notAvailable);
        row.add(hasMeta ? String.valueOf(meta.analysisType) : notAvailable);
        row.add(hasMeta ? String.valueOf(meta.jobType) : notAvailable);
        row.add(hasMeta ? String.valueOf(meta.queriedTimes) : notAvailable);
        row.add(String.valueOf(stat.updatedTime));
        row.add(hasMeta ? String.valueOf(meta.updatedRows) : notAvailable);
        row.add(hasMeta ? String.valueOf(meta.rowCount) : notAvailable);
        row.add(hasMeta ? String.valueOf(meta.tableVersion) : notAvailable);
        row.add(hasMeta ? String.valueOf(stat.getStringHotValues()) : notAvailable);
        rows.add(row);
    }
    return new ShowResultSet(getMetaData(), rows);
}
/**
 * Turns partition-level statistics rows read from the statistics repository into the result set.
 *
 * <p>Each input row is read positionally: 0 column name, 1 partition name, 2 index id, 3 count,
 * 4 ndv, 5 num_null, 6 min, 7 max, 8 data_size, 9 updated_time. The "update_rows" and "trigger"
 * columns are taken from the table's statistics meta and are "N/A" when that information is not
 * available (no table stats, a non-olap table, no column meta, unknown partition, or no job type).
 *
 * @param resultRows rows returned by the partition statistics query
 * @param tableIf table the rows belong to
 * @return result set with one row per input row
 */
private ShowResultSet constructPartitionResultSet(List<ResultRow> resultRows, TableIf tableIf) {
    List<List<String>> result = Lists.newArrayList();
    // The table-level stats meta does not depend on the row, so look it up once instead of per row.
    TableStatsMeta tableStats = Env.getCurrentEnv().getAnalysisManager().findTableStatsStatus(tableIf.getId());
    for (ResultRow r : resultRows) {
        List<String> row = Lists.newArrayList();
        row.add(r.get(0)); // column_name
        row.add(r.get(1)); // partition_name
        long indexId = Long.parseLong(r.get(2));
        // Index id -1 stands for the base index, which is shown under the table name.
        String indexName = indexId == -1 ? tableIf.getName() : ((OlapTable) tableIf).getIndexNameById(indexId);
        row.add(indexName); // index_name.
        row.add(r.get(3)); // count
        row.add(r.get(4)); // ndv
        row.add(r.get(5)); // num_null
        row.add(r.get(6)); // min
        row.add(r.get(7)); // max
        row.add(r.get(8)); // data_size
        row.add(r.get(9)); // updated_time
        String updateRows = "N/A";
        ColStatsMeta columnStatsMeta = null;
        if (tableStats != null && tableIf instanceof OlapTable) {
            OlapTable olapTable = (OlapTable) tableIf;
            columnStatsMeta = tableStats.findColumnStatsMeta(indexName, r.get(0));
            if (columnStatsMeta != null && columnStatsMeta.partitionUpdateRows != null) {
                Partition partition = olapTable.getPartition(r.get(1));
                if (partition != null) {
                    Long rows = columnStatsMeta.partitionUpdateRows.get(partition.getId());
                    if (rows != null) {
                        updateRows = rows.toString();
                    }
                }
            }
        }
        row.add(updateRows); // update_rows
        // trigger. Manual or System; guard against a meta that carries no job type.
        row.add(columnStatsMeta == null || columnStatsMeta.jobType == null
                ? "N/A" : columnStatsMeta.jobType.name());
        result.add(row);
    }
    return new ShowResultSet(getMetaData(), result);
}
/**
 * Turns cached partition-level statistics into the result set.
 *
 * <p>Entries whose value is null or flagged {@code isUnKnown} are left out. The "update_rows" and
 * "trigger" columns are always "N/A" for cached statistics.
 *
 * @param resultMap cached statistics keyed by their cache key
 * @param tableIf table the statistics belong to
 * @return result set with one row per usable cache entry, in the map's iteration order
 */
private ShowResultSet constructPartitionCachedColumnStats(
        Map<PartitionColumnStatisticCacheKey, PartitionColumnStatistic> resultMap, TableIf tableIf) {
    final String notAvailable = "N/A";
    List<List<String>> rows = Lists.newArrayList();
    resultMap.forEach((cacheKey, stat) -> {
        if (stat == null || stat.isUnKnown) {
            return;
        }
        // Index id -1 stands for the base index, which is shown under the table name.
        String indexName;
        if (cacheKey.idxId == -1) {
            indexName = tableIf.getName();
        } else {
            indexName = ((OlapTable) tableIf).getIndexNameById(cacheKey.idxId);
        }
        List<String> row = Lists.newArrayList();
        row.add(cacheKey.colName); // column_name
        row.add(cacheKey.partId); // partition_name
        row.add(indexName); // index_name
        row.add(String.valueOf(stat.count)); // count
        row.add(String.valueOf(stat.ndv.estimateCardinality())); // ndv
        row.add(String.valueOf(stat.numNulls)); // num_null
        row.add(stat.minExpr == null ? notAvailable : String.valueOf(stat.minExpr.toSql())); // min
        row.add(stat.maxExpr == null ? notAvailable : String.valueOf(stat.maxExpr.toSql())); // max
        row.add(String.valueOf(stat.dataSize)); // data_size
        row.add(stat.updatedTime); // updated_time
        row.add(notAvailable); // update_rows
        row.add(notAvailable); // trigger
        rows.add(row);
    });
    return new ShowResultSet(getMetaData(), rows);
}
/**
 * @return the partition spec of the statement, or null when none was given
 */
private PartitionNamesInfo getPartitionNames() {
    return this.partitionNames;
}
/**
 * Returns the set of column names the command operates on: the explicitly named columns when the
 * statement lists any, otherwise the names of all columns of the resolved table.
 *
 * @return column names as a set
 */
private Set<String> getColumnNames() {
    if (columnNames == null) {
        // No explicit column list: fall back to every column of the table.
        return table.getColumns().stream()
                .map(Column::getName)
                .collect(Collectors.toSet());
    }
    return Sets.newHashSet(columnNames);
}
/**
 * @return true when the statement did not list any column, i.e. all columns are requested
 */
private boolean isAllColumns() {
    return null == this.columnNames;
}
/**
 * Executes the command: validates the statement, then gathers either partition-level or
 * table-level column statistics and formats them as a result set.
 *
 * <p>With a partition spec, statistics come from the cache or the repository depending on
 * {@code isCached}; a spec without explicit partition names covers all partitions of the table.
 * Without a partition spec, the "all columns" repository query is used only when no column was
 * named and the cache is not requested; otherwise columns are looked up one by one.
 *
 * @param ctx connection context the command is executed in
 * @param executor statement executor (not used by this command)
 * @return the result set to send to the client
 * @throws Exception if validation or a statistics lookup fails
 */
@Override
public ShowResultSet doRun(ConnectContext ctx, StmtExecutor executor) throws Exception {
    validate(ctx);
    final TableIf targetTable = table;
    final Set<String> selectedColumns = getColumnNames();
    final PartitionNamesInfo partitionSpec = getPartitionNames();

    // Table-level statistics.
    if (partitionSpec == null) {
        List<Pair<Pair<String, String>, ColumnStatistic>> columnStatistics = new ArrayList<>();
        if (isCached || !isAllColumns()) {
            getStatsForSpecifiedColumns(columnStatistics, selectedColumns, targetTable, isCached, ctx);
        } else {
            getStatsForAllColumns(columnStatistics, targetTable);
        }
        return constructResultSet(columnStatistics);
    }

    // Partition-level statistics.
    List<String> partNames = partitionSpec.getPartitionNames();
    if (partNames == null) {
        partNames = new ArrayList<>(targetTable.getPartitionNames());
    }
    if (isCached) {
        Map<PartitionColumnStatisticCacheKey, PartitionColumnStatistic> cachedStats =
                getCachedPartitionColumnStats(selectedColumns, partNames, targetTable, ctx);
        return this.constructPartitionCachedColumnStats(cachedStats, targetTable);
    }
    List<ResultRow> partitionColumnStats =
            StatisticsRepository.queryColumnStatisticsByPartitions(targetTable, selectedColumns, partNames);
    return constructPartitionResultSet(partitionColumnStats, targetTable);
}
/**
 * Dispatches this command to {@link PlanVisitor#visitShowColumnStatsCommand}.
 *
 * @param visitor visitor to dispatch to
 * @param context visitor context passed through unchanged
 * @return whatever the visitor returns
 */
@Override
public <R, C> R accept(PlanVisitor<R, C> visitor, C context) {
    final R visited = visitor.visitShowColumnStatsCommand(this, context);
    return visited;
}
}