// PartitionColumnStatistic.java
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import org.apache.doris.analysis.LiteralExpr;
import org.apache.doris.catalog.Column;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.io.Hll;
import org.apache.doris.statistics.util.Hll128;
import org.apache.doris.statistics.util.StatisticsUtil;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.util.Base64;
import java.util.List;
import java.util.StringJoiner;
/**
 * Value object holding the statistics recorded for one column, as decoded from a
 * partition-level statistics row (see {@link #fromResultRow(ResultRow)} for the row layout).
 *
 * <p>All fields are {@code final} and assigned once in the constructor. Note that the
 * referenced {@link Hll128} and {@link LiteralExpr} objects are not copied, so this class
 * is only as immutable as those objects are.
 */
public class PartitionColumnStatistic {

    private static final Logger LOG = LogManager.getLogger(PartitionColumnStatistic.class);

    // Positions of the fields inside a statistics ResultRow:
    // [catalog_id, db_id, tbl_id, idx_id, part_name, col_id,
    //  count, ndv, null_count, min, max, data_size, update_time]
    private static final int CATALOG_ID_INDEX = 0;
    private static final int DB_ID_INDEX = 1;
    private static final int TBL_ID_INDEX = 2;
    private static final int IDX_ID_INDEX = 3;
    private static final int COL_ID_INDEX = 5;
    private static final int COUNT_INDEX = 6;
    private static final int NDV_INDEX = 7;
    private static final int NULL_COUNT_INDEX = 8;
    private static final int MIN_INDEX = 9;
    private static final int MAX_INDEX = 10;
    private static final int DATA_SIZE_INDEX = 11;
    private static final int UPDATE_TIME_INDEX = 12;

    /**
     * Shared sentinel returned whenever statistics are missing or cannot be decoded.
     * It is flagged as unknown, has an unbounded [min, max] range and an empty update time.
     * Declared {@code final} so the sentinel cannot be swapped out at runtime.
     */
    public static final PartitionColumnStatistic UNKNOWN = new PartitionColumnStatisticBuilder(1)
            .setAvgSizeByte(1)
            .setNdv(new Hll128())
            .setNumNulls(1)
            .setMaxValue(Double.POSITIVE_INFINITY)
            .setMinValue(Double.NEGATIVE_INFINITY)
            .setIsUnknown(true)
            .setUpdatedTime("")
            .build();

    /** Value of the {@code count} field of the statistics row. */
    public final double count;
    /** HLL sketch decoded from the {@code ndv} field of the statistics row. */
    public final Hll128 ndv;
    /** Value of the {@code null_count} field of the statistics row. */
    public final double numNulls;
    /** Value of the {@code data_size} field of the statistics row. */
    public final double dataSize;
    /** {@code dataSize / count} when built by {@link #fromResultRow(ResultRow)}; 0 when count is 0. */
    public final double avgSizeByte;
    /** Minimum value converted to a double; {@code -Infinity} when absent or unparsable. */
    public final double minValue;
    /** Maximum value converted to a double; {@code +Infinity} when absent or unparsable. */
    public final double maxValue;
    /** {@code true} for the {@link #UNKNOWN} sentinel. */
    public final boolean isUnKnown;
    /** Literal form of the minimum value; not set by {@code fromResultRow} when min is absent or unparsable. */
    public final LiteralExpr minExpr;
    /** Literal form of the maximum value; not set by {@code fromResultRow} when max is absent or unparsable. */
    public final LiteralExpr maxExpr;
    /** Value of the {@code update_time} field of the statistics row, kept as a string. */
    public final String updatedTime;

    /**
     * Stores every argument in the field of the same name without validation or copying.
     */
    public PartitionColumnStatistic(double count, Hll128 ndv, double avgSizeByte,
            double numNulls, double dataSize, double minValue, double maxValue,
            LiteralExpr minExpr, LiteralExpr maxExpr, boolean isUnKnown,
            String updatedTime) {
        this.count = count;
        this.ndv = ndv;
        this.avgSizeByte = avgSizeByte;
        this.numNulls = numNulls;
        this.dataSize = dataSize;
        this.minValue = minValue;
        this.maxValue = maxValue;
        this.minExpr = minExpr;
        this.maxExpr = maxExpr;
        this.isUnKnown = isUnKnown;
        this.updatedTime = updatedTime;
    }

    /**
     * Decodes a query result that is expected to hold at most one statistics row.
     *
     * @param resultRows rows fetched for one partition column; may be {@code null} or empty
     * @return the decoded statistic, or {@link #UNKNOWN} when there is no row or more than one row
     * @throws IOException propagated from {@link #fromResultRow(ResultRow)}
     */
    public static PartitionColumnStatistic fromResultRow(List<ResultRow> resultRows) throws IOException {
        if (resultRows == null || resultRows.isEmpty()) {
            return PartitionColumnStatistic.UNKNOWN;
        }
        // This should never happen. resultRows should be empty or contain only 1 result row.
        if (resultRows.size() > 1) {
            StringJoiner stringJoiner = new StringJoiner("][", "[", "]");
            for (ResultRow row : resultRows) {
                stringJoiner.add(row.toString());
            }
            LOG.warn("Partition stats has more than one row, please drop stats and analyze again. {}",
                    stringJoiner.toString());
            return PartitionColumnStatistic.UNKNOWN;
        }
        return fromResultRow(resultRows.get(0));
    }

    /**
     * Decodes a single statistics row laid out as
     * {@code [catalog_id, db_id, tbl_id, idx_id, part_name, col_id,
     * count, ndv, null_count, min, max, data_size, update_time]}.
     *
     * <p>Returns {@link #UNKNOWN} when the column cannot be resolved or the HLL sketch fails to
     * deserialize. A min/max value that is missing, the string {@code NULL} (case-insensitive)
     * or not convertible for the column type falls back to an infinite bound.
     *
     * <p>Malformed numeric fields or a malformed Base64 {@code ndv} field are not handled here:
     * the unchecked exceptions raised by {@link Long#parseLong(String)},
     * {@link Double#parseDouble(String)} and {@link Base64.Decoder#decode(String)} propagate
     * to the caller.
     *
     * @param row the statistics row to decode
     * @return the decoded statistic or {@link #UNKNOWN}
     * @throws IOException if reading or closing the serialized HLL stream fails
     */
    public static PartitionColumnStatistic fromResultRow(ResultRow row) throws IOException {
        long catalogId = Long.parseLong(row.get(CATALOG_ID_INDEX));
        long dbID = Long.parseLong(row.get(DB_ID_INDEX));
        long tblId = Long.parseLong(row.get(TBL_ID_INDEX));
        long idxId = Long.parseLong(row.get(IDX_ID_INDEX));
        String colName = row.get(COL_ID_INDEX);
        Column col = StatisticsUtil.findColumn(catalogId, dbID, tblId, idxId, colName);
        if (col == null) {
            LOG.info("Failed to deserialize column statistics, ctlId: {} dbId: {}, "
                    + "tblId: {} column: {} not exists", catalogId, dbID, tblId, colName);
            return PartitionColumnStatistic.UNKNOWN;
        }

        double count = Double.parseDouble(row.get(COUNT_INDEX));
        PartitionColumnStatisticBuilder partitionStatisticBuilder = new PartitionColumnStatisticBuilder(count);

        // The ndv field carries a Base64-encoded, serialized HLL sketch.
        byte[] serializedNdv = Base64.getDecoder().decode(row.get(NDV_INDEX));
        Hll hll = new Hll();
        // try-with-resources guarantees the stream is closed on every exit path.
        try (DataInputStream dis = new DataInputStream(new ByteArrayInputStream(serializedNdv))) {
            if (!hll.deserialize(dis)) {
                LOG.warn("Failed to deserialize ndv. [{}]", row);
                return PartitionColumnStatistic.UNKNOWN;
            }
        }
        partitionStatisticBuilder.setNdv(Hll128.fromHll(hll));

        String nullCount = row.getWithDefault(NULL_COUNT_INDEX, "0");
        partitionStatisticBuilder.setNumNulls(Double.parseDouble(nullCount));
        partitionStatisticBuilder.setDataSize(Double
                .parseDouble(row.getWithDefault(DATA_SIZE_INDEX, "0")));
        // Guard against division by zero for empty partitions.
        partitionStatisticBuilder.setAvgSizeByte(partitionStatisticBuilder.getCount() == 0
                ? 0 : partitionStatisticBuilder.getDataSize()
                / partitionStatisticBuilder.getCount());

        String min = row.get(MIN_INDEX);
        String max = row.get(MAX_INDEX);
        if (min != null && !"NULL".equalsIgnoreCase(min)) {
            try {
                partitionStatisticBuilder.setMinValue(StatisticsUtil.convertToDouble(col.getType(), min));
                partitionStatisticBuilder.setMinExpr(StatisticsUtil.readableValue(col.getType(), min));
            } catch (AnalysisException e) {
                LOG.warn("Failed to deserialize column {} min value {}.", col, min, e);
                // Fall back to an unbounded lower limit when the stored value is unusable.
                partitionStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
            }
        } else {
            partitionStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
        }
        if (max != null && !"NULL".equalsIgnoreCase(max)) {
            try {
                partitionStatisticBuilder.setMaxValue(StatisticsUtil.convertToDouble(col.getType(), max));
                partitionStatisticBuilder.setMaxExpr(StatisticsUtil.readableValue(col.getType(), max));
            } catch (AnalysisException e) {
                LOG.warn("Failed to deserialize column {} max value {}.", col, max, e);
                // Fall back to an unbounded upper limit when the stored value is unusable.
                partitionStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
            }
        } else {
            partitionStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
        }

        partitionStatisticBuilder.setUpdatedTime(row.get(UPDATE_TIME_INDEX));
        return partitionStatisticBuilder.build();
    }
}