StatisticRange.java
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import org.apache.doris.analysis.LiteralExpr;
import org.apache.doris.common.Pair;
import org.apache.doris.nereids.types.DataType;
import java.util.Objects;
public class StatisticRange {
private static final double INFINITE_TO_FINITE_RANGE_INTERSECT_OVERLAP_HEURISTIC_FACTOR = 0.25;
private static final double INFINITE_TO_INFINITE_RANGE_INTERSECT_OVERLAP_HEURISTIC_FACTOR = 0.5;
/**
* {@code NaN} represents empty range ({@code high} must be {@code NaN} too)
*/
private final double low;
private final LiteralExpr lowExpr;
/**
* {@code NaN} represents empty range ({@code low} must be {@code NaN} too)
*/
private final double high;
private final LiteralExpr highExpr;
private final double distinctValues;
private final DataType dataType;
private final boolean isEmpty;
public StatisticRange(double low, LiteralExpr lowExpr, double high, LiteralExpr highExpr,
double distinctValues, DataType dataType) {
this(low, lowExpr, high, highExpr, distinctValues, dataType, false);
}
private StatisticRange(double low, LiteralExpr lowExpr, double high, LiteralExpr highExpr,
double distinctValues, DataType dataType, boolean isEmpty) {
this.low = low;
this.lowExpr = lowExpr;
this.high = high;
this.highExpr = highExpr;
this.distinctValues = distinctValues;
this.dataType = dataType;
this.isEmpty = isEmpty;
}
public LiteralExpr getLowExpr() {
return lowExpr;
}
public LiteralExpr getHighExpr() {
return highExpr;
}
public DataType getDataType() {
return dataType;
}
public double overlapPercentWith(StatisticRange other) {
Objects.requireNonNull(other, "other is null");
if (this.isEmpty() || other.isEmpty() || this.distinctValues == 0 || other.distinctValues == 0) {
return 0.0; // zero is better than NaN as it will behave properly for calculating row count
}
if (this.equals(other) && !isBothInfinite()) {
return 1.0;
}
double lengthOfIntersect = dataType.rangeLength(Math.min(this.high, other.high), Math.max(this.low, other.low));
if (Double.isInfinite(lengthOfIntersect)) {
if (Double.isFinite(this.distinctValues) && Double.isFinite(other.distinctValues)) {
return Math.min(other.distinctValues / this.distinctValues, 1);
}
return INFINITE_TO_INFINITE_RANGE_INTERSECT_OVERLAP_HEURISTIC_FACTOR;
}
if (lengthOfIntersect == 0) {
return 1 / Math.max(this.distinctValues, 1);
}
if (lengthOfIntersect < 0) {
return 0;
}
double length = length();
if (Double.isInfinite(length)) {
return INFINITE_TO_FINITE_RANGE_INTERSECT_OVERLAP_HEURISTIC_FACTOR;
}
if (lengthOfIntersect > 0) {
return lengthOfIntersect / length;
}
return INFINITE_TO_FINITE_RANGE_INTERSECT_OVERLAP_HEURISTIC_FACTOR;
}
public static StatisticRange empty(DataType dataType) {
return new StatisticRange(Double.NEGATIVE_INFINITY, null, Double.POSITIVE_INFINITY,
null, 0, dataType, true);
}
public boolean isEmpty() {
return isEmpty;
}
public boolean isBothInfinite() {
return Double.isInfinite(low) && Double.isInfinite(high);
}
public boolean isInfinite() {
return Double.isInfinite(low) || Double.isInfinite(high);
}
public boolean isOneSideInfinite() {
return isInfinite() && !isBothInfinite();
}
public boolean isFinite() {
return Double.isFinite(low) && Double.isFinite(high);
}
public static StatisticRange from(ColumnStatistic colStats, DataType dataType) {
return new StatisticRange(colStats.minValue, colStats.minExpr, colStats.maxValue, colStats.maxExpr,
colStats.ndv, dataType);
}
public double getLow() {
return low;
}
public double getHigh() {
return high;
}
public double length() {
return dataType.rangeLength(this.high, this.low);
}
public StatisticRange intersect(StatisticRange other) {
return intersect(other, false);
}
public StatisticRange intersect(StatisticRange other, boolean partial) {
Pair<Double, LiteralExpr> biggerLow = maxPair(low, lowExpr, other.low, other.lowExpr);
double newLow = biggerLow.first;
LiteralExpr newLowExpr = biggerLow.second;
Pair<Double, LiteralExpr> smallerHigh = minPair(high, highExpr, other.high, other.highExpr);
double newHigh = smallerHigh.first;
LiteralExpr newHighExpr = smallerHigh.second;
if (newLow <= newHigh) {
double distinctValues = overlappingDistinctValues(other, partial);
return new StatisticRange(newLow, newLowExpr, newHigh, newHighExpr, distinctValues, dataType);
}
return empty(dataType);
}
public Pair<Double, LiteralExpr> minPair(double r1, LiteralExpr e1, double r2, LiteralExpr e2) {
if (r1 < r2) {
return Pair.of(r1, e1);
}
return Pair.of(r2, e2);
}
public Pair<Double, LiteralExpr> maxPair(double r1, LiteralExpr e1, double r2, LiteralExpr e2) {
if (r1 > r2) {
return Pair.of(r1, e1);
}
return Pair.of(r2, e2);
}
public StatisticRange union(StatisticRange other) {
double overlapPercentThis = this.overlapPercentWith(other);
double overlapPercentOther = other.overlapPercentWith(this);
double overlapNDVThis = overlapPercentThis * distinctValues;
double overlapNDVOther = overlapPercentOther * other.distinctValues;
double maxOverlapNDV = Math.max(overlapNDVThis, overlapNDVOther);
double newNDV = maxOverlapNDV + ((1 - overlapPercentThis) * distinctValues)
+ ((1 - overlapPercentOther) * other.distinctValues);
Pair<Double, LiteralExpr> smallerMin = minPair(low, lowExpr, other.low, other.lowExpr);
Pair<Double, LiteralExpr> biggerHigh = maxPair(high, highExpr, other.high, other.highExpr);
return new StatisticRange(smallerMin.first, smallerMin.second,
biggerHigh.first, biggerHigh.second, newNDV, dataType);
}
private double overlappingDistinctValues(StatisticRange other, boolean partial) {
double overlapDistinctValuesLeft;
if (other.isInfinite() || this.isInfinite()) {
overlapDistinctValuesLeft = distinctValues * INFINITE_TO_INFINITE_RANGE_INTERSECT_OVERLAP_HEURISTIC_FACTOR;
} else if (Math.abs(this.low - this.high) < 1e-6) {
overlapDistinctValuesLeft = distinctValues;
} else {
double overlapPercentOfLeft = this.overlapPercentWith(other);
overlapDistinctValuesLeft = overlapPercentOfLeft * distinctValues;
}
if (partial) {
return overlapDistinctValuesLeft;
} else {
double overlapDistinctValuesRight;
if (this.isInfinite() || other.isInfinite()) {
overlapDistinctValuesRight = distinctValues
* INFINITE_TO_INFINITE_RANGE_INTERSECT_OVERLAP_HEURISTIC_FACTOR;
} else if (Math.abs(other.low - other.high) < 1e-6) {
// other is constant
overlapDistinctValuesRight = distinctValues;
} else {
double overlapPercentOfRight = other.overlapPercentWith(this);
overlapDistinctValuesRight = overlapPercentOfRight * other.distinctValues;
}
return minExcludeNaN(overlapDistinctValuesLeft, overlapDistinctValuesRight);
}
}
public static double minExcludeNaN(double v1, double v2) {
if (Double.isNaN(v1)) {
return v2;
}
if (Double.isNaN(v2)) {
return v1;
}
return Math.min(v1, v2);
}
public static double maxExcludeNaN(double v1, double v2) {
if (Double.isNaN(v1)) {
return v2;
}
if (Double.isNaN(v2)) {
return v1;
}
return Math.max(v1, v2);
}
public double getDistinctValues() {
return distinctValues;
}
@Override
public String toString() {
return "range=(" + lowExpr + "," + highExpr + "), ndv=" + distinctValues;
}
}