// AddMinMax.java

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.nereids.rules.expression.rules;

import org.apache.doris.nereids.rules.expression.ExpressionPatternMatcher;
import org.apache.doris.nereids.rules.expression.ExpressionPatternRuleFactory;
import org.apache.doris.nereids.rules.expression.ExpressionRewriteContext;
import org.apache.doris.nereids.rules.expression.ExpressionRuleType;
import org.apache.doris.nereids.rules.expression.rules.RangeInference.DiscreteValue;
import org.apache.doris.nereids.rules.expression.rules.RangeInference.EmptyValue;
import org.apache.doris.nereids.rules.expression.rules.RangeInference.RangeValue;
import org.apache.doris.nereids.rules.expression.rules.RangeInference.UnknownValue;
import org.apache.doris.nereids.rules.expression.rules.RangeInference.ValueDesc;
import org.apache.doris.nereids.trees.expressions.ComparisonPredicate;
import org.apache.doris.nereids.trees.expressions.CompoundPredicate;
import org.apache.doris.nereids.trees.expressions.EqualTo;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.GreaterThan;
import org.apache.doris.nereids.trees.expressions.GreaterThanEqual;
import org.apache.doris.nereids.trees.expressions.LessThan;
import org.apache.doris.nereids.trees.expressions.LessThanEqual;
import org.apache.doris.nereids.trees.expressions.SlotReference;
import org.apache.doris.nereids.trees.expressions.literal.BooleanLiteral;
import org.apache.doris.nereids.trees.expressions.literal.ComparableLiteral;
import org.apache.doris.nereids.trees.expressions.literal.Literal;
import org.apache.doris.nereids.util.ExpressionUtils;

import com.google.common.collect.BoundType;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Range;
import com.google.common.collect.Sets;
import org.apache.commons.lang3.NotImplementedException;

import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * This rule adds the inferred min/max bounds of an expression to an OR expression as extra conjuncts.
 * For example:
 *
 * a > 10 and a < 20 or a > 30 and a < 40 or a > 50 and a < 60
 *   => (a < 20 or a > 30 and a < 40 or a > 50) and a > 10 and a < 60
 *
 * a between 10 and 20 and b between 10 and 20 or a between 100 and 200 and b between 100 and 200
 *   => (a <= 20 and b <= 20 or a >= 100 and b >= 100) and a >= 10 and a <= 200 and b >= 10 and b <= 200
 */
public class AddMinMax implements ExpressionPatternRuleFactory {
    /** Shared instance of this rule factory. */
    public static final AddMinMax INSTANCE = new AddMinMax();

    /**
     * Builds the pattern rules contributed by this factory.
     *
     * @return an immutable list holding one matcher: a {@link CompoundPredicate} pattern whose action
     *         delegates to {@link #rewrite(CompoundPredicate, ExpressionRewriteContext)}
     */
    @Override
    public List<ExpressionPatternMatcher<? extends Expression>> buildRules() {
        ImmutableList.Builder<ExpressionPatternMatcher<? extends Expression>> rules = ImmutableList.builder();
        rules.add(
                matchesTopType(CompoundPredicate.class)
                        .thenApply(matched -> rewrite(matched.expr, matched.rewriteContext))
                        .toRule(ExpressionRuleType.ADD_MIN_MAX)
        );
        return rules.build();
    }

    /**
     * Rewrites a compound predicate by AND-ing inferred min/max comparisons onto it.
     *
     * @param expr the compound predicate to rewrite
     * @param context rewrite context, forwarded to range inference and constant folding
     * @return the rewritten expression, or {@code expr} itself when there is nothing to add
     */
    public Expression rewrite(CompoundPredicate expr, ExpressionRewriteContext context) {
        RangeInference rangeInference = new RangeInference();
        ValueDesc inferred = rangeInference.getValue(expr, context);
        Map<Expression, MinMaxValue> bounds = getExprMinMaxValues(inferred);
        removeUnnecessaryMinMaxValues(expr, bounds);
        if (bounds.isEmpty()) {
            return expr;
        }
        return addExprMinMaxValues(expr, context, bounds);
    }

    /** Tells which bound of a recorded range, if any, a comparison predicate coincides with. */
    private enum MatchMinMax {
        // a '>' / '>=' comparison whose literal and strictness equal the range's lower bound
        MATCH_MIN,
        // a '<' / '<=' comparison whose literal and strictness equal the range's upper bound
        MATCH_MAX,
        // anything else
        MATCH_NONE,
    }

    /**
     * Mutable holder for the range inferred for one expression.
     * Its fields are updated in place while ranges are merged and pruned.
     */
    private static class MinMaxValue {
        // min max range, if range = null means empty
        Range<ComparableLiteral> range;

        // expression in range is discrete value; discrete entries are dropped before min-max is added
        boolean isDiscrete;

        // expr relative order, for keep order after add min-max to the expression
        int exprOrderIndex;

        /**
         * @param range inferred range, or null for an empty range
         * @param isDiscrete whether the range comes from discrete values
         * @param exprOrderIndex relative position used to order the added comparisons
         */
        public MinMaxValue(Range<ComparableLiteral> range, boolean isDiscrete, int exprOrderIndex) {
            this.range = range;
            this.isDiscrete = isDiscrete;
            this.exprOrderIndex = exprOrderIndex;
        }
    }

    /**
     * Prunes {@code exprMinMaxValues} in place so that only bounds worth adding remain.
     *
     * <p>Entries that are discrete, empty (null range) or unbounded on both sides are dropped first.
     * Then, for each conjunct of {@code expr} whose disjuncts are all comparisons on the same expression
     * and each coincide with that expression's recorded lower or upper bound, the coincident bound is
     * removed from the recorded range (the entry is removed once no bound is left).
     *
     * @param expr the original expression being rewritten
     * @param exprMinMaxValues expression -> inferred range; modified by this method
     */
    private void removeUnnecessaryMinMaxValues(Expression expr, Map<Expression, MinMaxValue> exprMinMaxValues) {
        exprMinMaxValues.values().removeIf(candidate -> {
            if (candidate.isDiscrete || candidate.range == null) {
                return true;
            }
            return !(candidate.range.hasLowerBound() || candidate.range.hasUpperBound());
        });
        if (exprMinMaxValues.isEmpty()) {
            return;
        }

        // Preserve the original expression order: a bound that already appears among the original
        // conjunctions is not added again. E.g. for '(a >= 100) AND (...)' with a's range [100, 200],
        // 'a >= 100' is already a conjunct, so only the upper bound is kept and the rewritten
        // expression becomes '((a >= 100) AND (...)) AND (a <= 200)'.
        for (Expression conjunct : ExpressionUtils.extractConjunction(expr)) {
            List<Expression> disjuncts = ExpressionUtils.extractDisjunction(conjunct);
            if (disjuncts.isEmpty() || !(disjuncts.get(0) instanceof ComparisonPredicate)) {
                continue;
            }

            Expression target = disjuncts.get(0).child(0);
            boolean everyDisjunctMatches = true;
            boolean lowerMatched = false;
            boolean upperMatched = false;
            for (Expression disjunct : disjuncts) {
                MatchMinMax kind = getExprMatchMinMax(disjunct, exprMinMaxValues);
                if (kind == MatchMinMax.MATCH_NONE || !disjunct.child(0).equals(target)) {
                    everyDisjunctMatches = false;
                    break;
                }
                lowerMatched |= kind == MatchMinMax.MATCH_MIN;
                upperMatched |= kind == MatchMinMax.MATCH_MAX;
            }
            if (!everyDisjunctMatches) {
                continue;
            }

            MinMaxValue tracked = exprMinMaxValues.get(target);
            if (lowerMatched) {
                // the lower bound is already present: keep only the upper one, if any
                if (!tracked.range.hasUpperBound()) {
                    exprMinMaxValues.remove(target);
                } else {
                    tracked.range = Range.upTo(tracked.range.upperEndpoint(),
                        tracked.range.upperBoundType());
                }
            }
            if (upperMatched) {
                // the upper bound is already present: keep only the lower one, if any
                if (!tracked.range.hasLowerBound()) {
                    exprMinMaxValues.remove(target);
                } else {
                    tracked.range = Range.downTo(tracked.range.lowerEndpoint(),
                        tracked.range.lowerBoundType());
                }
            }
        }
    }

    /**
     * Builds the comparisons described by {@code exprMinMaxValues} and ANDs them onto {@code expr}.
     *
     * <p>Entries are processed in ascending {@code exprOrderIndex}. A closed range whose endpoints are
     * equal yields a single {@code EqualTo}; otherwise a lower and/or upper comparison is produced.
     * Occurrences of those comparisons inside the original expression are replaced first, and the
     * combined conjunction is constant-folded.
     *
     * @param expr the original expression
     * @param context rewrite context passed to constant folding
     * @param exprMinMaxValues expression -> range to add; ranges must be non-null
     * @return the folded result, or {@code expr} itself when the result equals it
     */
    private Expression addExprMinMaxValues(Expression expr, ExpressionRewriteContext context,
            Map<Expression, MinMaxValue> exprMinMaxValues) {
        List<Map.Entry<Expression, MinMaxValue>> ordered = Lists.newArrayList(exprMinMaxValues.entrySet());
        ordered.sort((lhs, rhs) ->
                Integer.compare(lhs.getValue().exprOrderIndex, rhs.getValue().exprOrderIndex));

        List<Expression> boundPredicates = Lists.newArrayListWithExpectedSize(ordered.size() * 2);
        for (Map.Entry<Expression, MinMaxValue> entry : ordered) {
            Expression target = entry.getKey();
            Range<ComparableLiteral> range = entry.getValue().range;
            boolean hasLower = range.hasLowerBound();
            boolean hasUpper = range.hasUpperBound();

            // [x, x] collapses to a single equality
            boolean singlePoint = hasLower && hasUpper
                    && range.lowerBoundType() == BoundType.CLOSED
                    && range.upperBoundType() == BoundType.CLOSED
                    && range.lowerEndpoint().equals(range.upperEndpoint());
            if (singlePoint) {
                boundPredicates.add(new EqualTo(target, (Literal) range.lowerEndpoint()));
                continue;
            }

            if (hasLower) {
                Literal lower = (Literal) range.lowerEndpoint();
                if (range.lowerBoundType() == BoundType.CLOSED) {
                    boundPredicates.add(new GreaterThanEqual(target, lower));
                } else {
                    boundPredicates.add(new GreaterThan(target, lower));
                }
            }
            if (hasUpper) {
                Literal upper = (Literal) range.upperEndpoint();
                if (range.upperBoundType() == BoundType.CLOSED) {
                    boundPredicates.add(new LessThanEqual(target, upper));
                } else {
                    boundPredicates.add(new LessThan(target, upper));
                }
            }
        }

        // the bound predicates are AND-ed on below, so first remove their duplicates from the original
        Expression stripped = replaceCmpMinMax(expr, Sets.newHashSet(boundPredicates));

        List<Expression> conjuncts = Lists.newArrayListWithExpectedSize(boundPredicates.size() + 1);
        conjuncts.add(stripped);
        conjuncts.addAll(boundPredicates);
        Expression folded = FoldConstantRuleOnFE.evaluate(ExpressionUtils.and(conjuncts), context);
        return folded.equals(expr) ? expr : folded;
    }

    /**
     * Replaces every occurrence of one of {@code cmpMinMaxExprs} inside {@code expr} with TRUE,
     * descending only through {@link CompoundPredicate} nodes.
     *
     * @param expr expression to process
     * @param cmpMinMaxExprs comparisons that will be AND-ed onto the result later
     * @return {@code expr} itself when nothing was replaced, otherwise a rebuilt expression
     */
    private Expression replaceCmpMinMax(Expression expr, Set<Expression> cmpMinMaxExprs) {
        // replacing with TRUE is fine even for a nullable expr: the same expr is AND-ed on afterwards
        if (cmpMinMaxExprs.contains(expr)) {
            return BooleanLiteral.TRUE;
        }

        // recurse only while all ancestors are AND / OR
        if (expr instanceof CompoundPredicate) {
            boolean anyChildReplaced = false;
            ImmutableList.Builder<Expression> rebuiltChildren
                    = ImmutableList.builderWithExpectedSize(expr.arity());
            for (Expression child : expr.children()) {
                Expression replaced = replaceCmpMinMax(child, cmpMinMaxExprs);
                anyChildReplaced |= replaced != child;
                rebuiltChildren.add(replaced);
            }
            if (anyChildReplaced) {
                return expr.withChildren(rebuiltChildren.build());
            }
        }
        return expr;
    }

    /**
     * Classifies {@code expr} against the recorded range of its left operand.
     *
     * @param expr candidate expression
     * @param exprMinMaxValues expression -> recorded range
     * @return {@code MATCH_MIN} when {@code expr} is a '>' / '>=' comparison with a literal that equals
     *         the range's lower endpoint with matching open/closed type, {@code MATCH_MAX} for the
     *         analogous '<' / '<=' case on the upper endpoint, otherwise {@code MATCH_NONE}
     */
    private MatchMinMax getExprMatchMinMax(Expression expr,
            Map<Expression, MinMaxValue> exprMinMaxValues) {
        if (!(expr instanceof ComparisonPredicate)) {
            return MatchMinMax.MATCH_NONE;
        }
        ComparisonPredicate comparison = (ComparisonPredicate) expr;
        Expression operand = comparison.left();
        Expression bound = comparison.right();
        if (!(bound instanceof ComparableLiteral)) {
            return MatchMinMax.MATCH_NONE;
        }
        MinMaxValue tracked = exprMinMaxValues.get(operand);
        if (tracked == null) {
            return MatchMinMax.MATCH_NONE;
        }
        Range<ComparableLiteral> range = tracked.range;

        boolean greater = expr instanceof GreaterThan;
        boolean greaterOrEqual = expr instanceof GreaterThanEqual;
        if (greater || greaterOrEqual) {
            if (range.hasLowerBound() && range.lowerEndpoint().equals(bound)) {
                // a closed bound needs '>=', an open bound needs '>'
                boolean sameStrictness = range.lowerBoundType() == BoundType.CLOSED ? greaterOrEqual : greater;
                if (sameStrictness) {
                    return MatchMinMax.MATCH_MIN;
                }
            }
            return MatchMinMax.MATCH_NONE;
        }

        boolean less = expr instanceof LessThan;
        boolean lessOrEqual = expr instanceof LessThanEqual;
        if (less || lessOrEqual) {
            if (range.hasUpperBound() && range.upperEndpoint().equals(bound)) {
                // a closed bound needs '<=', an open bound needs '<'
                boolean sameStrictness = range.upperBoundType() == BoundType.CLOSED ? lessOrEqual : less;
                if (sameStrictness) {
                    return MatchMinMax.MATCH_MAX;
                }
            }
        }
        return MatchMinMax.MATCH_NONE;
    }

    /**
     * Tells whether min/max bounds should be tracked for {@code expr}.
     *
     * @return true only for a {@link SlotReference} whose original column is present
     */
    private boolean isExprNeedAddMinMax(Expression expr) {
        if (!(expr instanceof SlotReference)) {
            return false;
        }
        SlotReference slot = (SlotReference) expr;
        return slot.getOriginalColumn().isPresent();
    }

    /**
     * Dispatches to the overload matching the runtime type of {@code value}.
     *
     * @param value value description to convert
     * @return expression -> range map produced by the matching overload
     * @throws NotImplementedException if {@code value} is none of the four handled kinds
     */
    private Map<Expression, MinMaxValue> getExprMinMaxValues(ValueDesc value) {
        if (value instanceof EmptyValue) {
            return getExprMinMaxValues((EmptyValue) value);
        }
        if (value instanceof DiscreteValue) {
            return getExprMinMaxValues((DiscreteValue) value);
        }
        if (value instanceof RangeValue) {
            return getExprMinMaxValues((RangeValue) value);
        }
        if (value instanceof UnknownValue) {
            return getExprMinMaxValues((UnknownValue) value);
        }
        throw new NotImplementedException("not implements");
    }

    /**
     * Converts an empty value: its reference, when eligible, maps to a null (empty) range flagged discrete.
     *
     * @return a new mutable map with at most one entry
     */
    private Map<Expression, MinMaxValue> getExprMinMaxValues(EmptyValue value) {
        Map<Expression, MinMaxValue> single = Maps.newHashMap();
        Expression ref = value.getReference();
        if (!isExprNeedAddMinMax(ref)) {
            return single;
        }
        single.put(ref, new MinMaxValue(null, true, 0));
        return single;
    }

    /**
     * Converts a discrete value: its reference, when eligible, maps to the smallest range enclosing
     * all of the discrete values, flagged discrete.
     *
     * @return a new mutable map with at most one entry
     */
    private Map<Expression, MinMaxValue> getExprMinMaxValues(DiscreteValue value) {
        Map<Expression, MinMaxValue> single = Maps.newHashMap();
        Expression ref = value.getReference();
        if (!isExprNeedAddMinMax(ref)) {
            return single;
        }
        Range<ComparableLiteral> enclosing = Range.encloseAll(value.getValues());
        single.put(ref, new MinMaxValue(enclosing, true, 0));
        return single;
    }

    /**
     * Converts a range value: its reference, when eligible, maps to the value's range, not discrete.
     *
     * @return a new mutable map with at most one entry
     */
    private Map<Expression, MinMaxValue> getExprMinMaxValues(RangeValue value) {
        Map<Expression, MinMaxValue> single = Maps.newHashMap();
        Expression ref = value.getReference();
        if (!isExprNeedAddMinMax(ref)) {
            return single;
        }
        single.put(ref, new MinMaxValue(value.getRange(), false, 0));
        return single;
    }

    /**
     * Computes per-expression ranges for an {@link UnknownValue} by merging the ranges of its source values.
     *
     * <p>The result starts as a copy of the map built for the first source value; every later source value
     * is merged into it. When {@code valueDesc.isAnd()} is true, ranges of the same expression are
     * intersected (a disconnected or empty intersection becomes a null range). Otherwise ranges are
     * spanned, and an expression that is missing from either side is widened to {@code Range.all()}.
     * Expressions first seen in a later source value get increasing {@code exprOrderIndex} values.
     *
     * @param valueDesc the unknown value whose source values are merged
     * @return a new mutable map from expression to merged range; empty when there are no source values
     */
    private Map<Expression, MinMaxValue> getExprMinMaxValues(UnknownValue valueDesc) {
        List<ValueDesc> sourceValues = valueDesc.getSourceValues();
        if (sourceValues.isEmpty()) {
            return Maps.newHashMap();
        }
        Map<Expression, MinMaxValue> result = Maps.newHashMap(getExprMinMaxValues(sourceValues.get(0)));
        // continue numbering after the largest order index already used by the first source value
        int nextExprOrderIndex = result.values().stream().mapToInt(k -> k.exprOrderIndex).max().orElse(0);
        for (int i = 1; i < sourceValues.size(); i++) {
            // process in sourceValues[i]
            Map<Expression, MinMaxValue> minMaxValues = getExprMinMaxValues(sourceValues.get(i));
            // merge values of sourceValues[i] into result.
            // also keep the value's relative order in sourceValues[i].
            // for example, if a and b in sourceValues[i], but not in result, then during merging,
            // a and b will assign a new exprOrderIndex (using nextExprOrderIndex).
            // if in sourceValues[i], a's exprOrderIndex < b's exprOrderIndex,
            // then make sure in result, a's new exprOrderIndex < b's new exprOrderIndex.
            // so that their relative order can preserve.
            List<Map.Entry<Expression, MinMaxValue>> minMaxValueList = minMaxValues.entrySet().stream()
                    .sorted((a, b) -> Integer.compare(a.getValue().exprOrderIndex, b.getValue().exprOrderIndex))
                    .collect(Collectors.toList());
            for (Map.Entry<Expression, MinMaxValue> entry : minMaxValueList) {
                Expression expr = entry.getKey();
                MinMaxValue value = result.get(expr);
                MinMaxValue otherValue = entry.getValue();
                if (valueDesc.isAnd()) {
                    if (value == null) { // value = null means range for all
                        // all AND other == other: adopt the other holder, only renumbering its order index
                        nextExprOrderIndex++;
                        value = otherValue;
                        value.exprOrderIndex = nextExprOrderIndex;
                        result.put(expr, value);
                    } else if (otherValue.range == null) { // range = null means empty range
                        value.range = null;
                    } else if (value.range != null) {
                        // Range.intersection requires connected ranges, so check isConnected first
                        if (value.range.isConnected(otherValue.range)) {
                            Range<ComparableLiteral> newRange = value.range.intersection(otherValue.range);
                            if (!newRange.isEmpty()) {
                                value.range = newRange;
                                // If newRange.lowerEndpoint().equals(newRange.upperEndpoint()),
                                // then isDiscrete should be true.
                                // But no need to do that because AddMinMax will not handle discrete value cases.
                                // NOTE(review): intersecting with a discrete set yields a discrete set, so '||'
                                // might be expected here; '&&' keeps such entries eligible for min-max.
                                // Confirm this is intended.
                                value.isDiscrete = value.isDiscrete && otherValue.isDiscrete;
                            } else {
                                value.range = null;
                            }
                        } else {
                            value.range = null;
                        }
                    }
                } else {
                    if (value == null) { // value = null means range for all
                        // all OR other == all: the expression is unconstrained on the earlier side
                        nextExprOrderIndex++;
                        value = new MinMaxValue(Range.all(), false, nextExprOrderIndex);
                        result.put(expr, value);
                    } else if (value.range == null) { // range = null means empty range
                        // empty OR other == other
                        value.range = otherValue.range;
                        value.isDiscrete = otherValue.isDiscrete;
                    } else if (otherValue.range != null) {
                        value.range = value.range.span(otherValue.range);
                        value.isDiscrete = value.isDiscrete && otherValue.isDiscrete;
                    }
                }
            }

            // process not in sourceValues[i]
            // for OR, an expression absent from this source value is unconstrained there, so widen it to all
            if (!valueDesc.isAnd()) {
                for (Map.Entry<Expression, MinMaxValue> entry : result.entrySet()) {
                    Expression expr = entry.getKey();
                    MinMaxValue value = entry.getValue();
                    if (!minMaxValues.containsKey(expr)) {
                        value.range = Range.all();
                        value.isDiscrete = false;
                    }
                }
            }
        }
        return result;
    }
}