// Repeat.java
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.nereids.trees.plans.algebra;
import org.apache.doris.nereids.exceptions.AnalysisException;
import org.apache.doris.nereids.trees.expressions.Alias;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.NamedExpression;
import org.apache.doris.nereids.trees.expressions.VirtualSlotReference;
import org.apache.doris.nereids.trees.expressions.functions.scalar.GroupingScalarFunction;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.types.BigIntType;
import org.apache.doris.nereids.util.BitUtils;
import org.apache.doris.nereids.util.ExpressionUtils;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import org.apache.commons.lang3.StringUtils;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
/**
* Common interface for logical/physical Repeat.
*/
public interface Repeat<CHILD_PLAN extends Plan> extends Aggregate<CHILD_PLAN> {
/** Name of the virtual slot created by generateVirtualGroupingIdSlot(). */
String COL_GROUPING_ID = "GROUPING_ID";
/** Name prefix that generateVirtualSlotName() puts in front of the slot name of a grouping function. */
String GROUPING_PREFIX = "GROUPING_PREFIX_";
/** Returns the grouping sets of this repeat, one list of expressions per grouping set. */
List<List<Expression>> getGroupingSets();
/** Returns the output expressions of this repeat. */
List<NamedExpression> getOutputExpressions();
/**
 * The group by expressions of a repeat are the expressions of all its grouping sets,
 * flattened by {@code ExpressionUtils.flatExpressions}.
 */
@Override
default List<Expression> getGroupByExpressions() {
    List<List<Expression>> allGroupingSets = getGroupingSets();
    return ExpressionUtils.flatExpressions(allGroupingSets);
}
/**
 * Prune this repeat to the outputs that are still needed.
 *
 * <p>The result keeps {@code prunedOutputs} plus every output that references the virtual
 * {@code GROUPING_ID} slot, and each grouping set is narrowed to the expressions that are
 * contained in {@code prunedOutputs}.
 *
 * @param prunedOutputs the outputs to keep
 * @return the repeat created by {@code withGroupSetsAndOutput} from the pruned grouping sets and outputs
 */
@Override
default Aggregate<CHILD_PLAN> pruneOutputs(List<NamedExpression> prunedOutputs) {
    // hash based view of the kept outputs, so the membership checks below do not rescan the list.
    Set<Expression> keptOutputs = ImmutableSet.copyOf(prunedOutputs);

    // just output reserved outputs and COL_GROUPING_ID for repeat correctly.
    ImmutableList.Builder<NamedExpression> outputBuilder
            = ImmutableList.builderWithExpectedSize(prunedOutputs.size() + 1);
    outputBuilder.addAll(prunedOutputs);
    for (NamedExpression output : getOutputExpressions()) {
        if (keptOutputs.contains(output)) {
            // already part of prunedOutputs, adding it again would duplicate the output.
            continue;
        }
        Set<VirtualSlotReference> virtualSlots = output.collect(VirtualSlotReference.class::isInstance);
        if (virtualSlots.stream().anyMatch(slot -> slot.getName().equals(COL_GROUPING_ID))) {
            outputBuilder.add(output);
        }
    }

    // prune groupingSets: if the parent operator does not need some expressions of the grouping sets,
    // we remove them. Per the original author this does not lead to a wrong result, because the
    // backend repeats the other columns as usual.
    ImmutableList.Builder<List<Expression>> groupingSetsBuilder
            = ImmutableList.builderWithExpectedSize(getGroupingSets().size());
    for (List<Expression> groupingSet : getGroupingSets()) {
        ImmutableList.Builder<Expression> groupingSetBuilder
                = ImmutableList.builderWithExpectedSize(groupingSet.size());
        for (Expression expr : groupingSet) {
            if (keptOutputs.contains(expr)) {
                groupingSetBuilder.add(expr);
            }
        }
        groupingSetsBuilder.add(groupingSetBuilder.build());
    }
    return withGroupSetsAndOutput(groupingSetsBuilder.build(), outputBuilder.build());
}
/**
 * Creates a repeat from the given grouping sets and output expressions.
 * pruneOutputs() calls it with the pruned grouping sets and the pruned outputs.
 * NOTE(review): presumably everything else is copied from this repeat -- confirm in the implementations.
 *
 * @param groupingSets the grouping sets of the returned repeat
 * @param outputExpressions the output expressions of the returned repeat
 */
Repeat<CHILD_PLAN> withGroupSetsAndOutput(List<List<Expression>> groupingSets,
List<NamedExpression> outputExpressions);
/**
 * Creates the virtual slot named {@code COL_GROUPING_ID}. It is built with
 * {@code BigIntType.INSTANCE}, an empty origin function, and
 * {@code GroupingSetShapes.computeVirtualGroupingIdValue} as its value computing method.
 */
static VirtualSlotReference generateVirtualGroupingIdSlot() {
    VirtualSlotReference groupingIdSlot = new VirtualSlotReference(
            COL_GROUPING_ID,
            BigIntType.INSTANCE,
            Optional.empty(),
            GroupingSetShapes::computeVirtualGroupingIdValue);
    return groupingIdSlot;
}
/**
 * Creates the virtual slot of a grouping scalar function. The slot is named by
 * {@code generateVirtualSlotName}, uses the data type of the function, keeps the function as its
 * origin, and uses {@code function.computeVirtualSlotValue} as its value computing method.
 */
static VirtualSlotReference generateVirtualSlotByFunction(GroupingScalarFunction function) {
    String slotName = generateVirtualSlotName(function);
    return new VirtualSlotReference(
            slotName,
            function.getDataType(),
            Optional.of(function),
            function::computeVirtualSlotValue);
}
/**
 * get common grouping set expressions, i.e. the expressions that appear in every grouping set.
 * e.g. grouping sets((a, b, c), (b, c), (c))
 * the common expressions is [c]
 *
 * @return the common expressions, in the order of the first grouping set;
 *         an empty set if there is no grouping set at all
 */
default Set<Expression> getCommonGroupingSetExpressions() {
    Iterator<List<Expression>> iterator = getGroupingSets().iterator();
    if (!iterator.hasNext()) {
        // without any grouping set nothing is common, and iterator.next() would throw.
        return ImmutableSet.of();
    }
    Set<Expression> commonGroupingExpressions = Sets.newLinkedHashSet(iterator.next());
    while (iterator.hasNext() && !commonGroupingExpressions.isEmpty()) {
        // shrink the set in place instead of stacking one lazy intersection view per grouping set.
        commonGroupingExpressions.retainAll(Sets.newHashSet(iterator.next()));
    }
    return commonGroupingExpressions;
}
/**
 * getSortedVirtualSlots: order by virtual GROUPING_ID slot first.
 *
 * @return the virtual slots collected from the output expressions, the GROUPING_ID slot being the first
 * @throws AnalysisException if the output expressions contain no virtual slot named GROUPING_ID
 */
default Set<VirtualSlotReference> getSortedVirtualSlots() {
    Set<VirtualSlotReference> virtualSlots =
            ExpressionUtils.collect(getOutputExpressions(), VirtualSlotReference.class::isInstance);
    // fail with a descriptive message instead of the bare NoSuchElementException of Optional.get()
    VirtualSlotReference virtualGroupingSetIdSlot = virtualSlots.stream()
            .filter(slot -> slot.getName().equals(COL_GROUPING_ID))
            .findFirst()
            .orElseThrow(() -> new AnalysisException(
                    "Can not find virtual slot " + COL_GROUPING_ID + " in output: " + getOutputExpressions()));
    return ImmutableSet.<VirtualSlotReference>builder()
            .add(virtualGroupingSetIdSlot)
            .addAll(Sets.difference(virtualSlots, ImmutableSet.of(virtualGroupingSetIdSlot)))
            .build();
}
/**
 * computeVirtualSlotValues: apply the compute method of every virtual slot to the shapes of this
 * repeat. According to the original author, backend fills these long values to the VirtualSlotRef.
 *
 * @param sortedVirtualSlots the virtual slots to compute, the result follows their iteration order
 * @return one list of long values per virtual slot
 */
default List<List<Long>> computeVirtualSlotValues(Set<VirtualSlotReference> sortedVirtualSlots) {
    GroupingSetShapes shapes = toShapes();
    ImmutableList.Builder<List<Long>> slotValues
            = ImmutableList.builderWithExpectedSize(sortedVirtualSlots.size());
    for (VirtualSlotReference slot : sortedVirtualSlots) {
        slotValues.add(slot.getComputeLongValueMethod().apply(shapes));
    }
    return slotValues.build();
}
/**
 * Flatten the grouping sets to a set of distinct expressions, and describe every grouping set
 * by a GroupingSetShape: one flag per flattened expression, which is true if the grouping set
 * does not contain the expression.
 */
default GroupingSetShapes toShapes() {
    Set<Expression> flattened = ImmutableSet.copyOf(ExpressionUtils.flatExpressions(getGroupingSets()));
    List<GroupingSetShape> shapeList = Lists.newArrayList();
    for (List<Expression> groupingSet : getGroupingSets()) {
        // the flags follow the iteration order of the flattened expressions
        List<Boolean> erasedFlags = flattened.stream()
                .map(expression -> !groupingSet.contains(expression))
                .collect(Collectors.toList());
        shapeList.add(new GroupingSetShape(erasedFlags));
    }
    return new GroupingSetShapes(flattened, shapeList);
}
/**
 * Translate the output indexes of every grouping set (see getGroupingSetsIndexesInOutput)
 * to slot ids, by looking up each index in slotIdList.
 *
 * eg: groupingSets=((b, a), (a)), output=[a, b]
 * slotId in the outputTuple: [3, 4]
 *
 * return: [(4, 3), (3)]
 *
 * @param slotIdList the slot ids, indexed by the position in the output
 */
default List<Set<Integer>> computeRepeatSlotIdList(List<Integer> slotIdList) {
    List<Set<Integer>> slotIdsOfGroupingSets = Lists.newArrayList();
    for (Set<Integer> outputIndexes : getGroupingSetsIndexesInOutput()) {
        // linked set: keep the order of the grouping set
        Set<Integer> slotIds = Sets.newLinkedHashSet();
        outputIndexes.forEach(outputIndex -> slotIds.add(slotIdList.get(outputIndex)));
        slotIdsOfGroupingSets.add(slotIds);
    }
    return slotIdsOfGroupingSets;
}
/**
 * getGroupingSetsIndexesInOutput: for every grouping set, find the positions of its
 * expressions in the output (see indexesOfOutput).
 *
 * e.g. groupingSets=((b, a), (a)), output=[a, b]
 * return ((1, 0), (0))
 *
 * @throws AnalysisException if an expression of a grouping set is not found in the output
 */
default List<Set<Integer>> getGroupingSetsIndexesInOutput() {
    Map<Expression, Integer> outputPositions = indexesOfOutput();
    return getGroupingSets().stream()
            .map(groupingSet -> {
                // linked set: keep the order of the expressions in the grouping set
                Set<Integer> positions = Sets.newLinkedHashSet();
                for (Expression groupingExpression : groupingSet) {
                    Integer position = outputPositions.get(groupingExpression);
                    if (position == null) {
                        throw new AnalysisException(
                                "Can not find grouping set expression in output: " + groupingExpression);
                    }
                    positions.add(position);
                }
                return positions;
            })
            .collect(Collectors.toList());
}
/**
 * indexesOfOutput: map every output expression to its position in the output.
 * For an Alias, the child of the alias is mapped to the same position too.
 *
 * e.g. output=[a + 1, b + 2, c]
 *
 * return the map(
 * `a + 1`: 0,
 * `b + 2`: 1,
 * `c`: 2
 * )
 */
default Map<Expression, Integer> indexesOfOutput() {
    Map<Expression, Integer> positions = Maps.newLinkedHashMap();
    int position = 0;
    for (NamedExpression output : getOutputExpressions()) {
        positions.put(output, position);
        if (output instanceof Alias) {
            Alias alias = (Alias) output;
            positions.put(alias.child(), position);
        }
        position++;
    }
    return positions;
}
/**
 * Build the virtual slot name of a grouping scalar function:
 * GROUPING_PREFIX followed by the sql of the arguments, joined by "_".
 */
static String generateVirtualSlotName(GroupingScalarFunction function) {
    StringBuilder slotName = new StringBuilder(GROUPING_PREFIX);
    String separator = "";
    for (Expression argument : function.getArguments()) {
        slotName.append(separator).append(argument.toSql());
        separator = "_";
    }
    return slotName.toString();
}
/**
 * GroupingSetShapes: the flattened grouping set expressions,
 * together with one GroupingSetShape per grouping set.
 */
class GroupingSetShapes {
    public final List<Expression> flattenGroupingSetExpression;
    public final List<GroupingSetShape> shapes;

    /** Keeps immutable copies of the flattened expressions and of the shapes. */
    public GroupingSetShapes(Set<Expression> flattenGroupingSetExpression, List<GroupingSetShape> shapes) {
        this.flattenGroupingSetExpression = ImmutableList.copyOf(flattenGroupingSetExpression);
        this.shapes = ImmutableList.copyOf(shapes);
    }

    /**
     * compute the long values that backend need to fill to the GROUPING_ID slot, one per shape.
     * If a shape yields a value that is already taken, 2^n (n = number of flattened expressions)
     * is added to it until the value is distinct from all the previous ones.
     */
    public List<Long> computeVirtualGroupingIdValue() {
        long step = (long) Math.pow(2, flattenGroupingSetExpression.size());
        Set<Long> groupingIds = Sets.newLinkedHashSet();
        for (GroupingSetShape shape : shapes) {
            long groupingId = shape.computeLongValue();
            // add() returns false while the value is already taken
            while (!groupingIds.add(groupingId)) {
                groupingId += step;
            }
        }
        return ImmutableList.copyOf(groupingIds);
    }

    /** Returns the position of the expression in the flattened expressions, or -1 if it is absent. */
    public int indexOf(Expression expression) {
        return flattenGroupingSetExpression.indexOf(expression);
    }

    @Override
    public String toString() {
        return "GroupingSetShapes(flattenGroupingSetExpression="
                + StringUtils.join(flattenGroupingSetExpression, ", ")
                + ", shapes=" + shapes + ")";
    }
}
/**
 * GroupingSetShape describes one grouping set: for each flattened grouping expression it holds
 * whether the column should be erased to null. It is also the computation source of the
 * grouping() / grouping_id() function.
 *
 * for example: this grouping sets will create 3 group sets
 * <pre>
 * select b, a
 * from tbl
 * group by
 * grouping sets
 * (
 *     (a, b),  -- GroupingSetShape(shouldBeErasedToNull=[false, false])
 *     (   b),  -- GroupingSetShape(shouldBeErasedToNull=[true, false])
 *     (    )   -- GroupingSetShape(shouldBeErasedToNull=[true, true])
 * )
 * </pre>
 */
class GroupingSetShape {
    List<Boolean> shouldBeErasedToNull;

    public GroupingSetShape(List<Boolean> shouldBeErasedToNull) {
        this.shouldBeErasedToNull = shouldBeErasedToNull;
    }

    /** Returns the flag of the expression at the given position of the flattened expressions. */
    public boolean shouldBeErasedToNull(int index) {
        Boolean erased = shouldBeErasedToNull.get(index);
        return erased;
    }

    /**
     * convert shouldBeErasedToNull to bits and combine the bits to a long by
     * BitUtils.bigEndianBitsToLong; backend will set the column to null if the bit is 1.
     *
     * The compute method, e.g.
     * shouldBeErasedToNull = [false, true, true, true] means [0, 1, 1, 1],
     * we combine the bits of big endian to long value 7.
     *
     * The example in class comment:
     * grouping sets
     * (
     *     (a, b),  -- [0, 0], to long value is 0
     *     (   b),  -- [1, 0], to long value is 2
     *     (    )   -- [1, 1], to long value is 3
     * )
     */
    public Long computeLongValue() {
        return BitUtils.bigEndianBitsToLong(this.shouldBeErasedToNull);
    }

    @Override
    public String toString() {
        return "GroupingSetShape(shouldBeErasedToNull="
                + StringUtils.join(this.shouldBeErasedToNull, ", ") + ")";
    }
}
}