QueryColumnCollector.java
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.nereids.rules.expression;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.nereids.jobs.JobContext;
import org.apache.doris.nereids.rules.expression.QueryColumnCollector.CollectorContext;
import org.apache.doris.nereids.trees.expressions.NamedExpression;
import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.expressions.SlotReference;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate;
import org.apache.doris.nereids.trees.plans.logical.LogicalCatalogRelation;
import org.apache.doris.nereids.trees.plans.logical.LogicalFileScan;
import org.apache.doris.nereids.trees.plans.logical.LogicalFilter;
import org.apache.doris.nereids.trees.plans.logical.LogicalHaving;
import org.apache.doris.nereids.trees.plans.logical.LogicalJoin;
import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan;
import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
import org.apache.doris.nereids.trees.plans.logical.LogicalWindow;
import org.apache.doris.nereids.trees.plans.visitor.CustomRewriter;
import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanRewriter;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.statistics.AnalysisManager;
import org.apache.doris.statistics.util.StatisticsUtil;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
/**
* Used to collect query column.
*/
public class QueryColumnCollector extends DefaultPlanRewriter<CollectorContext> implements CustomRewriter {
@Override
public Plan rewriteRoot(Plan plan, JobContext jobContext) {
ConnectContext connectContext = ConnectContext.get();
if (connectContext != null && connectContext.getSessionVariable().internalSession) {
return plan;
}
CollectorContext context = new CollectorContext();
plan.accept(this, context);
if (StatisticsUtil.enableAutoAnalyze()) {
context.midPriority.removeAll(context.highPriority);
AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager();
analysisManager.updateHighPriorityColumn(context.highPriority);
analysisManager.updateMidPriorityColumn(context.midPriority);
}
return plan;
}
/**
* Context.
*/
public static class CollectorContext {
public Map<Slot/*project output column*/, NamedExpression/*Actual project expr*/> projects = new HashMap<>();
public Set<Slot> highPriority = new HashSet<>();
public Set<Slot> midPriority = new HashSet<>();
}
@Override
public Plan visitLogicalProject(LogicalProject<? extends Plan> project, CollectorContext context) {
project.child().accept(this, context);
List<NamedExpression> projects = project.getOutputs();
List<Slot> slots = project.computeOutput();
for (int i = 0; i < slots.size(); i++) {
context.projects.put(slots.get(i), projects.get(i));
}
if (project.child() instanceof LogicalCatalogRelation
|| project.child() instanceof LogicalFilter
&& ((LogicalFilter<?>) project.child()).child() instanceof LogicalCatalogRelation) {
Set<Slot> allUsed = project.getExpressions()
.stream().flatMap(e -> e.<SlotReference>collect(SlotReference.class::isInstance).stream())
.collect(Collectors.toSet());
LogicalCatalogRelation scan = project.child() instanceof LogicalCatalogRelation
? (LogicalCatalogRelation) project.child()
: (LogicalCatalogRelation) project.child().child(0);
List<Slot> outputOfScan = scan.getOutput();
for (Slot slot : outputOfScan) {
if (!allUsed.contains(slot)) {
context.midPriority.remove(slot);
}
}
}
return project;
}
@Override
public Plan visitLogicalJoin(LogicalJoin<? extends Plan, ? extends Plan> join, CollectorContext context) {
join.child(0).accept(this, context);
join.child(1).accept(this, context);
context.highPriority.addAll(
(join.isMarkJoin() ? join.getLeftConditionSlot() : join.getConditionSlot())
.stream().flatMap(s -> backtrace(s, context).stream())
.collect(Collectors.toSet())
);
return join;
}
@Override
public Plan visitLogicalAggregate(LogicalAggregate<? extends Plan> aggregate, CollectorContext context) {
aggregate.child(0).accept(this, context);
context.highPriority.addAll(aggregate.getGroupByExpressions()
.stream()
.flatMap(e -> e.<SlotReference>collect(SlotReference.class::isInstance).stream())
.flatMap(s -> backtrace(s, context).stream())
.collect(Collectors.toSet()));
return aggregate;
}
@Override
public Plan visitLogicalHaving(LogicalHaving<? extends Plan> having, CollectorContext context) {
having.child(0).accept(this, context);
context.highPriority.addAll(
having.getExpressions().stream()
.flatMap(e -> e.<SlotReference>collect(SlotReference.class::isInstance).stream())
.flatMap(s -> backtrace(s, context).stream())
.collect(Collectors.toSet()));
return having;
}
@Override
public Plan visitLogicalOlapScan(LogicalOlapScan olapScan, CollectorContext context) {
List<Slot> slots = olapScan.getOutput();
context.midPriority.addAll(slots);
return olapScan;
}
@Override
public Plan visitLogicalFileScan(LogicalFileScan fileScan, CollectorContext context) {
List<Slot> slots = fileScan.getOutput();
context.midPriority.addAll(slots);
return fileScan;
}
@Override
public Plan visitLogicalFilter(LogicalFilter<? extends Plan> filter, CollectorContext context) {
filter.child(0).accept(this, context);
context.highPriority.addAll(filter
.getExpressions()
.stream()
.flatMap(e -> e.<SlotReference>collect(SlotReference.class::isInstance).stream())
.flatMap(s -> backtrace(s, context).stream())
.collect(Collectors.toSet()));
return filter;
}
@Override
public Plan visitLogicalWindow(LogicalWindow<? extends Plan> window, CollectorContext context) {
window.child(0).accept(this, context);
context.highPriority.addAll(window
.getWindowExpressions()
.stream()
.flatMap(e -> e.<SlotReference>collect(SlotReference.class::isInstance).stream())
.flatMap(s -> backtrace(s, context).stream())
.collect(Collectors.toSet()));
return window;
}
private Set<Slot> backtrace(Slot slot, CollectorContext context) {
return backtrace(slot, new HashSet<>(), context);
}
private Set<Slot> backtrace(Slot slot, Set<Slot> path, CollectorContext context) {
if (path.contains(slot)) {
return Collections.emptySet();
}
path.add(slot);
if (slot instanceof SlotReference) {
SlotReference slotReference = (SlotReference) slot;
Optional<Column> col = slotReference.getOriginalColumn();
Optional<TableIf> table = slotReference.getOriginalTable();
if (col.isPresent() && table.isPresent()) {
return Collections.singleton(slot);
}
}
NamedExpression namedExpression = context.projects.get(slot);
if (namedExpression == null) {
return Collections.emptySet();
}
Set<SlotReference> slotReferences = namedExpression.collect(SlotReference.class::isInstance);
Set<Slot> refCol = new HashSet<>();
for (SlotReference slotReference : slotReferences) {
refCol.addAll(backtrace(slotReference, path, context));
}
return refCol;
}
}