Analyzer.java
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.nereids.jobs.executor;
import org.apache.doris.nereids.CascadesContext;
import org.apache.doris.nereids.jobs.rewrite.RewriteJob;
import org.apache.doris.nereids.rules.analysis.AdjustAggregateNullableForEmptySet;
import org.apache.doris.nereids.rules.analysis.AnalyzeCTE;
import org.apache.doris.nereids.rules.analysis.BindExpression;
import org.apache.doris.nereids.rules.analysis.BindRelation;
import org.apache.doris.nereids.rules.analysis.BindSink;
import org.apache.doris.nereids.rules.analysis.CheckAfterBind;
import org.apache.doris.nereids.rules.analysis.CheckAnalysis;
import org.apache.doris.nereids.rules.analysis.CheckPolicy;
import org.apache.doris.nereids.rules.analysis.CollectJoinConstraint;
import org.apache.doris.nereids.rules.analysis.CollectSubQueryAlias;
import org.apache.doris.nereids.rules.analysis.CompressedMaterialize;
import org.apache.doris.nereids.rules.analysis.EliminateDistinctConstant;
import org.apache.doris.nereids.rules.analysis.EliminateLogicalPreAggOnHint;
import org.apache.doris.nereids.rules.analysis.EliminateLogicalSelectHint;
import org.apache.doris.nereids.rules.analysis.FillUpMissingSlots;
import org.apache.doris.nereids.rules.analysis.FillUpQualifyMissingSlot;
import org.apache.doris.nereids.rules.analysis.HavingToFilter;
import org.apache.doris.nereids.rules.analysis.LeadingJoin;
import org.apache.doris.nereids.rules.analysis.NormalizeAggregate;
import org.apache.doris.nereids.rules.analysis.NormalizeGenerate;
import org.apache.doris.nereids.rules.analysis.NormalizeRepeat;
import org.apache.doris.nereids.rules.analysis.OneRowRelationExtractAggregate;
import org.apache.doris.nereids.rules.analysis.ProjectToGlobalAggregate;
import org.apache.doris.nereids.rules.analysis.ProjectWithDistinctToAggregate;
import org.apache.doris.nereids.rules.analysis.QualifyToFilter;
import org.apache.doris.nereids.rules.analysis.ReplaceExpressionByChildOutput;
import org.apache.doris.nereids.rules.analysis.SubqueryToApply;
import org.apache.doris.nereids.rules.analysis.VariableToLiteral;
import org.apache.doris.nereids.rules.rewrite.SemiJoinCommute;
import org.apache.doris.nereids.rules.rewrite.SimplifyAggGroupBy;
import org.apache.doris.nereids.trees.plans.logical.LogicalCTEAnchor;
import org.apache.doris.nereids.trees.plans.logical.LogicalView;
import com.google.common.collect.ImmutableSet;
import java.util.List;
/**
* Bind symbols according to metadata in the catalog, perform semantic analysis, etc.
* TODO: revisit the interface after subquery analysis is supported.
*/
public class Analyzer extends AbstractBatchJobExecutor {
public static final List<RewriteJob> ANALYZE_JOBS = buildAnalyzeJobs();
/**
* constructor of Analyzer. For view, we only do bind relation since other analyze step will do by outer Analyzer.
*
* @param cascadesContext current context for analyzer
*/
public Analyzer(CascadesContext cascadesContext) {
super(cascadesContext);
}
@Override
public List<RewriteJob> getJobs() {
return ANALYZE_JOBS;
}
/**
* nereids analyze sql.
*/
public void analyze() {
execute();
}
private static List<RewriteJob> buildAnalyzeJobs() {
return notTraverseChildrenOf(
ImmutableSet.of(LogicalView.class, LogicalCTEAnchor.class),
Analyzer::buildAnalyzerJobs
);
}
private static List<RewriteJob> buildAnalyzerJobs() {
return jobs(
// we should eliminate hint before "Subquery unnesting".
topDown(new AnalyzeCTE()),
topDown(new EliminateLogicalSelectHint(),
new EliminateLogicalPreAggOnHint()),
bottomUp(
new BindRelation(),
new CheckPolicy()
),
bottomUp(new BindExpression()),
topDown(new BindSink()),
bottomUp(new CheckAfterBind()),
topDown(new FillUpQualifyMissingSlot()),
bottomUp(
new ProjectToGlobalAggregate(),
// this rule check's the logicalProject node's isDistinct property
// and replace the logicalProject node with a LogicalAggregate node
// so any rule before this, if create a new logicalProject node
// should make sure isDistinct property is correctly passed around.
// please see rule BindSlotReference or BindFunction for example
new EliminateDistinctConstant(),
new ProjectWithDistinctToAggregate(),
new ReplaceExpressionByChildOutput(),
new OneRowRelationExtractAggregate()
),
topDown(
new FillUpMissingSlots(),
// We should use NormalizeRepeat to compute nullable properties for LogicalRepeat in the analysis
// stage. NormalizeRepeat will compute nullable property, add virtual slot, LogicalAggregate and
// LogicalProject for normalize. This rule depends on FillUpMissingSlots to fill up slots.
new NormalizeRepeat()
),
bottomUp(new AdjustAggregateNullableForEmptySet()),
// consider sql with user defined var @t_zone
// set @t_zone='GMT';
// SELECT
// DATE_FORMAT(convert_tz(dt, time_zone, @t_zone),'%Y-%m-%d') day
// FROM
// t
// GROUP BY
// 1;
// @t_zone must be replaced as 'GMT' before EliminateGroupByConstant and NormalizeAggregate rule.
// So need run VariableToLiteral rule before the two rules.
topDown(new VariableToLiteral()),
// run CheckAnalysis before EliminateGroupByConstant in order to report error message correctly like bellow
// select SUM(lo_tax) FROM lineorder group by 1;
// errCode = 2, detailMessage = GROUP BY expression must not contain aggregate functions: sum(lo_tax)
bottomUp(new CheckAnalysis()),
topDown(new SimplifyAggGroupBy()),
bottomUp(new CompressedMaterialize()),
topDown(new NormalizeAggregate()),
topDown(new HavingToFilter()),
topDown(new QualifyToFilter()),
bottomUp(new SemiJoinCommute()),
bottomUp(
new CollectSubQueryAlias(),
new CollectJoinConstraint()
),
topDown(new LeadingJoin()),
bottomUp(new NormalizeGenerate()),
bottomUp(new SubqueryToApply())
/*
* Notice, MergeProjects rule should NOT be placed after SubqueryToApply in analyze phase.
* because in SubqueryToApply, we may add assert_true function with subquery output slot in projects list.
* on the other hand, the assert_true function should be not be in final output.
* in order to keep the plan unchanged, we add a new project node to prune the extra assert_true slot.
* but MergeProjects rule will merge the two projects and keep assert_true anyway.
* so we move MergeProjects from analyze to rewrite phase.
*/
);
}
}