// InitJoinOrder.java

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.nereids.rules.rewrite;

import org.apache.doris.nereids.rules.Rule;
import org.apache.doris.nereids.rules.RuleType;
import org.apache.doris.nereids.rules.rewrite.StatsDerive.DeriveContext;
import org.apache.doris.nereids.trees.plans.AbstractPlan;
import org.apache.doris.nereids.trees.plans.JoinType;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.logical.LogicalJoin;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Rewrite rule that establishes a reasonable initial join order before the plan tree is optimized.
 *
 * <p>The memo limits how much data it may hold. When a large SQL query hits that limit, some
 * subtrees of the plan tree may not be optimized at all, so a reasonably good join order has to
 * be in place up front. This rule swaps the two children of a join when the left child is
 * estimated to be much smaller than the right child.
 */
public class InitJoinOrder extends OneRewriteRuleFactory {
    private static final Logger LOG = LoggerFactory.getLogger(InitJoinOrder.class);
    // The children are swapped only when leftRows < rightRows * SWAP_THRESHOLD.
    private static final double SWAP_THRESHOLD = 0.1;
    // Used to derive statistics for join children that do not carry any yet.
    private final StatsDerive derive = new StatsDerive(false);

    /**
     * Builds the rule: matches every logical join that is not a mark join and, unless one of
     * the guards below applies, delegates to {@link #swapJoinChildrenIfNeed(LogicalJoin)}.
     *
     * @return the rule registered under {@link RuleType#INIT_JOIN_ORDER}
     */
    @Override
    public Rule build() {
        return logicalJoin()
                .whenNot(LogicalJoin::isMarkJoin)
                .thenApply(ctx -> {
                    // Each guard returns null; presumably the framework treats that as "no rewrite".
                    if (ctx.statementContext.getConnectContext().getSessionVariable().isDisableJoinReorder()) {
                        return null;
                    }
                    if (!ctx.statementContext.getConnectContext().getSessionVariable().enableInitJoinOrder) {
                        return null;
                    }
                    if (ctx.cascadesContext.isLeadingDisableJoinReorder()) {
                        return null;
                    }
                    LogicalJoin<? extends Plan, ? extends Plan> candidate = (LogicalJoin<?, ?>) ctx.root;
                    if (candidate.isLeadingJoin()) {
                        return null;
                    }
                    return swapJoinChildrenIfNeed(candidate);
                })
                .toRule(RuleType.INIT_JOIN_ORDER);
    }

    /**
     * Returns a copy of {@code join} with its children exchanged (and its join type swapped
     * accordingly) when the left child's row count is below {@code SWAP_THRESHOLD} times the
     * right child's row count.
     *
     * @param join the join whose children may be exchanged
     * @return the swapped join, or {@code null} when the join is left as it is
     */
    private Plan swapJoinChildrenIfNeed(LogicalJoin<? extends Plan, ? extends Plan> join) {
        JoinType currentType = join.getJoinType();
        if (currentType.isLeftSemiOrAntiJoin()) {
            // TODO: the transform rules for right semi/anti joins are not complete yet. For example,
            //  LogicalJoinSemiJoinTransposeProject (tpch 22) only works for left semi/anti joins, so
            //  turning a left semi/anti join into a right one would lose the opportunity to optimize
            //  the join order.
            return null;
        }
        JoinType swappedType = currentType.swap();
        if (swappedType == null) {
            // NOTE(review): presumably this join type has no swapped counterpart; confirm in JoinType.
            return null;
        }

        AbstractPlan leftChild = (AbstractPlan) join.left();
        AbstractPlan rightChild = (AbstractPlan) join.right();
        deriveStatsIfAbsent(leftChild);
        deriveStatsIfAbsent(rightChild);

        double leftRows = leftChild.getStats().getRowCount();
        double rightRows = rightChild.getStats().getRowCount();
        boolean leftIsMuchSmaller = leftRows < rightRows * SWAP_THRESHOLD;
        return leftIsMuchSmaller
                ? join.withTypeChildren(swappedType, rightChild, leftChild, join.getJoinReorderContext())
                : null;
    }

    /** Runs the stats deriver on {@code plan} when it has no statistics attached yet. */
    private void deriveStatsIfAbsent(AbstractPlan plan) {
        if (plan.getStats() == null) {
            plan.accept(derive, new DeriveContext());
        }
    }

}