StatisticsJobAppender.java

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.statistics;

import org.apache.doris.analysis.TableName;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Database;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.Table;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.common.Pair;
import org.apache.doris.common.util.MasterDaemon;
import org.apache.doris.datasource.InternalCatalog;
import org.apache.doris.statistics.util.StatisticsUtil;

import com.google.common.annotations.VisibleForTesting;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;

public class StatisticsJobAppender extends MasterDaemon {

    private static final Logger LOG = LogManager.getLogger(StatisticsJobAppender.class);

    public static final long INTERVAL = 1000;
    public static final int JOB_MAP_SIZE = 100;
    public static final int TABLE_BATCH_SIZE = 100;

    private long currentDbId = 0;
    private long currentTableId = 0;
    private long lastRoundFinishTime = 0;
    private final long lowJobIntervalMs = TimeUnit.MINUTES.toMillis(1);

    public StatisticsJobAppender() {
        super("Statistics Job Appender", INTERVAL);
    }

    @Override
    protected void runAfterCatalogReady() {
        if (!StatisticsUtil.enableAutoAnalyze()) {
            return;
        }
        if (!Env.getCurrentEnv().isMaster()) {
            return;
        }
        if (!StatisticsUtil.statsTblAvailable()) {
            LOG.info("Stats table not available, skip");
            return;
        }
        if (Env.getCurrentEnv().getStatisticsAutoCollector() == null
                || !Env.getCurrentEnv().getStatisticsAutoCollector().isReady()) {
            LOG.info("Statistics auto collector not ready, skip");
            return;
        }
        if (Env.isCheckpointThread()) {
            return;
        }
        appendJobs();
    }

    protected void appendJobs() {
        AnalysisManager manager = Env.getCurrentEnv().getAnalysisManager();
        appendColumnsToJobs(manager.highPriorityColumns, manager.highPriorityJobs);
        appendColumnsToJobs(manager.midPriorityColumns, manager.midPriorityJobs);
        if (StatisticsUtil.enableAutoAnalyzeInternalCatalog()) {
            appendToLowJobs(manager.lowPriorityJobs, manager.veryLowPriorityJobs);
        }
    }

    protected void appendColumnsToJobs(Queue<QueryColumn> columnQueue, Map<TableName, Set<Pair<String, String>>> jobs) {
        int size = columnQueue.size();
        int processed = 0;
        for (int i = 0; i < size; i++) {
            QueryColumn column = columnQueue.poll();
            if (column == null) {
                continue;
            }
            TableIf table;
            try {
                table = StatisticsUtil.findTable(column.catalogId, column.dbId, column.tblId);
            } catch (Exception e) {
                LOG.warn("Fail to find table {}.{}.{} for column {}",
                        column.catalogId, column.dbId, column.tblId, column.colName);
                continue;
            }
            if (StatisticConstants.SYSTEM_DBS.contains(table.getDatabase().getFullName())) {
                continue;
            }
            Column col = table.getColumn(column.colName);
            if (col == null || !col.isVisible() || StatisticsUtil.isUnsupportedType(col.getType())) {
                continue;
            }
            Set<Pair<String, String>> columnIndexPairs = table.getColumnIndexPairs(
                    Collections.singleton(column.colName)).stream()
                    .filter(p -> StatisticsUtil.needAnalyzeColumn(table, p))
                    .collect(Collectors.toSet());
            if (columnIndexPairs.isEmpty()) {
                continue;
            }
            TableName tableName = new TableName(table.getDatabase().getCatalog().getName(),
                    table.getDatabase().getFullName(), table.getName());
            synchronized (jobs) {
                // If job map reach the upper limit, stop putting new jobs.
                if (!jobs.containsKey(tableName) && jobs.size() >= JOB_MAP_SIZE) {
                    LOG.info("High or mid job map full.");
                    break;
                }
                if (jobs.containsKey(tableName)) {
                    jobs.get(tableName).addAll(columnIndexPairs);
                } else {
                    jobs.put(tableName, columnIndexPairs);
                }
            }
            processed++;
        }
        if (size > 0 && LOG.isDebugEnabled()) {
            LOG.debug("{} of {} columns append to jobs", processed, size);
        }
    }

    protected void appendToLowJobs(Map<TableName, Set<Pair<String, String>>> lowPriorityJobs,
                                   Map<TableName, Set<Pair<String, String>>> veryLowPriorityJobs) {
        if (System.currentTimeMillis() - lastRoundFinishTime < lowJobIntervalMs) {
            return;
        }
        InternalCatalog catalog = Env.getCurrentInternalCatalog();
        List<Long> sortedDbs = catalog.getDbIds().stream().sorted().collect(Collectors.toList());
        int processed = 0;
        for (long dbId : sortedDbs) {
            if (dbId < currentDbId || catalog.getDbNullable(dbId) == null
                    || StatisticConstants.SYSTEM_DBS.contains(catalog.getDbNullable(dbId).getFullName())) {
                continue;
            }
            currentDbId = dbId;
            Optional<Database> db = catalog.getDb(dbId);
            if (!db.isPresent()) {
                continue;
            }
            List<Table> tables = db.get().getTables().stream()
                    .sorted((t1, t2) -> (int) (t1.getId() - t2.getId())).collect(Collectors.toList());
            for (Table t : tables) {
                if (!(t instanceof OlapTable) || t.getId() <= currentTableId) {
                    continue;
                }
                if (t.getBaseSchema().size() > StatisticsUtil.getAutoAnalyzeTableWidthThreshold()) {
                    continue;
                }
                Set<String> columns = t.getSchemaAllIndexes(false).stream()
                        .filter(c -> !StatisticsUtil.isUnsupportedType(c.getType()))
                        .map(Column::getName).collect(Collectors.toSet());
                Set<Pair<String, String>> columnIndexPairs = t.getColumnIndexPairs(columns)
                        .stream().filter(p -> StatisticsUtil.needAnalyzeColumn(t, p))
                        .collect(Collectors.toSet());
                TableName tableName = new TableName(t.getDatabase().getCatalog().getName(),
                        t.getDatabase().getFullName(), t.getName());
                // Append to low job map first.
                if (!columnIndexPairs.isEmpty()) {
                    boolean appended = doAppend(lowPriorityJobs, columnIndexPairs, tableName);
                    // If low job map is full, stop this iteration.
                    if (!appended) {
                        LOG.debug("Low Priority job map is full.");
                        return;
                    }
                } else {
                    // Append to very low job map.
                    columnIndexPairs = t.getColumnIndexPairs(columns)
                        .stream().filter(p -> StatisticsUtil.isLongTimeColumn(t, p))
                        .collect(Collectors.toSet());
                    if (!columnIndexPairs.isEmpty()) {
                        boolean appended = doAppend(veryLowPriorityJobs, columnIndexPairs, tableName);
                        // If very low job map is full, simply ignore it and go to the next table.
                        if (!appended) {
                            LOG.debug("Very low Priority job map is full.");
                        }
                    }
                }
                currentTableId = t.getId();
                if (++processed >= TABLE_BATCH_SIZE) {
                    return;
                }
            }
        }
        // All tables have been processed once, reset for the next loop.
        if (LOG.isDebugEnabled()) {
            LOG.debug("All low priority internal tables are appended once.");
        }
        currentDbId = 0;
        currentTableId = 0;
        lastRoundFinishTime = System.currentTimeMillis();
    }

    @VisibleForTesting
    public boolean doAppend(Map<TableName, Set<Pair<String, String>>> jobMap,
                         Set<Pair<String, String>> columnIndexPairs,
                         TableName tableName) {
        synchronized (jobMap) {
            if (!jobMap.containsKey(tableName) && jobMap.size() >= JOB_MAP_SIZE) {
                return false;
            }
            if (jobMap.containsKey(tableName)) {
                jobMap.get(tableName).addAll(columnIndexPairs);
            } else {
                jobMap.put(tableName, columnIndexPairs);
            }
        }
        return true;
    }

    // For unit test only.
    public void setLastRoundFinishTime(long value) {
        lastRoundFinishTime = value;
    }

}