Diagnoser.java

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.system;

import org.apache.doris.catalog.Database;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.MaterializedIndex;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.Partition;
import org.apache.doris.catalog.Replica;
import org.apache.doris.catalog.Tablet;
import org.apache.doris.catalog.TabletInvertedIndex;
import org.apache.doris.catalog.TabletMeta;

import com.google.common.collect.Lists;
import org.json.simple.JSONObject;

import java.util.List;

/**
 * Utility class to diagnose the Doris system.
 */
public class Diagnoser {
    // Inserted between two issues that end up in the same result row, so that
    // problems of different replicas do not run into each other.
    private static final String ISSUE_SEPARATOR = ". ";

    /**
     * Diagnoses the given tablet and returns the info and issues found about it.
     *
     * <p>Every row of the result is a list of three strings: the item name, the
     * info/issue text, and a third column that this method always leaves empty.
     * Rows are emitted in the following order, and the method returns early (with
     * the rows collected so far) as soon as one of the lookups fails:
     * <ul>
     *   <li>TabletExist: "Yes" or "No"</li>
     *   <li>TabletId</li>
     *   <li>Database: "name: id", or "In catalog recycle bin" / "Not exist"</li>
     *   <li>Table: "name: id", or "In catalog recycle bin" / "Not exist"</li>
     *   <li>Partition: "name: id", or "In catalog recycle bin" / "Not exist"</li>
     *   <li>MaterializedIndex: "name: id", or "Not exist"</li>
     *   <li>Tablet: "Not exist" (only emitted when the index has no such tablet)</li>
     *   <li>Replicas(ReplicaId -> BackendId): JSON object {"replica_id" : "backend id"}</li>
     *   <li>ReplicasNum: "OK" or the actual/expected replica number</li>
     *   <li>ReplicaBackendStatus: "OK" or the backend issues</li>
     *   <li>ReplicaVersionStatus: "OK" or the version issues</li>
     *   <li>ReplicaStatus: "OK" or the replica state issues</li>
     *   <li>ReplicaCompactionStatus: "OK" or the version count issues</li>
     * </ul>
     *
     * @param tabletId id of the tablet to diagnose
     * @return the diagnosis rows, never null
     */
    public static List<List<String>> diagnoseTablet(long tabletId) {
        List<List<String>> results = Lists.newArrayList();
        TabletInvertedIndex invertedIndex = Env.getCurrentInvertedIndex();
        TabletMeta tabletMeta = invertedIndex.getTabletMeta(tabletId);
        if (tabletMeta == null) {
            results.add(row("TabletExist", "No"));
            return results;
        }
        results.add(row("TabletExist", "Yes"));
        results.add(row("TabletId", String.valueOf(tabletId)));

        // database
        Database db = Env.getCurrentInternalCatalog().getDbNullable(tabletMeta.getDbId());
        if (db == null) {
            boolean inRecycleBin = Env.getCurrentRecycleBin().isRecycleDatabase(tabletMeta.getDbId());
            results.add(row("Database", missingOrRecycled(inRecycleBin)));
            return results;
        }
        results.add(row("Database", db.getFullName() + ": " + db.getId()));

        // table
        OlapTable tbl = (OlapTable) db.getTableNullable(tabletMeta.getTableId());
        if (tbl == null) {
            boolean inRecycleBin = Env.getCurrentRecycleBin().isRecycleTable(tabletMeta.getDbId(),
                    tabletMeta.getTableId());
            results.add(row("Table", missingOrRecycled(inRecycleBin)));
            return results;
        }
        results.add(row("Table", tbl.getName() + ": " + tbl.getId()));

        // partition
        Partition partition = tbl.getPartition(tabletMeta.getPartitionId());
        if (partition == null) {
            boolean inRecycleBin = Env.getCurrentRecycleBin().isRecyclePartition(tabletMeta.getDbId(),
                    tabletMeta.getTableId(), tabletMeta.getPartitionId());
            results.add(row("Partition", missingOrRecycled(inRecycleBin)));
            return results;
        }
        results.add(row("Partition", partition.getName() + ": " + partition.getId()));

        // materialized index
        MaterializedIndex mIndex = partition.getIndex(tabletMeta.getIndexId());
        if (mIndex == null) {
            results.add(row("MaterializedIndex", "Not exist"));
            return results;
        }
        results.add(row("MaterializedIndex", tbl.getIndexNameById(mIndex.getId()) + ": " + mIndex.getId()));

        // tablet
        Tablet tablet = mIndex.getTablet(tabletId);
        if (tablet == null) {
            // Defensive: the inverted index knew the tablet but the materialized index
            // does not hand it out. Report it instead of failing with an NPE below.
            results.add(row("Tablet", "Not exist"));
            return results;
        }

        // replica info
        List<Replica> replicas = tablet.getReplicas();
        JSONObject jobj = new JSONObject();
        for (Replica replica : replicas) {
            jobj.put(replica.getId(), replica.getBackendIdWithoutException());
        }
        results.add(row("Replicas(ReplicaId -> BackendId)", jobj.toJSONString()));

        // replica num
        short replicaNum = tbl.getPartitionInfo().getReplicaAllocation(partition.getId()).getTotalReplicaNum();
        if (replicas.size() != replicaNum) {
            results.add(row("ReplicasNum", "Replica num is " + replicas.size() + ", expected: " + replicaNum));
        } else {
            results.add(row("ReplicasNum", "OK"));
        }

        SystemInfoService infoService = Env.getCurrentSystemInfo();
        StringBuilder backendErr = new StringBuilder();
        StringBuilder versionErr = new StringBuilder();
        StringBuilder statusErr = new StringBuilder();
        StringBuilder compactionErr = new StringBuilder();
        long visibleVersion = partition.getVisibleVersion();
        for (Replica replica : replicas) {
            long backendId = replica.getBackendIdWithoutException();

            // backend: only the first problem of each backend is reported
            String backendIssue = diagnoseBackend(infoService, backendId);
            if (backendIssue != null) {
                // backend messages already carry their own trailing ". "
                backendErr.append(backendIssue);
            }

            // version: a last failed version is only reported when the version itself matches
            if (replica.getVersion() != visibleVersion) {
                appendIssue(versionErr, "Replica on backend " + backendId + "'s version ("
                        + replica.getVersion() + ") does not equal"
                        + " to partition visible version (" + visibleVersion + ")");
            } else if (replica.getLastFailedVersion() != -1) {
                appendIssue(versionErr, "Replica on backend " + backendId + "'s last failed version is "
                        + replica.getLastFailedVersion());
            }

            // status
            if (!replica.isAlive() || replica.isUserDrop()) {
                appendIssue(statusErr, "Replica on backend " + backendId
                        + "'s state is " + replica.getState()
                        + ", and is bad: " + (replica.isBad() ? "Yes" : "No")
                        + ", and is going to drop: " + (replica.isUserDrop() ? "Yes" : "No"));
            }

            // compaction
            if (replica.tooBigVersionCount()) {
                appendIssue(compactionErr, "Replica on backend " + backendId
                        + "'s version count is too high: "
                        + replica.getVisibleVersionCount());
            }
        }
        results.add(row("ReplicaBackendStatus", okOrIssues(backendErr)));
        results.add(row("ReplicaVersionStatus", okOrIssues(versionErr)));
        results.add(row("ReplicaStatus", okOrIssues(statusErr)));
        results.add(row("ReplicaCompactionStatus", okOrIssues(compactionErr)));
        return results;
    }

    // Builds one result row; the third column is always left empty by this class.
    private static List<String> row(String item, String info) {
        return Lists.newArrayList(item, info, "");
    }

    // Text shown for a catalog object that could not be found in the catalog.
    private static String missingOrRecycled(boolean inRecycleBin) {
        return inRecycleBin ? "In catalog recycle bin" : "Not exist";
    }

    // Returns the first problem found with the given backend, or null if none of the checks fails.
    private static String diagnoseBackend(SystemInfoService infoService, long backendId) {
        Backend be = infoService.getBackend(backendId);
        if (be == null) {
            return "Backend " + backendId + " does not exist. ";
        }
        if (!be.isAlive()) {
            return "Backend " + backendId + " is not alive. ";
        }
        if (be.isDecommissioned()) {
            return "Backend " + backendId + " is decommission. ";
        }
        if (!be.isLoadAvailable()) {
            return "Backend " + backendId + " is not load available. ";
        }
        if (!be.isQueryAvailable()) {
            return "Backend " + backendId + " is not query available. ";
        }
        if (be.diskExceedLimit()) {
            return "Backend " + backendId + " has no space left. ";
        }
        return null;
    }

    // Appends an issue, separating it from any issue that was collected before it.
    private static void appendIssue(StringBuilder issues, String issue) {
        if (issues.length() > 0) {
            issues.append(ISSUE_SEPARATOR);
        }
        issues.append(issue);
    }

    // Renders the collected issues of one result row, or "OK" when there are none.
    private static String okOrIssues(StringBuilder issues) {
        return issues.length() == 0 ? "OK" : issues.toString();
    }
}