MetadataViewer.java

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.catalog;

import org.apache.doris.analysis.BinaryPredicate.Operator;
import org.apache.doris.analysis.PartitionNames;
import org.apache.doris.analysis.ShowDataSkewStmt;
import org.apache.doris.analysis.ShowReplicaDistributionStmt;
import org.apache.doris.analysis.ShowReplicaStatusStmt;
import org.apache.doris.catalog.MaterializedIndex.IndexExtState;
import org.apache.doris.catalog.Replica.ReplicaStatus;
import org.apache.doris.cloud.catalog.CloudEnv;
import org.apache.doris.common.Config;
import org.apache.doris.common.DdlException;
import org.apache.doris.common.FeConstants;
import org.apache.doris.nereids.trees.expressions.EqualTo;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.plans.commands.ShowReplicaStatusCommand;
import org.apache.doris.resource.Tag;
import org.apache.doris.system.Backend;
import org.apache.doris.system.SystemInfoService;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

import java.text.DecimalFormat;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class MetadataViewer {

    public static List<List<String>> getTabletStatus(ShowReplicaStatusCommand command) throws DdlException {
        return getTabletStatus(command.getDbName(), command.getTblName(), command.getPartitions(),
            command.getStatusFilter(), command.isEqual());
    }

    public static List<List<String>> getTabletStatus(ShowReplicaStatusStmt stmt) throws DdlException {
        return getTabletStatus(stmt.getDbName(), stmt.getTblName(), stmt.getPartitions(),
                               stmt.getStatusFilter(), stmt.getOp());
    }

    private static List<List<String>> getTabletStatus(String dbName, String tblName, List<String> partitions,
                                                      ReplicaStatus statusFilter, boolean isEqual) throws DdlException {
        List<List<String>> result = Lists.newArrayList();

        Env env = Env.getCurrentEnv();
        SystemInfoService infoService = Env.getCurrentSystemInfo();

        Database db = env.getInternalCatalog().getDbOrDdlException(dbName);
        OlapTable olapTable = db.getOlapTableOrDdlException(tblName);

        olapTable.readLock();
        try {
            if (partitions.isEmpty()) {
                partitions.addAll(olapTable.getPartitionNames());
            } else {
                // check partition
                for (String partName : partitions) {
                    Partition partition = olapTable.getPartition(partName);
                    if (partition == null) {
                        throw new DdlException("Partition does not exist: " + partName);
                    }
                }
            }

            for (String partName : partitions) {
                Partition partition = olapTable.getPartition(partName);
                long visibleVersion = partition.getVisibleVersion();
                short replicationNum = olapTable.getPartitionInfo()
                        .getReplicaAllocation(partition.getId()).getTotalReplicaNum();

                for (MaterializedIndex index : partition.getMaterializedIndices(IndexExtState.VISIBLE)) {
                    int schemaHash = olapTable.getSchemaHashByIndexId(index.getId());
                    for (Tablet tablet : index.getTablets()) {
                        long tabletId = tablet.getId();
                        int count = replicationNum;
                        for (Replica replica : tablet.getReplicas()) {
                            --count;
                            List<String> row = Lists.newArrayList();

                            ReplicaStatus status = ReplicaStatus.OK;
                            long beId = replica.getBackendIdWithoutException();
                            Backend be = infoService.getBackend(beId);
                            if (be == null || !be.isAlive() || replica.isBad()) {
                                status = ReplicaStatus.DEAD;
                            } else if (replica.getVersion() < visibleVersion
                                    || replica.getLastFailedVersion() > 0) {
                                status = ReplicaStatus.VERSION_ERROR;

                            } else if (replica.getSchemaHash() != -1 && replica.getSchemaHash() != schemaHash) {
                                status = ReplicaStatus.SCHEMA_ERROR;
                            } else if (replica.isUserDrop()) {
                                status = ReplicaStatus.DROP;
                            }

                            if (filterReplica(status, statusFilter, isEqual)) {
                                continue;
                            }

                            row.add(String.valueOf(tabletId));
                            row.add(String.valueOf(replica.getId()));
                            row.add(String.valueOf(beId));
                            row.add(String.valueOf(replica.getVersion()));
                            row.add(String.valueOf(replica.getLastFailedVersion()));
                            row.add(String.valueOf(replica.getLastSuccessVersion()));
                            row.add(String.valueOf(visibleVersion));
                            row.add(String.valueOf(replica.getSchemaHash()));
                            row.add(String.valueOf(replica.getTotalVersionCount()));
                            row.add(String.valueOf(replica.isBad()));
                            row.add(String.valueOf(replica.isUserDrop()));
                            row.add(replica.getState().name());
                            row.add(status.name());
                            result.add(row);
                        }

                        if (filterReplica(ReplicaStatus.MISSING, statusFilter, isEqual)) {
                            continue;
                        }

                        // get missing replicas
                        for (int i = 0; i < count; ++i) {
                            List<String> row = Lists.newArrayList();
                            row.add(String.valueOf(tabletId));
                            row.add("-1");
                            row.add("-1");
                            row.add("-1");
                            row.add("-1");
                            row.add("-1");
                            row.add("-1");
                            row.add("-1");
                            row.add(FeConstants.null_string);
                            row.add(FeConstants.null_string);
                            row.add(FeConstants.null_string);
                            row.add(ReplicaStatus.MISSING.name());
                            result.add(row);
                        }
                    }
                }
            }
        } finally {
            olapTable.readUnlock();
        }

        return result;
    }

    private static List<List<String>> getTabletStatus(String dbName, String tblName, List<String> partitions,
            ReplicaStatus statusFilter, Operator op) throws DdlException {
        List<List<String>> result = Lists.newArrayList();

        Env env = Env.getCurrentEnv();
        SystemInfoService infoService = Env.getCurrentSystemInfo();

        Database db = env.getInternalCatalog().getDbOrDdlException(dbName);
        OlapTable olapTable = db.getOlapTableOrDdlException(tblName);

        olapTable.readLock();
        try {
            if (partitions.isEmpty()) {
                partitions.addAll(olapTable.getPartitionNames());
            } else {
                // check partition
                for (String partName : partitions) {
                    Partition partition = olapTable.getPartition(partName);
                    if (partition == null) {
                        throw new DdlException("Partition does not exist: " + partName);
                    }
                }
            }

            for (String partName : partitions) {
                Partition partition = olapTable.getPartition(partName);
                long visibleVersion = partition.getVisibleVersion();
                short replicationNum = olapTable.getPartitionInfo()
                        .getReplicaAllocation(partition.getId()).getTotalReplicaNum();

                for (MaterializedIndex index : partition.getMaterializedIndices(IndexExtState.VISIBLE)) {
                    int schemaHash = olapTable.getSchemaHashByIndexId(index.getId());
                    for (Tablet tablet : index.getTablets()) {
                        long tabletId = tablet.getId();
                        int count = replicationNum;
                        for (Replica replica : tablet.getReplicas()) {
                            --count;
                            List<String> row = Lists.newArrayList();

                            ReplicaStatus status = ReplicaStatus.OK;
                            long beId = replica.getBackendIdWithoutException();
                            Backend be = infoService.getBackend(beId);
                            if (be == null || !be.isAlive() || replica.isBad()) {
                                status = ReplicaStatus.DEAD;
                            } else if (replica.getVersion() < visibleVersion
                                        || replica.getLastFailedVersion() > 0) {
                                status = ReplicaStatus.VERSION_ERROR;

                            } else if (replica.getSchemaHash() != -1 && replica.getSchemaHash() != schemaHash) {
                                status = ReplicaStatus.SCHEMA_ERROR;
                            } else if (replica.isUserDrop()) {
                                status = ReplicaStatus.DROP;
                            }

                            if (filterReplica(status, statusFilter, op)) {
                                continue;
                            }

                            row.add(String.valueOf(tabletId));
                            row.add(String.valueOf(replica.getId()));
                            row.add(String.valueOf(beId));
                            row.add(String.valueOf(replica.getVersion()));
                            row.add(String.valueOf(replica.getLastFailedVersion()));
                            row.add(String.valueOf(replica.getLastSuccessVersion()));
                            row.add(String.valueOf(visibleVersion));
                            row.add(String.valueOf(replica.getSchemaHash()));
                            row.add(String.valueOf(replica.getTotalVersionCount()));
                            row.add(String.valueOf(replica.isBad()));
                            row.add(String.valueOf(replica.isUserDrop()));
                            row.add(replica.getState().name());
                            row.add(status.name());
                            result.add(row);
                        }

                        if (filterReplica(ReplicaStatus.MISSING, statusFilter, op)) {
                            continue;
                        }

                        // get missing replicas
                        for (int i = 0; i < count; ++i) {
                            List<String> row = Lists.newArrayList();
                            row.add(String.valueOf(tabletId));
                            row.add("-1");
                            row.add("-1");
                            row.add("-1");
                            row.add("-1");
                            row.add("-1");
                            row.add("-1");
                            row.add("-1");
                            row.add(FeConstants.null_string);
                            row.add(FeConstants.null_string);
                            row.add(FeConstants.null_string);
                            row.add(ReplicaStatus.MISSING.name());
                            result.add(row);
                        }
                    }
                }
            }
        } finally {
            olapTable.readUnlock();
        }

        return result;
    }

    private static boolean filterReplica(ReplicaStatus status, ReplicaStatus statusFilter, boolean isEqual) {
        if (statusFilter == null) {
            return false;
        }
        if (isEqual) {
            return status != statusFilter;
        } else {
            return status == statusFilter;
        }
    }

    private static boolean filterReplica(ReplicaStatus status, ReplicaStatus statusFilter, Operator op) {
        if (statusFilter == null) {
            return false;
        }
        if (op == Operator.EQ) {
            return status != statusFilter;
        } else {
            return status == statusFilter;
        }
    }

    /**
     * Used for Nereids
     */
    private static boolean filterReplica(ReplicaStatus status, ReplicaStatus statusFilter, Expression op) {
        if (statusFilter == null) {
            return false;
        }
        if (op instanceof EqualTo) {
            return status != statusFilter;
        } else {
            return status == statusFilter;
        }
    }

    public static List<List<String>> getTabletDistribution(ShowReplicaDistributionStmt stmt) throws DdlException {
        return getTabletDistribution(stmt.getDbName(), stmt.getTblName(), stmt.getPartitionNames());
    }

    public static List<List<String>> getTabletDistribution(
            String dbName, String tblName, PartitionNames partitionNames)
            throws DdlException {
        DecimalFormat df = new DecimalFormat("00.00 %");

        List<List<String>> result = Lists.newArrayList();

        Env env = Env.getCurrentEnv();
        SystemInfoService infoService = Env.getCurrentSystemInfo();

        Database db = env.getInternalCatalog().getDbOrDdlException(dbName);
        OlapTable olapTable = db.getOlapTableOrDdlException(tblName);
        olapTable.readLock();
        try {
            List<Long> partitionIds = Lists.newArrayList();
            if (partitionNames == null) {
                for (Partition partition : olapTable.getPartitions()) {
                    partitionIds.add(partition.getId());
                }
            } else {
                // check partition
                for (String partName : partitionNames.getPartitionNames()) {
                    Partition partition = olapTable.getPartition(partName, partitionNames.isTemp());
                    if (partition == null) {
                        throw new DdlException("Partition does not exist: " + partName);
                    }
                    partitionIds.add(partition.getId());
                }
            }

            // backend id -> replica count
            Map<Long, Integer> countMap = Maps.newHashMap();
            // backend id -> replica size
            Map<Long, Long> sizeMap = Maps.newHashMap();
            // init map
            List<Long> beIds = infoService.getAllBackendIds(false);
            for (long beId : beIds) {
                countMap.put(beId, 0);
                sizeMap.put(beId, 0L);
            }

            int totalReplicaNum = 0;
            long totalReplicaSize = 0;
            for (long partId : partitionIds) {
                Partition partition = olapTable.getPartition(partId);
                for (MaterializedIndex index : partition.getMaterializedIndices(IndexExtState.VISIBLE)) {
                    for (Tablet tablet : index.getTablets()) {
                        for (Replica replica : tablet.getReplicas()) {
                            long beId = replica.getBackendIdWithoutException();
                            if (!countMap.containsKey(beId)) {
                                continue;
                            }
                            countMap.put(beId,
                                    countMap.get(beId) + 1);
                            sizeMap.put(beId,
                                    sizeMap.get(beId) + replica.getDataSize());
                            totalReplicaNum++;
                            totalReplicaSize += replica.getDataSize();
                        }
                    }
                }
            }

            // graph
            Collections.sort(beIds);
            for (Long beId : beIds) {
                List<String> row = Lists.newArrayList();
                row.add(String.valueOf(beId));
                row.add(String.valueOf(countMap.get(beId)));
                row.add(String.valueOf(sizeMap.get(beId)));
                row.add(graph(countMap.get(beId), totalReplicaNum));
                row.add(totalReplicaNum == countMap.get(beId) ? (totalReplicaNum == 0 ? "0.00%" : "100.00%")
                        : df.format((double) countMap.get(beId) / totalReplicaNum));
                row.add(graph(sizeMap.get(beId), totalReplicaSize));
                row.add(totalReplicaSize == sizeMap.get(beId) ? (totalReplicaSize == 0 ? "0.00%" : "100.00%")
                        : df.format((double) sizeMap.get(beId) / totalReplicaSize));
                if (Config.isNotCloudMode()) {
                    row.add("");
                    row.add("");
                } else {
                    Backend be = CloudEnv.getCurrentSystemInfo().getBackend(beId);
                    if (be != null) {
                        row.add(be.getTagMap().get(Tag.CLOUD_CLUSTER_NAME));
                        row.add(be.getTagMap().get(Tag.CLOUD_CLUSTER_ID));
                    } else {
                        row.add("not exist be");
                        row.add("not exist be");
                    }
                }
                result.add(row);
            }

        } finally {
            olapTable.readUnlock();
        }

        return result;
    }

    private static String graph(long num, long totalNum) {
        StringBuilder sb = new StringBuilder();
        long normalized = num == totalNum ? (totalNum == 0L ? 0 : 100) : (int) Math.ceil(num * 100 / totalNum);
        for (int i = 0; i < normalized; ++i) {
            sb.append(">");
        }
        return sb.toString();
    }

    public static List<List<String>> getDataSkew(ShowDataSkewStmt stmt) throws DdlException {
        return getDataSkew(stmt.getDbName(), stmt.getTblName(), stmt.getPartitionNames());
    }

    public static List<List<String>> getDataSkew(String dbName, String tblName, PartitionNames partitionNames)
            throws DdlException {
        DecimalFormat df = new DecimalFormat("00.00 %");

        List<List<String>> result = Lists.newArrayList();
        Env env = Env.getCurrentEnv();

        Database db = env.getInternalCatalog().getDbOrDdlException(dbName);
        OlapTable olapTable = db.getOlapTableOrDdlException(tblName);

        if (olapTable.getPartitionNames().isEmpty()) {
            throw new DdlException("Can not find any partition from " + dbName + "." + tblName);
        }

        // patition -> isTmep
        Map<String, Boolean> allPartionNames = new HashMap<>();
        if (partitionNames == null) {
            for (Partition p : olapTable.getPartitions()) {
                allPartionNames.put(p.getName(), false);
            }
            for (Partition p : olapTable.getAllTempPartitions()) {
                allPartionNames.put(p.getName(), true);
            }
        } else {
            for (String name : partitionNames.getPartitionNames()) {
                allPartionNames.put(name, partitionNames.isTemp());
            }
        }

        olapTable.readLock();
        try {
            Partition partition = null;
            // check partition
            for (Map.Entry<String, Boolean> partName : allPartionNames.entrySet()) {
                partition = olapTable.getPartition(partName.getKey(), partName.getValue());
                if (partition == null) {
                    throw new DdlException("Partition does not exist: " + partName);
                }
                DistributionInfo distributionInfo = partition.getDistributionInfo();
                List<Long> rowCountTabletInfos = Lists.newArrayListWithCapacity(distributionInfo.getBucketNum());
                List<Long> dataSizeTabletInfos = Lists.newArrayListWithCapacity(distributionInfo.getBucketNum());
                for (long i = 0; i < distributionInfo.getBucketNum(); i++) {
                    rowCountTabletInfos.add(0L);
                    dataSizeTabletInfos.add(0L);
                }

                long totalSize = 0;
                for (MaterializedIndex mIndex : partition.getMaterializedIndices(IndexExtState.VISIBLE)) {
                    List<Long> tabletIds = mIndex.getTabletIdsInOrder();
                    for (int i = 0; i < tabletIds.size(); i++) {
                        Tablet tablet = mIndex.getTablet(tabletIds.get(i));
                        long rowCount = tablet.getRowCount(true);
                        long dataSize = tablet.getDataSize(true);
                        rowCountTabletInfos.set(i, rowCountTabletInfos.get(i) + rowCount);
                        dataSizeTabletInfos.set(i, dataSizeTabletInfos.get(i) + dataSize);
                        totalSize += dataSize;
                    }
                }

                // graph
                for (int i = 0; i < distributionInfo.getBucketNum(); i++) {
                    List<String> row = Lists.newArrayList();
                    row.add(partName.getKey());
                    row.add(String.valueOf(i));
                    row.add(rowCountTabletInfos.get(i).toString());
                    row.add(dataSizeTabletInfos.get(i).toString());
                    row.add(graph(dataSizeTabletInfos.get(i), totalSize));
                    row.add(totalSize == dataSizeTabletInfos.get(i) ? (totalSize == 0L ? "0.00%" : "100.00%") :
                            df.format((double) dataSizeTabletInfos.get(i) / totalSize));
                    result.add(row);
                }
            }
        } finally {
            olapTable.readUnlock();
        }

        return result;
    }
}