Histogram.java

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.statistics;

import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.catalog.Type;
import org.apache.doris.statistics.util.StatisticsUtil;

import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import org.apache.commons.collections.CollectionUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.util.Collections;
import java.util.List;

public class Histogram {
    private static final Logger LOG = LogManager.getLogger(Histogram.class);

    public final Type dataType;

    public final double sampleRate;

    public final List<Bucket> buckets;

    public final int numBuckets;

    public Histogram(Type dataType, double sampleRate, int numBuckets, List<Bucket> buckets) {
        this.dataType = dataType;
        this.sampleRate = sampleRate;
        this.numBuckets = numBuckets;
        this.buckets = buckets;
    }

    public static Histogram UNKNOWN = new HistogramBuilder().setDataType(Type.NULL)
            .setSampleRate(0).setNumBuckets(0).setBuckets(Collections.emptyList())
            .build();

    // TODO: use thrift
    public static Histogram fromResultRow(ResultRow resultRow) {
        try {
            HistogramBuilder histogramBuilder = new HistogramBuilder();
            HistData histData = new HistData(resultRow);
            long catalogId = histData.statsId.catalogId;
            long idxId = histData.statsId.idxId;
            long dbId = histData.statsId.dbId;
            long tblId = histData.statsId.tblId;
            String colName = histData.statsId.colId;
            Column col = StatisticsUtil.findColumn(catalogId, dbId, tblId, idxId, colName);
            if (col == null) {
                LOG.warn("Failed to deserialize histogram statistics, ctlId: {} dbId: {}"
                        + "tblId: {} column: {} not exists", catalogId, dbId, tblId, colName);
                return null;
            }

            Type dataType = col.getType();
            histogramBuilder.setDataType(dataType);

            double sampleRate = histData.sampleRate;
            histogramBuilder.setSampleRate(sampleRate);

            String json = histData.buckets;
            JsonObject jsonObj = JsonParser.parseString(json).getAsJsonObject();

            int bucketNum = jsonObj.get("num_buckets").getAsInt();
            histogramBuilder.setNumBuckets(bucketNum);

            List<Bucket> buckets = Lists.newArrayList();
            JsonArray jsonArray = jsonObj.getAsJsonArray("buckets");
            for (JsonElement element : jsonArray) {
                String bucketJson = element.toString();
                buckets.add(Bucket.deserializeFromJson(dataType, bucketJson));
            }
            histogramBuilder.setBuckets(buckets);

            return histogramBuilder.build();
        } catch (Exception e) {
            LOG.warn("Failed to deserialize histogram statistics.", e);
            return null;
        }
    }

    /**
     * Histogram info is stored in an internal table in json format,
     * and Histogram obj can be obtained by this method.
     */
    public static Histogram deserializeFromJson(String json) {
        if (Strings.isNullOrEmpty(json)) {
            return Histogram.UNKNOWN;
        }

        try {
            HistogramBuilder histogramBuilder = new HistogramBuilder();

            JsonObject histogramJson = JsonParser.parseString(json).getAsJsonObject();
            String typeStr = histogramJson.get("data_type").getAsString();
            Type dataType = Type.fromPrimitiveType(PrimitiveType.valueOf(typeStr));
            histogramBuilder.setDataType(dataType);

            float sampleRate = histogramJson.get("sample_rate").getAsFloat();
            histogramBuilder.setSampleRate(sampleRate);

            int bucketSize = histogramJson.get("num_buckets").getAsInt();
            histogramBuilder.setNumBuckets(bucketSize);

            JsonArray jsonArray = histogramJson.getAsJsonArray("buckets");
            List<Bucket> buckets = Lists.newArrayList();

            for (JsonElement element : jsonArray) {
                String bucketJsonStr = element.toString();
                buckets.add(Bucket.deserializeFromJson(dataType, bucketJsonStr));
            }
            histogramBuilder.setBuckets(buckets);

            return histogramBuilder.build();
        } catch (Throwable e) {
            LOG.error("deserialize from json error.", e);
        }

        return Histogram.UNKNOWN;
    }

    /**
     * Convert to json format string
     */
    public static String serializeToJson(Histogram histogram) {
        if (histogram == null) {
            return "";
        }

        JsonObject histogramJson = new JsonObject();

        histogramJson.addProperty("data_type", histogram.dataType.toString());
        histogramJson.addProperty("sample_rate", histogram.sampleRate);
        histogramJson.addProperty("num_buckets", histogram.buckets.size());

        JsonArray bucketsJson = getBucketsJson(histogram.buckets);
        histogramJson.add("buckets", bucketsJson);

        return histogramJson.toString();
    }

    public static JsonArray getBucketsJson(List<Bucket> buckets) {
        if (buckets == null) {
            return null;
        }
        JsonArray bucketsJsonArray = new JsonArray();
        buckets.stream().map(Bucket::serializeToJsonObj).forEach(bucketsJsonArray::add);
        return bucketsJsonArray;
    }

    public double size() {
        if (CollectionUtils.isEmpty(buckets)) {
            return 0;
        }
        Bucket lastBucket = buckets.get(buckets.size() - 1);
        return lastBucket.preSum + lastBucket.count;
    }

    @Override
    public String toString() {
        return serializeToJson(this);
    }
}