/root/doris/be/src/util/histogram.h
| Line | Count | Source | 
| 1 |  | // Licensed to the Apache Software Foundation (ASF) under one | 
| 2 |  | // or more contributor license agreements.  See the NOTICE file | 
| 3 |  | // distributed with this work for additional information | 
| 4 |  | // regarding copyright ownership.  The ASF licenses this file | 
| 5 |  | // to you under the Apache License, Version 2.0 (the | 
| 6 |  | // "License"); you may not use this file except in compliance | 
| 7 |  | // with the License.  You may obtain a copy of the License at | 
| 8 |  | // | 
| 9 |  | //   http://www.apache.org/licenses/LICENSE-2.0 | 
| 10 |  | // | 
| 11 |  | // Unless required by applicable law or agreed to in writing, | 
| 12 |  | // software distributed under the License is distributed on an | 
| 13 |  | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | 
| 14 |  | // KIND, either express or implied.  See the License for the | 
| 15 |  | // specific language governing permissions and limitations | 
| 16 |  | // under the License. | 
| 17 |  |  | 
| 18 |  | #pragma once | 
| 19 |  |  | 
| 20 |  | #include <stddef.h> | 
| 21 |  | #include <stdint.h> | 
| 22 |  |  | 
| 23 |  | #include <atomic> | 
| 24 |  | #include <map> | 
| 25 |  | #include <string> | 
| 26 |  | #include <vector> | 
| 27 |  |  | 
| 28 |  | #include "common/logging.h" | 
| 29 |  |  | 
| 30 |  | namespace doris { | 
| 31 |  |  | 
| 32 |  | // Histogram data structure implementation: | 
| 33 |  | // | 
| 34 |  | // After construction, the 'value_index_map' will be set to: | 
| 35 |  | // | 
| 36 |  | // BucketValue: |   1   |   2   | 2*1.5 |2*1.5^2|2*1.5^3|  ...  |2*1.5^n|  ...  |UINT64MAX| | 
| 37 |  | // Index:       |   0   |   1   |   2   |   3   |   4   |  ...  |  n-1  |  ...  |   108   | | 
| 38 |  | // | 
| 39 |  | // The width of bucket is growing by 1.5 times and its initial values are 1 and 2. | 
| 40 |  | // The input value will be add to the bucket which lower bound is just greater than | 
| 41 |  | // input value. For example, input value A > 2*1.5^n and A <= 2*1.5^(n+1), A will be added | 
| 42 |  | // to the latter bucket. | 
| 43 |  | class HistogramBucketMapper { | 
| 44 |  | public: | 
| 45 |  |     HistogramBucketMapper(); | 
| 46 |  |  | 
| 47 |  |     // converts a value to the bucket index. | 
| 48 |  |     size_t index_for_value(const uint64_t& value) const; | 
| 49 |  |     // number of buckets required. | 
| 50 |  |  | 
| 51 | 246 |     size_t bucket_count() const { return _bucket_values.size(); } | 
| 52 |  |  | 
| 53 | 295 |     uint64_t last_value() const { return _max_bucket_value; } | 
| 54 |  |  | 
| 55 | 0 |     uint64_t first_value() const { return _min_bucket_value; } | 
| 56 |  |  | 
| 57 | 92 |     uint64_t bucket_limit(const size_t bucket_number) const { | 
| 58 | 92 |         DCHECK(bucket_number < bucket_count()); | 
| 59 | 92 |         return _bucket_values[bucket_number]; | 
| 60 | 92 |     } | 
| 61 |  |  | 
| 62 |  | private: | 
| 63 |  |     std::vector<uint64_t> _bucket_values; | 
| 64 |  |     uint64_t _max_bucket_value; | 
| 65 |  |     uint64_t _min_bucket_value; | 
| 66 |  |     std::map<uint64_t, uint64_t> _value_index_map; | 
| 67 |  | }; | 
| 68 |  |  | 
| 69 |  | struct HistogramStat { | 
| 70 |  |     HistogramStat(); | 
| 71 | 150 |     ~HistogramStat() {} | 
| 72 |  |  | 
| 73 |  |     HistogramStat(const HistogramStat&) = delete; | 
| 74 |  |     HistogramStat& operator=(const HistogramStat&) = delete; | 
| 75 |  |  | 
| 76 |  |     void clear(); | 
| 77 |  |     bool is_empty() const; | 
| 78 |  |     void add(const uint64_t& value); | 
| 79 |  |     void merge(const HistogramStat& other); | 
| 80 |  |  | 
| 81 | 1.46M |     uint64_t min() const { return _min.load(std::memory_order_relaxed); } | 
| 82 | 1.46M |     uint64_t max() const { return _max.load(std::memory_order_relaxed); } | 
| 83 | 233 |     uint64_t num() const { return _num.load(std::memory_order_relaxed); } | 
| 84 | 169 |     uint64_t sum() const { return _sum.load(std::memory_order_relaxed); } | 
| 85 | 149 |     uint64_t sum_squares() const { return _sum_squares.load(std::memory_order_relaxed); } | 
| 86 | 15.6k |     uint64_t bucket_at(size_t b) const { return _buckets[b].load(std::memory_order_relaxed); } | 
| 87 |  |  | 
| 88 |  |     double median() const; | 
| 89 |  |     double percentile(double p) const; | 
| 90 |  |     double average() const; | 
| 91 |  |     double standard_deviation() const; | 
| 92 |  |     std::string to_string() const; | 
| 93 |  |  | 
| 94 |  |     // To be able to use HistogramStat as thread local variable, it | 
| 95 |  |     // cannot have dynamic allocated member. That's why we're | 
| 96 |  |     // using manually values from BucketMapper | 
| 97 |  |     std::atomic<uint64_t> _min; | 
| 98 |  |     std::atomic<uint64_t> _max; | 
| 99 |  |     std::atomic<uint64_t> _num; | 
| 100 |  |     std::atomic<uint64_t> _sum; | 
| 101 |  |     std::atomic<uint64_t> _sum_squares; | 
| 102 |  |     std::atomic<uint64_t> _buckets[109]; // 109==BucketMapper::bucket_count() | 
| 103 |  |     const uint64_t _num_buckets; | 
| 104 |  | }; | 
| 105 |  |  | 
| 106 |  | } // namespace doris |