/root/doris/be/src/util/histogram.cpp
| Line | Count | Source (jump to first uncovered line) | 
| 1 |  | // Licensed to the Apache Software Foundation (ASF) under one | 
| 2 |  | // or more contributor license agreements.  See the NOTICE file | 
| 3 |  | // distributed with this work for additional information | 
| 4 |  | // regarding copyright ownership.  The ASF licenses this file | 
| 5 |  | // to you under the Apache License, Version 2.0 (the | 
| 6 |  | // "License"); you may not use this file except in compliance | 
| 7 |  | // with the License.  You may obtain a copy of the License at | 
| 8 |  | // | 
| 9 |  | //   http://www.apache.org/licenses/LICENSE-2.0 | 
| 10 |  | // | 
| 11 |  | // Unless required by applicable law or agreed to in writing, | 
| 12 |  | // software distributed under the License is distributed on an | 
| 13 |  | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | 
| 14 |  | // KIND, either express or implied.  See the License for the | 
| 15 |  | // specific language governing permissions and limitations | 
| 16 |  | // under the License. | 
| 17 |  |  | 
| 18 |  | #include "util/histogram.h" | 
| 19 |  |  | 
| 20 |  | #include <stdio.h> | 
| 21 |  |  | 
| 22 |  | #include <algorithm> | 
| 23 |  | #include <cinttypes> | 
| 24 |  | #include <cmath> | 
| 25 |  | #include <limits> | 
| 26 |  | #include <utility> | 
| 27 |  |  | 
| 28 |  | namespace doris { | 
| 29 |  |  | 
| 30 | 2 | HistogramBucketMapper::HistogramBucketMapper() { | 
| 31 |  |     // If you change this, you also need to change | 
| 32 |  |     // size of array buckets_ in HistogramStat | 
| 33 | 2 |     _bucket_values = {1, 2}; | 
| 34 | 2 |     _value_index_map = {{1, 0}, {2, 1}}; | 
| 35 | 2 |     double bucket_val = static_cast<double>(_bucket_values.back()); | 
| 36 | 216 |     while ((bucket_val = 1.5 * bucket_val) <=   Branch (36:12): [True: 214, False: 2]
 | 
| 37 | 216 |            static_cast<double>(std::numeric_limits<uint64_t>::max())) { | 
| 38 | 214 |         _bucket_values.push_back(static_cast<uint64_t>(bucket_val)); | 
| 39 |  |         // Extracts two most significant digits to make histogram buckets more | 
| 40 |  |         // human-readable. E.g., 172 becomes 170. | 
| 41 | 214 |         uint64_t pow_of_ten = 1; | 
| 42 | 1.99k |         while (_bucket_values.back() / 10 > 10) {  Branch (42:16): [True: 1.77k, False: 214]
 | 
| 43 | 1.77k |             _bucket_values.back() /= 10; | 
| 44 | 1.77k |             pow_of_ten *= 10; | 
| 45 | 1.77k |         } | 
| 46 | 214 |         _bucket_values.back() *= pow_of_ten; | 
| 47 | 214 |         _value_index_map[_bucket_values.back()] = _bucket_values.size() - 1; | 
| 48 | 214 |     } | 
| 49 | 2 |     _max_bucket_value = _bucket_values.back(); | 
| 50 | 2 |     _min_bucket_value = _bucket_values.front(); | 
| 51 | 2 | } | 
| 52 |  |  | 
| 53 | 1.65k | size_t HistogramBucketMapper::index_for_value(const uint64_t& value) const { | 
| 54 | 1.65k |     if (value >= _max_bucket_value) {  Branch (54:9): [True: 0, False: 1.65k]
 | 
| 55 | 0 |         return _bucket_values.size() - 1; | 
| 56 | 1.65k |     } else if (value >= _min_bucket_value) {  Branch (56:16): [True: 1.65k, False: 0]
 | 
| 57 | 1.65k |         std::map<uint64_t, uint64_t>::const_iterator lowerBound = | 
| 58 | 1.65k |                 _value_index_map.lower_bound(value); | 
| 59 | 1.65k |         if (lowerBound != _value_index_map.end()) {  Branch (59:13): [True: 1.65k, False: 0]
 | 
| 60 | 1.65k |             return static_cast<size_t>(lowerBound->second); | 
| 61 | 1.65k |         } else { | 
| 62 | 0 |             return 0; | 
| 63 | 0 |         } | 
| 64 | 1.65k |     } else { | 
| 65 | 0 |         return 0; | 
| 66 | 0 |     } | 
| 67 | 1.65k | } | 
| 68 |  |  | 
| 69 |  | namespace { | 
| 70 |  | const HistogramBucketMapper bucket_mapper; | 
| 71 |  | } | 
| 72 |  |  | 
| 73 | 8 | HistogramStat::HistogramStat() : _num_buckets(bucket_mapper.bucket_count()) { | 
| 74 | 8 |     DCHECK(_num_buckets == sizeof(_buckets) / sizeof(*_buckets)); | 
| 75 | 8 |     clear(); | 
| 76 | 8 | } | 
| 77 |  |  | 
| 78 | 9 | void HistogramStat::clear() { | 
| 79 | 9 |     _min.store(bucket_mapper.last_value(), std::memory_order_relaxed); | 
| 80 | 9 |     _max.store(0, std::memory_order_relaxed); | 
| 81 | 9 |     _num.store(0, std::memory_order_relaxed); | 
| 82 | 9 |     _sum.store(0, std::memory_order_relaxed); | 
| 83 | 9 |     _sum_squares.store(0, std::memory_order_relaxed); | 
| 84 | 990 |     for (unsigned int b = 0; b < _num_buckets; b++) {  Branch (84:30): [True: 981, False: 9]
 | 
| 85 | 981 |         _buckets[b].store(0, std::memory_order_relaxed); | 
| 86 | 981 |     } | 
| 87 | 9 | }; | 
| 88 |  |  | 
| 89 | 2 | bool HistogramStat::is_empty() const { | 
| 90 | 2 |     return num() == 0; | 
| 91 | 2 | } | 
| 92 |  |  | 
| 93 | 1.65k | void HistogramStat::add(const uint64_t& value) { | 
| 94 |  |     // This function is designed to be lock free, as it's in the critical path | 
| 95 |  |     // of any operation. Each individual value is atomic and the order of updates | 
| 96 |  |     // by concurrent threads is tolerable. | 
| 97 | 1.65k |     const size_t index = bucket_mapper.index_for_value(value); | 
| 98 | 1.65k |     DCHECK(index < _num_buckets); | 
| 99 | 1.65k |     _buckets[index].store(_buckets[index].load(std::memory_order_relaxed) + 1, | 
| 100 | 1.65k |                           std::memory_order_relaxed); | 
| 101 |  |  | 
| 102 | 1.65k |     uint64_t old_min = min(); | 
| 103 | 1.65k |     if (value < old_min) {  Branch (103:9): [True: 6, False: 1.64k]
 | 
| 104 | 6 |         _min.store(value, std::memory_order_relaxed); | 
| 105 | 6 |     } | 
| 106 |  |  | 
| 107 | 1.65k |     uint64_t old_max = max(); | 
| 108 | 1.65k |     if (value > old_max) {  Branch (108:9): [True: 660, False: 990]
 | 
| 109 | 660 |         _max.store(value, std::memory_order_relaxed); | 
| 110 | 660 |     } | 
| 111 |  |  | 
| 112 | 1.65k |     _num.store(_num.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed); | 
| 113 | 1.65k |     _sum.store(_sum.load(std::memory_order_relaxed) + value, std::memory_order_relaxed); | 
| 114 | 1.65k |     _sum_squares.store(_sum_squares.load(std::memory_order_relaxed) + value * value, | 
| 115 | 1.65k |                        std::memory_order_relaxed); | 
| 116 | 1.65k | } | 
| 117 |  |  | 
| 118 | 1 | void HistogramStat::merge(const HistogramStat& other) { | 
| 119 |  |     // This function needs to be performed with the outer lock acquired | 
| 120 |  |     // However, atomic operation on every member is still need, since Add() | 
| 121 |  |     // requires no lock and value update can still happen concurrently | 
| 122 | 1 |     uint64_t old_min = min(); | 
| 123 | 1 |     uint64_t other_min = other.min(); | 
| 124 | 1 |     while (other_min < old_min && !_min.compare_exchange_weak(old_min, other_min)) {  Branch (124:12): [True: 0, False: 1]
  Branch (124:35): [True: 0, False: 0]
 | 
| 125 | 0 |     } | 
| 126 |  |  | 
| 127 | 1 |     uint64_t old_max = max(); | 
| 128 | 1 |     uint64_t other_max = other.max(); | 
| 129 | 1 |     while (other_max > old_max && !_max.compare_exchange_weak(old_max, other_max)) {  Branch (129:12): [True: 1, False: 0]
  Branch (129:35): [True: 0, False: 1]
 | 
| 130 | 0 |     } | 
| 131 |  |  | 
| 132 | 1 |     _num.fetch_add(other.num(), std::memory_order_relaxed); | 
| 133 | 1 |     _sum.fetch_add(other.sum(), std::memory_order_relaxed); | 
| 134 | 1 |     _sum_squares.fetch_add(other.sum_squares(), std::memory_order_relaxed); | 
| 135 | 110 |     for (unsigned int b = 0; b < _num_buckets; b++) {  Branch (135:30): [True: 109, False: 1]
 | 
| 136 | 109 |         _buckets[b].fetch_add(other.bucket_at(b), std::memory_order_relaxed); | 
| 137 | 109 |     } | 
| 138 | 1 | } | 
| 139 |  |  | 
| 140 | 11 | double HistogramStat::median() const { | 
| 141 | 11 |     return percentile(50.0); | 
| 142 | 11 | } | 
| 143 |  |  | 
| 144 | 54 | double HistogramStat::percentile(double p) const { | 
| 145 | 54 |     double threshold = num() * (p / 100.0); | 
| 146 | 54 |     uint64_t cumulative_sum = 0; | 
| 147 | 392 |     for (unsigned int b = 0; b < _num_buckets; b++) {  Branch (147:30): [True: 392, False: 0]
 | 
| 148 | 392 |         uint64_t bucket_value = bucket_at(b); | 
| 149 | 392 |         cumulative_sum += bucket_value; | 
| 150 | 392 |         if (cumulative_sum >= threshold) {  Branch (150:13): [True: 54, False: 338]
 | 
| 151 |  |             // Scale linearly within this bucket | 
| 152 | 54 |             uint64_t left_point = (b == 0) ? 0 : bucket_mapper.bucket_limit(b - 1);   Branch (152:35): [True: 22, False: 32]
 | 
| 153 | 54 |             uint64_t right_point = bucket_mapper.bucket_limit(b); | 
| 154 | 54 |             uint64_t left_sum = cumulative_sum - bucket_value; | 
| 155 | 54 |             uint64_t right_sum = cumulative_sum; | 
| 156 | 54 |             double pos = 0; | 
| 157 | 54 |             uint64_t right_left_diff = right_sum - left_sum; | 
| 158 | 54 |             if (right_left_diff != 0) {  Branch (158:17): [True: 32, False: 22]
 | 
| 159 | 32 |                 pos = (threshold - left_sum) / right_left_diff; | 
| 160 | 32 |             } | 
| 161 | 54 |             double r = left_point + (right_point - left_point) * pos; | 
| 162 | 54 |             uint64_t cur_min = min(); | 
| 163 | 54 |             uint64_t cur_max = max(); | 
| 164 | 54 |             if (r < cur_min) r = static_cast<double>(cur_min);   Branch (164:17): [True: 22, False: 32]
 | 
| 165 | 54 |             if (r > cur_max) r = static_cast<double>(cur_max);   Branch (165:17): [True: 30, False: 24]
 | 
| 166 | 54 |             return r; | 
| 167 | 54 |         } | 
| 168 | 392 |     } | 
| 169 | 0 |     return static_cast<double>(max()); | 
| 170 | 54 | } | 
| 171 |  |  | 
| 172 | 11 | double HistogramStat::average() const { | 
| 173 | 11 |     uint64_t cur_num = num(); | 
| 174 | 11 |     uint64_t cur_sum = sum(); | 
| 175 | 11 |     if (cur_num == 0) return 0;   Branch (175:9): [True: 5, False: 6]
 | 
| 176 | 6 |     return static_cast<double>(cur_sum) / static_cast<double>(cur_num); | 
| 177 | 11 | } | 
| 178 |  |  | 
| 179 | 8 | double HistogramStat::standard_deviation() const { | 
| 180 | 8 |     uint64_t cur_num = num(); | 
| 181 | 8 |     uint64_t cur_sum = sum(); | 
| 182 | 8 |     uint64_t cur_sum_squares = sum_squares(); | 
| 183 | 8 |     if (cur_num == 0) return 0;   Branch (183:9): [True: 4, False: 4]
 | 
| 184 | 4 |     double variance = static_cast<double>(cur_sum_squares * cur_num - cur_sum * cur_sum) / | 
| 185 | 4 |                       static_cast<double>(cur_num * cur_num); | 
| 186 | 4 |     return std::sqrt(variance); | 
| 187 | 8 | } | 
| 188 | 0 | std::string HistogramStat::to_string() const { | 
| 189 | 0 |     uint64_t cur_num = num(); | 
| 190 | 0 |     std::string r; | 
| 191 | 0 |     char buf[1650]; | 
| 192 | 0 |     snprintf(buf, sizeof(buf), "Count: %" PRIu64 " Average: %.4f  StdDev: %.2f\n", cur_num, | 
| 193 | 0 |              average(), standard_deviation()); | 
| 194 | 0 |     r.append(buf); | 
| 195 | 0 |     snprintf(buf, sizeof(buf), "Min: %" PRIu64 "  Median: %.4f  Max: %" PRIu64 "\n", | 
| 196 | 0 |              (cur_num == 0 ? 0 : min()), median(), (cur_num == 0 ? 0 : max()));   Branch (196:15): [True: 0, False: 0]
  Branch (196:53): [True: 0, False: 0]
 | 
| 197 | 0 |     r.append(buf); | 
| 198 | 0 |     snprintf(buf, sizeof(buf), | 
| 199 | 0 |              "Percentiles: " | 
| 200 | 0 |              "P50: %.2f P75: %.2f P99: %.2f P99.9: %.2f P99.99: %.2f\n", | 
| 201 | 0 |              percentile(50), percentile(75), percentile(99), percentile(99.9), percentile(99.99)); | 
| 202 | 0 |     r.append(buf); | 
| 203 | 0 |     r.append("------------------------------------------------------\n"); | 
| 204 | 0 |     if (cur_num == 0) return r; // all buckets are empty   Branch (204:9): [True: 0, False: 0]
 | 
| 205 | 0 |     const double mult = 100.0 / cur_num; | 
| 206 | 0 |     uint64_t cumulative_sum = 0; | 
| 207 | 0 |     for (unsigned int b = 0; b < _num_buckets; b++) {  Branch (207:30): [True: 0, False: 0]
 | 
| 208 | 0 |         uint64_t bucket_value = bucket_at(b); | 
| 209 | 0 |         if (bucket_value <= 0.0) continue;   Branch (209:13): [True: 0, False: 0]
 | 
| 210 | 0 |         cumulative_sum += bucket_value; | 
| 211 | 0 |         snprintf(buf, sizeof(buf), "%c %7" PRIu64 ", %7" PRIu64 " ] %8" PRIu64 " %7.3f%% %7.3f%% ", | 
| 212 | 0 |                  (b == 0) ? '[' : '(',  Branch (212:18): [True: 0, False: 0]
 | 
| 213 | 0 |                  (b == 0) ? 0 : bucket_mapper.bucket_limit(b - 1), // left   Branch (213:18): [True: 0, False: 0]
 | 
| 214 | 0 |                  bucket_mapper.bucket_limit(b),                    // right | 
| 215 | 0 |                  bucket_value,                                     // count | 
| 216 | 0 |                  (mult * bucket_value),                            // percentage | 
| 217 | 0 |                  (mult * cumulative_sum));                         // cumulative percentage | 
| 218 | 0 |         r.append(buf); | 
| 219 |  |  | 
| 220 |  |         // Add hash marks based on percentage; 20 marks for 100%. | 
| 221 | 0 |         size_t marks = static_cast<size_t>(mult * bucket_value / 5 + 0.5); | 
| 222 | 0 |         r.append(marks, '#'); | 
| 223 | 0 |         r.push_back('\n'); | 
| 224 | 0 |     } | 
| 225 | 0 |     return r; | 
| 226 | 0 | } | 
| 227 |  |  | 
| 228 |  | } // namespace doris |