/root/doris/be/src/util/histogram.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "util/histogram.h" |
19 | | |
20 | | #include <stdio.h> |
21 | | |
22 | | #include <algorithm> |
23 | | #include <cinttypes> |
24 | | #include <cmath> |
25 | | #include <limits> |
26 | | #include <utility> |
27 | | |
28 | | namespace doris { |
29 | | |
30 | 2 | HistogramBucketMapper::HistogramBucketMapper() { |
31 | | // If you change this, you also need to change |
32 | | // size of array buckets_ in HistogramStat |
33 | 2 | _bucket_values = {1, 2}; |
34 | 2 | _value_index_map = {{1, 0}, {2, 1}}; |
35 | 2 | double bucket_val = static_cast<double>(_bucket_values.back()); |
36 | 216 | while ((bucket_val = 1.5 * bucket_val) <= |
37 | 216 | static_cast<double>(std::numeric_limits<uint64_t>::max())) { |
38 | 214 | _bucket_values.push_back(static_cast<uint64_t>(bucket_val)); |
39 | | // Extracts two most significant digits to make histogram buckets more |
40 | | // human-readable. E.g., 172 becomes 170. |
41 | 214 | uint64_t pow_of_ten = 1; |
42 | 1.99k | while (_bucket_values.back() / 10 > 10) { |
43 | 1.77k | _bucket_values.back() /= 10; |
44 | 1.77k | pow_of_ten *= 10; |
45 | 1.77k | } |
46 | 214 | _bucket_values.back() *= pow_of_ten; |
47 | 214 | _value_index_map[_bucket_values.back()] = _bucket_values.size() - 1; |
48 | 214 | } |
49 | 2 | _max_bucket_value = _bucket_values.back(); |
50 | 2 | _min_bucket_value = _bucket_values.front(); |
51 | 2 | } |
52 | | |
53 | 1.65k | size_t HistogramBucketMapper::index_for_value(const uint64_t& value) const { |
54 | 1.65k | if (value >= _max_bucket_value) { |
55 | 0 | return _bucket_values.size() - 1; |
56 | 1.65k | } else if (value >= _min_bucket_value) { |
57 | 1.65k | std::map<uint64_t, uint64_t>::const_iterator lowerBound = |
58 | 1.65k | _value_index_map.lower_bound(value); |
59 | 1.65k | if (lowerBound != _value_index_map.end()) { |
60 | 1.65k | return static_cast<size_t>(lowerBound->second); |
61 | 1.65k | } else { |
62 | 0 | return 0; |
63 | 0 | } |
64 | 1.65k | } else { |
65 | 0 | return 0; |
66 | 0 | } |
67 | 1.65k | } |
68 | | |
69 | | namespace { |
70 | | const HistogramBucketMapper bucket_mapper; |
71 | | } |
72 | | |
73 | 8 | HistogramStat::HistogramStat() : _num_buckets(bucket_mapper.bucket_count()) { |
74 | 8 | DCHECK(_num_buckets == sizeof(_buckets) / sizeof(*_buckets)); |
75 | 8 | clear(); |
76 | 8 | } |
77 | | |
78 | 9 | void HistogramStat::clear() { |
79 | 9 | _min.store(bucket_mapper.last_value(), std::memory_order_relaxed); |
80 | 9 | _max.store(0, std::memory_order_relaxed); |
81 | 9 | _num.store(0, std::memory_order_relaxed); |
82 | 9 | _sum.store(0, std::memory_order_relaxed); |
83 | 9 | _sum_squares.store(0, std::memory_order_relaxed); |
84 | 990 | for (unsigned int b = 0; b < _num_buckets; b++) { |
85 | 981 | _buckets[b].store(0, std::memory_order_relaxed); |
86 | 981 | } |
87 | 9 | }; |
88 | | |
89 | 2 | bool HistogramStat::is_empty() const { |
90 | 2 | return num() == 0; |
91 | 2 | } |
92 | | |
93 | 1.65k | void HistogramStat::add(const uint64_t& value) { |
94 | | // This function is designed to be lock free, as it's in the critical path |
95 | | // of any operation. Each individual value is atomic and the order of updates |
96 | | // by concurrent threads is tolerable. |
97 | 1.65k | const size_t index = bucket_mapper.index_for_value(value); |
98 | 1.65k | DCHECK(index < _num_buckets); |
99 | 1.65k | _buckets[index].store(_buckets[index].load(std::memory_order_relaxed) + 1, |
100 | 1.65k | std::memory_order_relaxed); |
101 | | |
102 | 1.65k | uint64_t old_min = min(); |
103 | 1.65k | if (value < old_min) { |
104 | 6 | _min.store(value, std::memory_order_relaxed); |
105 | 6 | } |
106 | | |
107 | 1.65k | uint64_t old_max = max(); |
108 | 1.65k | if (value > old_max) { |
109 | 660 | _max.store(value, std::memory_order_relaxed); |
110 | 660 | } |
111 | | |
112 | 1.65k | _num.store(_num.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed); |
113 | 1.65k | _sum.store(_sum.load(std::memory_order_relaxed) + value, std::memory_order_relaxed); |
114 | 1.65k | _sum_squares.store(_sum_squares.load(std::memory_order_relaxed) + value * value, |
115 | 1.65k | std::memory_order_relaxed); |
116 | 1.65k | } |
117 | | |
118 | 1 | void HistogramStat::merge(const HistogramStat& other) { |
119 | | // This function needs to be performed with the outer lock acquired |
120 | | // However, atomic operation on every member is still need, since Add() |
121 | | // requires no lock and value update can still happen concurrently |
122 | 1 | uint64_t old_min = min(); |
123 | 1 | uint64_t other_min = other.min(); |
124 | 1 | while (other_min < old_min && !_min.compare_exchange_weak(old_min, other_min)) { |
125 | 0 | } |
126 | | |
127 | 1 | uint64_t old_max = max(); |
128 | 1 | uint64_t other_max = other.max(); |
129 | 1 | while (other_max > old_max && !_max.compare_exchange_weak(old_max, other_max)) { |
130 | 0 | } |
131 | | |
132 | 1 | _num.fetch_add(other.num(), std::memory_order_relaxed); |
133 | 1 | _sum.fetch_add(other.sum(), std::memory_order_relaxed); |
134 | 1 | _sum_squares.fetch_add(other.sum_squares(), std::memory_order_relaxed); |
135 | 110 | for (unsigned int b = 0; b < _num_buckets; b++) { |
136 | 109 | _buckets[b].fetch_add(other.bucket_at(b), std::memory_order_relaxed); |
137 | 109 | } |
138 | 1 | } |
139 | | |
140 | 11 | double HistogramStat::median() const { |
141 | 11 | return percentile(50.0); |
142 | 11 | } |
143 | | |
144 | 54 | double HistogramStat::percentile(double p) const { |
145 | 54 | double threshold = num() * (p / 100.0); |
146 | 54 | uint64_t cumulative_sum = 0; |
147 | 392 | for (unsigned int b = 0; b < _num_buckets; b++) { |
148 | 392 | uint64_t bucket_value = bucket_at(b); |
149 | 392 | cumulative_sum += bucket_value; |
150 | 392 | if (cumulative_sum >= threshold) { |
151 | | // Scale linearly within this bucket |
152 | 54 | uint64_t left_point = (b == 0) ? 0 : bucket_mapper.bucket_limit(b - 1); |
153 | 54 | uint64_t right_point = bucket_mapper.bucket_limit(b); |
154 | 54 | uint64_t left_sum = cumulative_sum - bucket_value; |
155 | 54 | uint64_t right_sum = cumulative_sum; |
156 | 54 | double pos = 0; |
157 | 54 | uint64_t right_left_diff = right_sum - left_sum; |
158 | 54 | if (right_left_diff != 0) { |
159 | 32 | pos = (threshold - left_sum) / right_left_diff; |
160 | 32 | } |
161 | 54 | double r = left_point + (right_point - left_point) * pos; |
162 | 54 | uint64_t cur_min = min(); |
163 | 54 | uint64_t cur_max = max(); |
164 | 54 | if (r < cur_min) r = static_cast<double>(cur_min); |
165 | 54 | if (r > cur_max) r = static_cast<double>(cur_max); |
166 | 54 | return r; |
167 | 54 | } |
168 | 392 | } |
169 | 0 | return static_cast<double>(max()); |
170 | 54 | } |
171 | | |
172 | 11 | double HistogramStat::average() const { |
173 | 11 | uint64_t cur_num = num(); |
174 | 11 | uint64_t cur_sum = sum(); |
175 | 11 | if (cur_num == 0) return 0; |
176 | 6 | return static_cast<double>(cur_sum) / static_cast<double>(cur_num); |
177 | 11 | } |
178 | | |
179 | 8 | double HistogramStat::standard_deviation() const { |
180 | 8 | uint64_t cur_num = num(); |
181 | 8 | uint64_t cur_sum = sum(); |
182 | 8 | uint64_t cur_sum_squares = sum_squares(); |
183 | 8 | if (cur_num == 0) return 0; |
184 | 4 | double variance = static_cast<double>(cur_sum_squares * cur_num - cur_sum * cur_sum) / |
185 | 4 | static_cast<double>(cur_num * cur_num); |
186 | 4 | return std::sqrt(variance); |
187 | 8 | } |
188 | 0 | std::string HistogramStat::to_string() const { |
189 | 0 | uint64_t cur_num = num(); |
190 | 0 | std::string r; |
191 | 0 | char buf[1650]; |
192 | 0 | snprintf(buf, sizeof(buf), "Count: %" PRIu64 " Average: %.4f StdDev: %.2f\n", cur_num, |
193 | 0 | average(), standard_deviation()); |
194 | 0 | r.append(buf); |
195 | 0 | snprintf(buf, sizeof(buf), "Min: %" PRIu64 " Median: %.4f Max: %" PRIu64 "\n", |
196 | 0 | (cur_num == 0 ? 0 : min()), median(), (cur_num == 0 ? 0 : max())); |
197 | 0 | r.append(buf); |
198 | 0 | snprintf(buf, sizeof(buf), |
199 | 0 | "Percentiles: " |
200 | 0 | "P50: %.2f P75: %.2f P99: %.2f P99.9: %.2f P99.99: %.2f\n", |
201 | 0 | percentile(50), percentile(75), percentile(99), percentile(99.9), percentile(99.99)); |
202 | 0 | r.append(buf); |
203 | 0 | r.append("------------------------------------------------------\n"); |
204 | 0 | if (cur_num == 0) return r; // all buckets are empty |
205 | 0 | const double mult = 100.0 / cur_num; |
206 | 0 | uint64_t cumulative_sum = 0; |
207 | 0 | for (unsigned int b = 0; b < _num_buckets; b++) { |
208 | 0 | uint64_t bucket_value = bucket_at(b); |
209 | 0 | if (bucket_value <= 0.0) continue; |
210 | 0 | cumulative_sum += bucket_value; |
211 | 0 | snprintf(buf, sizeof(buf), "%c %7" PRIu64 ", %7" PRIu64 " ] %8" PRIu64 " %7.3f%% %7.3f%% ", |
212 | 0 | (b == 0) ? '[' : '(', |
213 | 0 | (b == 0) ? 0 : bucket_mapper.bucket_limit(b - 1), // left |
214 | 0 | bucket_mapper.bucket_limit(b), // right |
215 | 0 | bucket_value, // count |
216 | 0 | (mult * bucket_value), // percentage |
217 | 0 | (mult * cumulative_sum)); // cumulative percentage |
218 | 0 | r.append(buf); |
219 | | |
220 | | // Add hash marks based on percentage; 20 marks for 100%. |
221 | 0 | size_t marks = static_cast<size_t>(mult * bucket_value / 5 + 0.5); |
222 | 0 | r.append(marks, '#'); |
223 | 0 | r.push_back('\n'); |
224 | 0 | } |
225 | 0 | return r; |
226 | 0 | } |
227 | | |
228 | | } // namespace doris |