Coverage Report

Created: 2025-04-28 14:13

/root/doris/be/src/util/histogram.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "util/histogram.h"
19
20
#include <stdio.h>
21
22
#include <algorithm>
23
#include <cinttypes>
24
#include <cmath>
25
#include <limits>
26
#include <utility>
27
28
namespace doris {
29
30
3
// Builds the table of bucket upper limits and the reverse map from a limit
// to its position in that table.
//
// The table starts as {1, 2}; every following limit is 1.5x the previous
// (unrounded) one, truncated to an integer and then reduced to its leading
// digits so the limits are easier to read (e.g. 172 becomes 170). Growth
// stops once the next limit would exceed the uint64_t range.
//
// If you change this, you also need to change the size of the bucket array
// in HistogramStat.
HistogramBucketMapper::HistogramBucketMapper() {
    _bucket_values = {1, 2};
    _value_index_map = {{1, 0}, {2, 1}};

    const double range_end = static_cast<double>(std::numeric_limits<uint64_t>::max());
    double raw_limit = static_cast<double>(_bucket_values.back());
    for (;;) {
        raw_limit = 1.5 * raw_limit;
        if (!(raw_limit <= range_end)) {
            break;
        }

        // Strip trailing digits while remembering their magnitude, then put
        // the magnitude back so only the most significant digits survive.
        uint64_t rounded = static_cast<uint64_t>(raw_limit);
        uint64_t magnitude = 1;
        while (rounded / 10 > 10) {
            rounded /= 10;
            magnitude *= 10;
        }
        rounded *= magnitude;

        _bucket_values.push_back(rounded);
        _value_index_map[rounded] = _bucket_values.size() - 1;
    }

    _max_bucket_value = _bucket_values.back();
    _min_bucket_value = _bucket_values.front();
}
52
53
19.1M
size_t HistogramBucketMapper::index_for_value(const uint64_t& value) const {
54
19.1M
    if (value >= _max_bucket_value) {
55
0
        return _bucket_values.size() - 1;
56
19.1M
    } else if (value >= _min_bucket_value) {
57
19.1M
        std::map<uint64_t, uint64_t>::const_iterator lowerBound =
58
19.1M
                _value_index_map.lower_bound(value);
59
19.1M
        if (lowerBound != _value_index_map.end()) {
60
19.1M
            return static_cast<size_t>(lowerBound->second);
61
19.1M
        } else {
62
0
            return 0;
63
0
        }
64
19.1M
    } else {
65
5
        return 0;
66
5
    }
67
19.1M
}
68
69
namespace {
70
const HistogramBucketMapper bucket_mapper;
71
}
72
73
265
// Sizes the histogram from the file-local bucket mapper and resets every
// counter. The debug check guards against the bucket array and the mapper's
// bucket count drifting apart.
HistogramStat::HistogramStat() : _num_buckets(bucket_mapper.bucket_count()) {
    DCHECK(sizeof(_buckets) / sizeof(_buckets[0]) == _num_buckets);
    clear();
}
77
78
522
void HistogramStat::clear() {
79
522
    _min.store(bucket_mapper.last_value(), std::memory_order_relaxed);
80
522
    _max.store(0, std::memory_order_relaxed);
81
522
    _num.store(0, std::memory_order_relaxed);
82
522
    _sum.store(0, std::memory_order_relaxed);
83
522
    _sum_squares.store(0, std::memory_order_relaxed);
84
57.4k
    for (unsigned int b = 0; b < _num_buckets; b++) {
85
56.8k
        _buckets[b].store(0, std::memory_order_relaxed);
86
56.8k
    }
87
522
};
88
89
2
bool HistogramStat::is_empty() const {
90
2
    return num() == 0;
91
2
}
92
93
19.1M
void HistogramStat::add(const uint64_t& value) {
94
    // This function is designed to be lock free, as it's in the critical path
95
    // of any operation. Each individual value is atomic and the order of updates
96
    // by concurrent threads is tolerable.
97
19.1M
    const size_t index = bucket_mapper.index_for_value(value);
98
19.1M
    DCHECK(index < _num_buckets);
99
19.1M
    _buckets[index].store(_buckets[index].load(std::memory_order_relaxed) + 1,
100
19.1M
                          std::memory_order_relaxed);
101
102
19.1M
    uint64_t old_min = min();
103
19.1M
    if (value < old_min) {
104
376
        _min.store(value, std::memory_order_relaxed);
105
376
    }
106
107
19.1M
    uint64_t old_max = max();
108
19.1M
    if (value > old_max) {
109
2.57k
        _max.store(value, std::memory_order_relaxed);
110
2.57k
    }
111
112
19.1M
    _num.store(_num.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed);
113
19.1M
    _sum.store(_sum.load(std::memory_order_relaxed) + value, std::memory_order_relaxed);
114
19.1M
    _sum_squares.store(_sum_squares.load(std::memory_order_relaxed) + value * value,
115
19.1M
                       std::memory_order_relaxed);
116
19.1M
}
117
118
257
void HistogramStat::merge(const HistogramStat& other) {
119
    // This function needs to be performed with the outer lock acquired
120
    // However, atomic operation on every member is still need, since Add()
121
    // requires no lock and value update can still happen concurrently
122
257
    uint64_t old_min = min();
123
257
    uint64_t other_min = other.min();
124
257
    while (other_min < old_min && !_min.compare_exchange_weak(old_min, other_min)) {
125
0
    }
126
127
257
    uint64_t old_max = max();
128
257
    uint64_t other_max = other.max();
129
257
    while (other_max > old_max && !_max.compare_exchange_weak(old_max, other_max)) {
130
0
    }
131
132
257
    _num.fetch_add(other.num(), std::memory_order_relaxed);
133
257
    _sum.fetch_add(other.sum(), std::memory_order_relaxed);
134
257
    _sum_squares.fetch_add(other.sum_squares(), std::memory_order_relaxed);
135
28.2k
    for (unsigned int b = 0; b < _num_buckets; b++) {
136
28.0k
        _buckets[b].fetch_add(other.bucket_at(b), std::memory_order_relaxed);
137
28.0k
    }
138
257
}
139
140
12
double HistogramStat::median() const {
141
12
    return percentile(50.0);
142
12
}
143
144
60
double HistogramStat::percentile(double p) const {
145
60
    double threshold = num() * (p / 100.0);
146
60
    uint64_t cumulative_sum = 0;
147
407
    for (unsigned int b = 0; b < _num_buckets; b++) {
148
407
        uint64_t bucket_value = bucket_at(b);
149
407
        cumulative_sum += bucket_value;
150
407
        if (cumulative_sum >= threshold) {
151
            // Scale linearly within this bucket
152
60
            uint64_t left_point = (b == 0) ? 0 : bucket_mapper.bucket_limit(b - 1);
153
60
            uint64_t right_point = bucket_mapper.bucket_limit(b);
154
60
            uint64_t left_sum = cumulative_sum - bucket_value;
155
60
            uint64_t right_sum = cumulative_sum;
156
60
            double pos = 0;
157
60
            uint64_t right_left_diff = right_sum - left_sum;
158
60
            if (right_left_diff != 0) {
159
38
                pos = (threshold - left_sum) / right_left_diff;
160
38
            }
161
60
            double r = left_point + (right_point - left_point) * pos;
162
60
            uint64_t cur_min = min();
163
60
            uint64_t cur_max = max();
164
60
            if (r < cur_min) r = static_cast<double>(cur_min);
165
60
            if (r > cur_max) r = static_cast<double>(cur_max);
166
60
            return r;
167
60
        }
168
407
    }
169
0
    return static_cast<double>(max());
170
60
}
171
172
12
double HistogramStat::average() const {
173
12
    uint64_t cur_num = num();
174
12
    uint64_t cur_sum = sum();
175
12
    if (cur_num == 0) return 0;
176
7
    return static_cast<double>(cur_sum) / static_cast<double>(cur_num);
177
12
}
178
179
9
double HistogramStat::standard_deviation() const {
180
9
    uint64_t cur_num = num();
181
9
    uint64_t cur_sum = sum();
182
9
    uint64_t cur_sum_squares = sum_squares();
183
9
    if (cur_num == 0) return 0;
184
5
    double variance = static_cast<double>(cur_sum_squares * cur_num - cur_sum * cur_sum) /
185
5
                      static_cast<double>(cur_num * cur_num);
186
5
    return std::sqrt(variance);
187
9
}
188
0
std::string HistogramStat::to_string() const {
189
0
    uint64_t cur_num = num();
190
0
    std::string r;
191
0
    char buf[1650];
192
0
    snprintf(buf, sizeof(buf), "Count: %" PRIu64 " Average: %.4f  StdDev: %.2f\n", cur_num,
193
0
             average(), standard_deviation());
194
0
    r.append(buf);
195
0
    snprintf(buf, sizeof(buf), "Min: %" PRIu64 "  Median: %.4f  Max: %" PRIu64 "\n",
196
0
             (cur_num == 0 ? 0 : min()), median(), (cur_num == 0 ? 0 : max()));
197
0
    r.append(buf);
198
0
    snprintf(buf, sizeof(buf),
199
0
             "Percentiles: "
200
0
             "P50: %.2f P75: %.2f P99: %.2f P99.9: %.2f P99.99: %.2f\n",
201
0
             percentile(50), percentile(75), percentile(99), percentile(99.9), percentile(99.99));
202
0
    r.append(buf);
203
0
    r.append("------------------------------------------------------\n");
204
0
    if (cur_num == 0) return r; // all buckets are empty
205
0
    const double mult = 100.0 / cur_num;
206
0
    uint64_t cumulative_sum = 0;
207
0
    for (unsigned int b = 0; b < _num_buckets; b++) {
208
0
        uint64_t bucket_value = bucket_at(b);
209
0
        if (bucket_value <= 0.0) continue;
210
0
        cumulative_sum += bucket_value;
211
0
        snprintf(buf, sizeof(buf), "%c %7" PRIu64 ", %7" PRIu64 " ] %8" PRIu64 " %7.3f%% %7.3f%% ",
212
0
                 (b == 0) ? '[' : '(',
213
0
                 (b == 0) ? 0 : bucket_mapper.bucket_limit(b - 1), // left
214
0
                 bucket_mapper.bucket_limit(b),                    // right
215
0
                 bucket_value,                                     // count
216
0
                 (mult * bucket_value),                            // percentage
217
0
                 (mult * cumulative_sum));                         // cumulative percentage
218
0
        r.append(buf);
219
220
        // Add hash marks based on percentage; 20 marks for 100%.
221
0
        size_t marks = static_cast<size_t>(mult * bucket_value / 5 + 0.5);
222
0
        r.append(marks, '#');
223
0
        r.push_back('\n');
224
0
    }
225
0
    return r;
226
0
}
227
228
} // namespace doris