/root/doris/be/src/util/histogram.cpp

Source (jump to first uncovered line)
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

#include "util/histogram.h"

#include <stdio.h>

#include <algorithm>
#include <cinttypes>
#include <cmath>
#include <limits>
#include <utility>

namespace doris {

HistogramBucketMapper::HistogramBucketMapper() {
    // If you change this, you also need to change
    // size of array buckets_ in HistogramStat
    _bucket_values = {1, 2};
    _value_index_map = {{1, 0}, {2, 1}};
    double bucket_val = static_cast<double>(_bucket_values.back());
    while ((bucket_val = 1.5 * bucket_val) <=
           static_cast<double>(std::numeric_limits<uint64_t>::max())) {
        _bucket_values.push_back(static_cast<uint64_t>(bucket_val));
        // Extracts two most significant digits to make histogram buckets more
        // human-readable. E.g., 172 becomes 170.
        uint64_t pow_of_ten = 1;
        while (_bucket_values.back() / 10 > 10) {
            _bucket_values.back() /= 10;
            pow_of_ten *= 10;
        }
        _bucket_values.back() *= pow_of_ten;
        _value_index_map[_bucket_values.back()] = _bucket_values.size() - 1;
    }
    _max_bucket_value = _bucket_values.back();
    _min_bucket_value = _bucket_values.front();
}

size_t HistogramBucketMapper::index_for_value(const uint64_t& value) const {
    if (value >= _max_bucket_value) {
        return _bucket_values.size() - 1;
    } else if (value >= _min_bucket_value) {
        std::map<uint64_t, uint64_t>::const_iterator lowerBound =
                _value_index_map.lower_bound(value);
        if (lowerBound != _value_index_map.end()) {
            return static_cast<size_t>(lowerBound->second);
        } else {
            return 0;
        }
    } else {
        return 0;
    }
}

namespace {
const HistogramBucketMapper bucket_mapper;
}

HistogramStat::HistogramStat() : _num_buckets(bucket_mapper.bucket_count()) {
    DCHECK(_num_buckets == sizeof(_buckets) / sizeof(*_buckets));
    clear();
}

void HistogramStat::clear() {
    _min.store(bucket_mapper.last_value(), std::memory_order_relaxed);
    _max.store(0, std::memory_order_relaxed);
    _num.store(0, std::memory_order_relaxed);
    _sum.store(0, std::memory_order_relaxed);
    _sum_squares.store(0, std::memory_order_relaxed);
    for (unsigned int b = 0; b < _num_buckets; b++) {
        _buckets[b].store(0, std::memory_order_relaxed);
    }
};

bool HistogramStat::is_empty() const {
    return num() == 0;
}

void HistogramStat::add(const uint64_t& value) {
    // This function is designed to be lock free, as it's in the critical path
    // of any operation. Each individual value is atomic and the order of updates
    // by concurrent threads is tolerable.
    const size_t index = bucket_mapper.index_for_value(value);
    DCHECK(index < _num_buckets);
    _buckets[index].store(_buckets[index].load(std::memory_order_relaxed) + 1,
                          std::memory_order_relaxed);

    uint64_t old_min = min();
    if (value < old_min) {
        _min.store(value, std::memory_order_relaxed);
    }

    uint64_t old_max = max();
    if (value > old_max) {
        _max.store(value, std::memory_order_relaxed);
    }

    _num.store(_num.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed);
    _sum.store(_sum.load(std::memory_order_relaxed) + value, std::memory_order_relaxed);
    _sum_squares.store(_sum_squares.load(std::memory_order_relaxed) + value * value,
                       std::memory_order_relaxed);
}

void HistogramStat::merge(const HistogramStat& other) {
    // This function needs to be performed with the outer lock acquired
    // However, atomic operation on every member is still need, since Add()
    // requires no lock and value update can still happen concurrently
    uint64_t old_min = min();
    uint64_t other_min = other.min();
    while (other_min < old_min && !_min.compare_exchange_weak(old_min, other_min)) {
    }

    uint64_t old_max = max();
    uint64_t other_max = other.max();
    while (other_max > old_max && !_max.compare_exchange_weak(old_max, other_max)) {
    }

    _num.fetch_add(other.num(), std::memory_order_relaxed);
    _sum.fetch_add(other.sum(), std::memory_order_relaxed);
    _sum_squares.fetch_add(other.sum_squares(), std::memory_order_relaxed);
    for (unsigned int b = 0; b < _num_buckets; b++) {
        _buckets[b].fetch_add(other.bucket_at(b), std::memory_order_relaxed);
    }
}

double HistogramStat::median() const {
    return percentile(50.0);
}

double HistogramStat::percentile(double p) const {
    double threshold = num() * (p / 100.0);
    uint64_t cumulative_sum = 0;
    for (unsigned int b = 0; b < _num_buckets; b++) {
        uint64_t bucket_value = bucket_at(b);
        cumulative_sum += bucket_value;
        if (cumulative_sum >= threshold) {
            // Scale linearly within this bucket
            uint64_t left_point = (b == 0) ? 0 : bucket_mapper.bucket_limit(b - 1);
            uint64_t right_point = bucket_mapper.bucket_limit(b);
            uint64_t left_sum = cumulative_sum - bucket_value;
            uint64_t right_sum = cumulative_sum;
            double pos = 0;
            uint64_t right_left_diff = right_sum - left_sum;
            if (right_left_diff != 0) {
                pos = (threshold - left_sum) / right_left_diff;
            }
            double r = left_point + (right_point - left_point) * pos;
            uint64_t cur_min = min();
            uint64_t cur_max = max();
            if (r < cur_min) r = static_cast<double>(cur_min);
            if (r > cur_max) r = static_cast<double>(cur_max);
            return r;
        }
    }
    return static_cast<double>(max());
}

double HistogramStat::average() const {
    uint64_t cur_num = num();
    uint64_t cur_sum = sum();
    if (cur_num == 0) return 0;
    return static_cast<double>(cur_sum) / static_cast<double>(cur_num);
}

double HistogramStat::standard_deviation() const {
    uint64_t cur_num = num();
    uint64_t cur_sum = sum();
    uint64_t cur_sum_squares = sum_squares();
    if (cur_num == 0) return 0;
    double variance = static_cast<double>(cur_sum_squares * cur_num - cur_sum * cur_sum) /
                      static_cast<double>(cur_num * cur_num);
    return std::sqrt(variance);
}
std::string HistogramStat::to_string() const {
    uint64_t cur_num = num();
    std::string r;
    char buf[1650];
    snprintf(buf, sizeof(buf), "Count: %" PRIu64 " Average: %.4f  StdDev: %.2f\n", cur_num,
             average(), standard_deviation());
    r.append(buf);
    snprintf(buf, sizeof(buf), "Min: %" PRIu64 "  Median: %.4f  Max: %" PRIu64 "\n",
             (cur_num == 0 ? 0 : min()), median(), (cur_num == 0 ? 0 : max()));
    r.append(buf);
    snprintf(buf, sizeof(buf),
             "Percentiles: "
             "P50: %.2f P75: %.2f P99: %.2f P99.9: %.2f P99.99: %.2f\n",
             percentile(50), percentile(75), percentile(99), percentile(99.9), percentile(99.99));
    r.append(buf);
    r.append("------------------------------------------------------\n");
    if (cur_num == 0) return r; // all buckets are empty
    const double mult = 100.0 / cur_num;
    uint64_t cumulative_sum = 0;
    for (unsigned int b = 0; b < _num_buckets; b++) {
        uint64_t bucket_value = bucket_at(b);
        if (bucket_value <= 0.0) continue;
        cumulative_sum += bucket_value;
        snprintf(buf, sizeof(buf), "%c %7" PRIu64 ", %7" PRIu64 " ] %8" PRIu64 " %7.3f%% %7.3f%% ",
                 (b == 0) ? '[' : '(',
                 (b == 0) ? 0 : bucket_mapper.bucket_limit(b - 1), // left
                 bucket_mapper.bucket_limit(b),                    // right
                 bucket_value,                                     // count
                 (mult * bucket_value),                            // percentage
                 (mult * cumulative_sum));                         // cumulative percentage
        r.append(buf);

        // Add hash marks based on percentage; 20 marks for 100%.
        size_t marks = static_cast<size_t>(mult * bucket_value / 5 + 0.5);
        r.append(marks, '#');
        r.push_back('\n');
    }
    return r;
}

} // namespace doris

Coverage Report

Created: 2025-10-30 15:07

Line	Count	Source (jump to first uncovered line)
1		// Licensed to the Apache Software Foundation (ASF) under one
2		// or more contributor license agreements. See the NOTICE file
3		// distributed with this work for additional information
4		// regarding copyright ownership. The ASF licenses this file
5		// to you under the Apache License, Version 2.0 (the
6		// "License"); you may not use this file except in compliance
7		// with the License. You may obtain a copy of the License at
8		//
9		// http://www.apache.org/licenses/LICENSE-2.0
10		//
11		// Unless required by applicable law or agreed to in writing,
12		// software distributed under the License is distributed on an
13		// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14		// KIND, either express or implied. See the License for the
15		// specific language governing permissions and limitations
16		// under the License.
17
18		#include "util/histogram.h"
19
20		#include <stdio.h>
21
22		#include <algorithm>
23		#include <cinttypes>
24		#include <cmath>
25		#include <limits>
26		#include <utility>
27
28		namespace doris {
29
30	2	HistogramBucketMapper::HistogramBucketMapper() {
31		// If you change this, you also need to change
32		// size of array buckets_ in HistogramStat
33	2	_bucket_values = {1, 2};
34	2	_value_index_map = {{1, 0}, {2, 1}};
35	2	double bucket_val = static_cast<double>(_bucket_values.back());
36	216	while ((bucket_val = 1.5 * bucket_val) <= Branch (36:12): [True: 214, False: 2]
37	216	static_cast<double>(std::numeric_limits<uint64_t>::max())) {
38	214	_bucket_values.push_back(static_cast<uint64_t>(bucket_val));
39		// Extracts two most significant digits to make histogram buckets more
40		// human-readable. E.g., 172 becomes 170.
41	214	uint64_t pow_of_ten = 1;
42	1.99k	while (_bucket_values.back() / 10 > 10) { Branch (42:16): [True: 1.77k, False: 214]
43	1.77k	_bucket_values.back() /= 10;
44	1.77k	pow_of_ten *= 10;
45	1.77k	}
46	214	_bucket_values.back() *= pow_of_ten;
47	214	_value_index_map[_bucket_values.back()] = _bucket_values.size() - 1;
48	214	}
49	2	_max_bucket_value = _bucket_values.back();
50	2	_min_bucket_value = _bucket_values.front();
51	2	}
52
53	1.65k	size_t HistogramBucketMapper::index_for_value(const uint64_t& value) const {
54	1.65k	if (value >= _max_bucket_value) { Branch (54:9): [True: 0, False: 1.65k]
55	0	return _bucket_values.size() - 1;
56	1.65k	} else if (value >= _min_bucket_value) { Branch (56:16): [True: 1.65k, False: 0]
57	1.65k	std::map<uint64_t, uint64_t>::const_iterator lowerBound =
58	1.65k	_value_index_map.lower_bound(value);
59	1.65k	if (lowerBound != _value_index_map.end()) { Branch (59:13): [True: 1.65k, False: 0]
60	1.65k	return static_cast<size_t>(lowerBound->second);
61	1.65k	} else {
62	0	return 0;
63	0	}
64	1.65k	} else {
65	0	return 0;
66	0	}
67	1.65k	}
68
69		namespace {
70		const HistogramBucketMapper bucket_mapper;
71		}
72
73	8	HistogramStat::HistogramStat() : _num_buckets(bucket_mapper.bucket_count()) {
74	8	DCHECK(_num_buckets == sizeof(_buckets) / sizeof(*_buckets));
75	8	clear();
76	8	}
77
78	9	void HistogramStat::clear() {
79	9	_min.store(bucket_mapper.last_value(), std::memory_order_relaxed);
80	9	_max.store(0, std::memory_order_relaxed);
81	9	_num.store(0, std::memory_order_relaxed);
82	9	_sum.store(0, std::memory_order_relaxed);
83	9	_sum_squares.store(0, std::memory_order_relaxed);
84	990	for (unsigned int b = 0; b < _num_buckets; b++) { Branch (84:30): [True: 981, False: 9]
85	981	_buckets[b].store(0, std::memory_order_relaxed);
86	981	}
87	9	};
88
89	2	bool HistogramStat::is_empty() const {
90	2	return num() == 0;
91	2	}
92
93	1.65k	void HistogramStat::add(const uint64_t& value) {
94		// This function is designed to be lock free, as it's in the critical path
95		// of any operation. Each individual value is atomic and the order of updates
96		// by concurrent threads is tolerable.
97	1.65k	const size_t index = bucket_mapper.index_for_value(value);
98	1.65k	DCHECK(index < _num_buckets);
99	1.65k	_buckets[index].store(_buckets[index].load(std::memory_order_relaxed) + 1,
100	1.65k	std::memory_order_relaxed);
101
102	1.65k	uint64_t old_min = min();
103	1.65k	if (value < old_min) { Branch (103:9): [True: 6, False: 1.64k]
104	6	_min.store(value, std::memory_order_relaxed);
105	6	}
106
107	1.65k	uint64_t old_max = max();
108	1.65k	if (value > old_max) { Branch (108:9): [True: 660, False: 990]
109	660	_max.store(value, std::memory_order_relaxed);
110	660	}
111
112	1.65k	_num.store(_num.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed);
113	1.65k	_sum.store(_sum.load(std::memory_order_relaxed) + value, std::memory_order_relaxed);
114	1.65k	_sum_squares.store(_sum_squares.load(std::memory_order_relaxed) + value * value,
115	1.65k	std::memory_order_relaxed);
116	1.65k	}
117
118	1	void HistogramStat::merge(const HistogramStat& other) {
119		// This function needs to be performed with the outer lock acquired
120		// However, atomic operation on every member is still need, since Add()
121		// requires no lock and value update can still happen concurrently
122	1	uint64_t old_min = min();
123	1	uint64_t other_min = other.min();
124	1	while (other_min < old_min && !_min.compare_exchange_weak(old_min, other_min)) { Branch (124:12): [True: 0, False: 1] Branch (124:35): [True: 0, False: 0]
125	0	}
126
127	1	uint64_t old_max = max();
128	1	uint64_t other_max = other.max();
129	1	while (other_max > old_max && !_max.compare_exchange_weak(old_max, other_max)) { Branch (129:12): [True: 1, False: 0] Branch (129:35): [True: 0, False: 1]
130	0	}
131
132	1	_num.fetch_add(other.num(), std::memory_order_relaxed);
133	1	_sum.fetch_add(other.sum(), std::memory_order_relaxed);
134	1	_sum_squares.fetch_add(other.sum_squares(), std::memory_order_relaxed);
135	110	for (unsigned int b = 0; b < _num_buckets; b++) { Branch (135:30): [True: 109, False: 1]
136	109	_buckets[b].fetch_add(other.bucket_at(b), std::memory_order_relaxed);
137	109	}
138	1	}
139
140	11	double HistogramStat::median() const {
141	11	return percentile(50.0);
142	11	}
143
144	54	double HistogramStat::percentile(double p) const {
145	54	double threshold = num() * (p / 100.0);
146	54	uint64_t cumulative_sum = 0;
147	392	for (unsigned int b = 0; b < _num_buckets; b++) { Branch (147:30): [True: 392, False: 0]
148	392	uint64_t bucket_value = bucket_at(b);
149	392	cumulative_sum += bucket_value;
150	392	if (cumulative_sum >= threshold) { Branch (150:13): [True: 54, False: 338]
151		// Scale linearly within this bucket
152	54	uint64_t left_point = (b == 0) ? 0 : bucket_mapper.bucket_limit(b - 1); Branch (152:35): [True: 22, False: 32]
153	54	uint64_t right_point = bucket_mapper.bucket_limit(b);
154	54	uint64_t left_sum = cumulative_sum - bucket_value;
155	54	uint64_t right_sum = cumulative_sum;
156	54	double pos = 0;
157	54	uint64_t right_left_diff = right_sum - left_sum;
158	54	if (right_left_diff != 0) { Branch (158:17): [True: 32, False: 22]
159	32	pos = (threshold - left_sum) / right_left_diff;
160	32	}
161	54	double r = left_point + (right_point - left_point) * pos;
162	54	uint64_t cur_min = min();
163	54	uint64_t cur_max = max();
164	54	if (r < cur_min) r = static_cast<double>(cur_min); Branch (164:17): [True: 22, False: 32]
165	54	if (r > cur_max) r = static_cast<double>(cur_max); Branch (165:17): [True: 30, False: 24]
166	54	return r;
167	54	}
168	392	}
169	0	return static_cast<double>(max());
170	54	}
171
172	11	double HistogramStat::average() const {
173	11	uint64_t cur_num = num();
174	11	uint64_t cur_sum = sum();
175	11	if (cur_num == 0) return 0; Branch (175:9): [True: 5, False: 6]
176	6	return static_cast<double>(cur_sum) / static_cast<double>(cur_num);
177	11	}
178
179	8	double HistogramStat::standard_deviation() const {
180	8	uint64_t cur_num = num();
181	8	uint64_t cur_sum = sum();
182	8	uint64_t cur_sum_squares = sum_squares();
183	8	if (cur_num == 0) return 0; Branch (183:9): [True: 4, False: 4]
184	4	double variance = static_cast<double>(cur_sum_squares * cur_num - cur_sum * cur_sum) /
185	4	static_cast<double>(cur_num * cur_num);
186	4	return std::sqrt(variance);
187	8	}
188	0	std::string HistogramStat::to_string() const {
189	0	uint64_t cur_num = num();
190	0	std::string r;
191	0	char buf[1650];
192	0	snprintf(buf, sizeof(buf), "Count: %" PRIu64 " Average: %.4f StdDev: %.2f\n", cur_num,
193	0	average(), standard_deviation());
194	0	r.append(buf);
195	0	snprintf(buf, sizeof(buf), "Min: %" PRIu64 " Median: %.4f Max: %" PRIu64 "\n",
196	0	(cur_num == 0 ? 0 : min()), median(), (cur_num == 0 ? 0 : max())); Branch (196:15): [True: 0, False: 0] Branch (196:53): [True: 0, False: 0]
197	0	r.append(buf);
198	0	snprintf(buf, sizeof(buf),
199	0	"Percentiles: "
200	0	"P50: %.2f P75: %.2f P99: %.2f P99.9: %.2f P99.99: %.2f\n",
201	0	percentile(50), percentile(75), percentile(99), percentile(99.9), percentile(99.99));
202	0	r.append(buf);
203	0	r.append("------------------------------------------------------\n");
204	0	if (cur_num == 0) return r; // all buckets are empty Branch (204:9): [True: 0, False: 0]
205	0	const double mult = 100.0 / cur_num;
206	0	uint64_t cumulative_sum = 0;
207	0	for (unsigned int b = 0; b < _num_buckets; b++) { Branch (207:30): [True: 0, False: 0]
208	0	uint64_t bucket_value = bucket_at(b);
209	0	if (bucket_value <= 0.0) continue; Branch (209:13): [True: 0, False: 0]
210	0	cumulative_sum += bucket_value;
211	0	snprintf(buf, sizeof(buf), "%c %7" PRIu64 ", %7" PRIu64 " ] %8" PRIu64 " %7.3f%% %7.3f%% ",
212	0	(b == 0) ? '[' : '(', Branch (212:18): [True: 0, False: 0]
213	0	(b == 0) ? 0 : bucket_mapper.bucket_limit(b - 1), // left Branch (213:18): [True: 0, False: 0]
214	0	bucket_mapper.bucket_limit(b), // right
215	0	bucket_value, // count
216	0	(mult * bucket_value), // percentage
217	0	(mult * cumulative_sum)); // cumulative percentage
218	0	r.append(buf);
219
220		// Add hash marks based on percentage; 20 marks for 100%.
221	0	size_t marks = static_cast<size_t>(mult * bucket_value / 5 + 0.5);
222	0	r.append(marks, '#');
223	0	r.push_back('\n');
224	0	}
225	0	return r;
226	0	}
227
228		} // namespace doris