/root/doris/be/src/util/histogram.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <stddef.h> |
21 | | #include <stdint.h> |
22 | | |
23 | | #include <atomic> |
24 | | #include <map> |
25 | | #include <string> |
26 | | #include <vector> |
27 | | |
28 | | #include "common/logging.h" |
29 | | |
30 | | namespace doris { |
31 | | |
32 | | // Histogram data structure implementation: |
33 | | // |
34 | | // After construction, the 'value_index_map' will be set to: |
35 | | // |
36 | | // BucketValue: | 1 | 2 | 2*1.5 |2*1.5^2|2*1.5^3| ... |2*1.5^n| ... |UINT64MAX| |
37 | | // Index: | 0 | 1 | 2 | 3 | 4 | ... | n-1 | ... | 108 | |
38 | | // |
39 | | // The width of bucket is growing by 1.5 times and its initial values are 1 and 2. |
40 | | // The input value will be add to the bucket which lower bound is just greater than |
41 | | // input value. For example, input value A > 2*1.5^n and A <= 2*1.5^(n+1), A will be added |
42 | | // to the latter bucket. |
43 | | class HistogramBucketMapper { |
44 | | public: |
45 | | HistogramBucketMapper(); |
46 | | |
47 | | // converts a value to the bucket index. |
48 | | size_t index_for_value(const uint64_t& value) const; |
49 | | // number of buckets required. |
50 | | |
51 | 94 | size_t bucket_count() const { return _bucket_values.size(); } |
52 | | |
53 | 10 | uint64_t last_value() const { return _max_bucket_value; } |
54 | | |
55 | 0 | uint64_t first_value() const { return _min_bucket_value; } |
56 | | |
57 | 86 | uint64_t bucket_limit(const size_t bucket_number) const { |
58 | 86 | DCHECK(bucket_number < bucket_count()); |
59 | 86 | return _bucket_values[bucket_number]; |
60 | 86 | } |
61 | | |
62 | | private: |
63 | | std::vector<uint64_t> _bucket_values; |
64 | | uint64_t _max_bucket_value; |
65 | | uint64_t _min_bucket_value; |
66 | | std::map<uint64_t, uint64_t> _value_index_map; |
67 | | }; |
68 | | |
69 | | struct HistogramStat { |
70 | | HistogramStat(); |
71 | 8 | ~HistogramStat() {} |
72 | | |
73 | | HistogramStat(const HistogramStat&) = delete; |
74 | | HistogramStat& operator=(const HistogramStat&) = delete; |
75 | | |
76 | | void clear(); |
77 | | bool is_empty() const; |
78 | | void add(const uint64_t& value); |
79 | | void merge(const HistogramStat& other); |
80 | | |
81 | 1.71k | uint64_t min() const { return _min.load(std::memory_order_relaxed); } |
82 | 1.71k | uint64_t max() const { return _max.load(std::memory_order_relaxed); } |
83 | 85 | uint64_t num() const { return _num.load(std::memory_order_relaxed); } |
84 | 27 | uint64_t sum() const { return _sum.load(std::memory_order_relaxed); } |
85 | 9 | uint64_t sum_squares() const { return _sum_squares.load(std::memory_order_relaxed); } |
86 | 501 | uint64_t bucket_at(size_t b) const { return _buckets[b].load(std::memory_order_relaxed); } |
87 | | |
88 | | double median() const; |
89 | | double percentile(double p) const; |
90 | | double average() const; |
91 | | double standard_deviation() const; |
92 | | std::string to_string() const; |
93 | | |
94 | | // To be able to use HistogramStat as thread local variable, it |
95 | | // cannot have dynamic allocated member. That's why we're |
96 | | // using manually values from BucketMapper |
97 | | std::atomic<uint64_t> _min; |
98 | | std::atomic<uint64_t> _max; |
99 | | std::atomic<uint64_t> _num; |
100 | | std::atomic<uint64_t> _sum; |
101 | | std::atomic<uint64_t> _sum_squares; |
102 | | std::atomic<uint64_t> _buckets[109]; // 109==BucketMapper::bucket_count() |
103 | | const uint64_t _num_buckets; |
104 | | }; |
105 | | |
106 | | } // namespace doris |