Coverage Report

Created: 2026-04-18 03:38

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/util/counts.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <pdqsort.h>
21
22
#include <algorithm>
23
#include <cmath>
24
#include <queue>
25
26
#include "core/pod_array.h"
27
#include "core/string_buffer.hpp"
28
29
namespace doris {
30
31
template <typename Ty>
32
class Counts {
33
public:
34
5
    Counts() = default;
Unexecuted instantiation: _ZN5doris6CountsIaEC2Ev
Unexecuted instantiation: _ZN5doris6CountsIsEC2Ev
Unexecuted instantiation: _ZN5doris6CountsIiEC2Ev
_ZN5doris6CountsIlEC2Ev
Line
Count
Source
34
5
    Counts() = default;
Unexecuted instantiation: _ZN5doris6CountsInEC2Ev
Unexecuted instantiation: _ZN5doris6CountsIfEC2Ev
Unexecuted instantiation: _ZN5doris6CountsIdEC2Ev
35
36
2
    void merge(Counts* other) {
37
2
        if (other != nullptr && !other->_nums.empty()) {
38
2
            _sorted_nums_vec.emplace_back(std::move(other->_nums));
39
2
        }
40
2
    }
Unexecuted instantiation: _ZN5doris6CountsIaE5mergeEPS1_
Unexecuted instantiation: _ZN5doris6CountsIsE5mergeEPS1_
Unexecuted instantiation: _ZN5doris6CountsIiE5mergeEPS1_
_ZN5doris6CountsIlE5mergeEPS1_
Line
Count
Source
36
2
    void merge(Counts* other) {
37
2
        if (other != nullptr && !other->_nums.empty()) {
38
2
            _sorted_nums_vec.emplace_back(std::move(other->_nums));
39
2
        }
40
2
    }
Unexecuted instantiation: _ZN5doris6CountsInE5mergeEPS1_
Unexecuted instantiation: _ZN5doris6CountsIfE5mergeEPS1_
Unexecuted instantiation: _ZN5doris6CountsIdE5mergeEPS1_
41
42
    void increment(Ty key, uint32_t i) {
43
        auto old_size = _nums.size();
44
        _nums.resize(_nums.size() + i);
45
        for (uint32_t j = 0; j < i; ++j) {
46
            _nums[old_size + j] = key;
47
        }
48
    }
49
50
0
    void increment(Ty key) { _nums.push_back(key); }
Unexecuted instantiation: _ZN5doris6CountsIaE9incrementEa
Unexecuted instantiation: _ZN5doris6CountsIsE9incrementEs
Unexecuted instantiation: _ZN5doris6CountsIiE9incrementEi
Unexecuted instantiation: _ZN5doris6CountsIlE9incrementEl
Unexecuted instantiation: _ZN5doris6CountsInE9incrementEn
Unexecuted instantiation: _ZN5doris6CountsIfE9incrementEf
Unexecuted instantiation: _ZN5doris6CountsIdE9incrementEd
51
52
0
    void increment_batch(const PaddedPODArray<Ty>& keys) { _nums.insert(keys.begin(), keys.end()); }
Unexecuted instantiation: _ZN5doris6CountsIaE15increment_batchERKNS_8PODArrayIaLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEE
Unexecuted instantiation: _ZN5doris6CountsIsE15increment_batchERKNS_8PODArrayIsLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEE
Unexecuted instantiation: _ZN5doris6CountsIiE15increment_batchERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEE
Unexecuted instantiation: _ZN5doris6CountsIlE15increment_batchERKNS_8PODArrayIlLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEE
Unexecuted instantiation: _ZN5doris6CountsInE15increment_batchERKNS_8PODArrayInLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEE
Unexecuted instantiation: _ZN5doris6CountsIfE15increment_batchERKNS_8PODArrayIfLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEE
Unexecuted instantiation: _ZN5doris6CountsIdE15increment_batchERKNS_8PODArrayIdLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEE
53
54
2
    void serialize(BufferWritable& buf) {
55
2
        if (!_nums.empty()) {
56
2
            pdqsort(_nums.begin(), _nums.end());
57
2
            size_t size = _nums.size();
58
2
            buf.write_binary(size);
59
2
            buf.write(reinterpret_cast<const char*>(_nums.data()), sizeof(Ty) * size);
60
2
        } else {
61
            // convert _sorted_nums_vec to _nums and do serialize again
62
0
            _convert_sorted_num_vec_to_nums();
63
0
            serialize(buf);
64
0
        }
65
2
    }
Unexecuted instantiation: _ZN5doris6CountsIaE9serializeERNS_14BufferWritableE
Unexecuted instantiation: _ZN5doris6CountsIsE9serializeERNS_14BufferWritableE
Unexecuted instantiation: _ZN5doris6CountsIiE9serializeERNS_14BufferWritableE
_ZN5doris6CountsIlE9serializeERNS_14BufferWritableE
Line
Count
Source
54
2
    void serialize(BufferWritable& buf) {
55
2
        if (!_nums.empty()) {
56
2
            pdqsort(_nums.begin(), _nums.end());
57
2
            size_t size = _nums.size();
58
2
            buf.write_binary(size);
59
2
            buf.write(reinterpret_cast<const char*>(_nums.data()), sizeof(Ty) * size);
60
2
        } else {
61
            // convert _sorted_nums_vec to _nums and do serialize again
62
0
            _convert_sorted_num_vec_to_nums();
63
0
            serialize(buf);
64
0
        }
65
2
    }
Unexecuted instantiation: _ZN5doris6CountsInE9serializeERNS_14BufferWritableE
Unexecuted instantiation: _ZN5doris6CountsIfE9serializeERNS_14BufferWritableE
Unexecuted instantiation: _ZN5doris6CountsIdE9serializeERNS_14BufferWritableE
66
67
2
    void unserialize(BufferReadable& buf) {
68
2
        size_t size;
69
2
        buf.read_binary(size);
70
2
        _nums.resize(size);
71
2
        auto buff = buf.read(sizeof(Ty) * size);
72
2
        memcpy(_nums.data(), buff.data, buff.size);
73
2
    }
Unexecuted instantiation: _ZN5doris6CountsIaE11unserializeERNS_14BufferReadableE
Unexecuted instantiation: _ZN5doris6CountsIsE11unserializeERNS_14BufferReadableE
Unexecuted instantiation: _ZN5doris6CountsIiE11unserializeERNS_14BufferReadableE
_ZN5doris6CountsIlE11unserializeERNS_14BufferReadableE
Line
Count
Source
67
2
    void unserialize(BufferReadable& buf) {
68
2
        size_t size;
69
2
        buf.read_binary(size);
70
2
        _nums.resize(size);
71
2
        auto buff = buf.read(sizeof(Ty) * size);
72
2
        memcpy(_nums.data(), buff.data, buff.size);
73
2
    }
Unexecuted instantiation: _ZN5doris6CountsInE11unserializeERNS_14BufferReadableE
Unexecuted instantiation: _ZN5doris6CountsIfE11unserializeERNS_14BufferReadableE
Unexecuted instantiation: _ZN5doris6CountsIdE11unserializeERNS_14BufferReadableE
74
75
3
    double terminate(double quantile) {
76
3
        if (_sorted_nums_vec.size() <= 1) {
77
2
            if (_sorted_nums_vec.size() == 1) {
78
0
                _nums = std::move(_sorted_nums_vec[0]);
79
0
            }
80
81
2
            if (_nums.empty()) {
82
                // Although the result is set to null here, the value is 0.0, and the caller
83
                // in aggregate_function_percentile_approx.h just reads the value.
84
0
                return 0.0;
85
0
            }
86
87
2
            if (UNLIKELY(!std::is_sorted(_nums.begin(), _nums.end()))) {
88
1
                pdqsort(_nums.begin(), _nums.end());
89
1
            }
90
91
2
            if (quantile == 1 || _nums.size() == 1) {
92
0
                return _nums.back();
93
0
            }
94
95
2
            double u = (_nums.size() - 1) * quantile;
96
2
            auto index = static_cast<uint32_t>(u);
97
2
            return _nums[index] +
98
2
                   (u - static_cast<double>(index)) * (static_cast<double>(_nums[index + 1]) -
99
2
                                                       static_cast<double>(_nums[index]));
100
2
        } else {
101
1
            DCHECK(_nums.empty());
102
1
            size_t rows = 0;
103
2
            for (const auto& i : _sorted_nums_vec) {
104
2
                rows += i.size();
105
2
            }
106
1
            const bool reverse = quantile > 0.5 && rows > 2;
107
1
            double u = (rows - 1) * quantile;
108
1
            auto index = static_cast<uint32_t>(u);
109
            // if reverse, the step of target should start 0 like not reverse
110
            // so here rows need to minus index + 2
111
            // eg: rows = 10, index = 5
112
            // if not reverse, so the first number loc is 5, the second number loc is 6
113
            // if reverse, so the second number is 3, the first number is 4
114
            // 5 + 4 = 3 + 6 = 9 = rows - 1.
115
            // the rows must GE 2 because `_sorted_nums_vec` size GE 2
116
1
            size_t target = reverse ? rows - index - 2 : index;
117
1
            if (quantile == 1) {
118
0
                target = 0;
119
0
            }
120
1
            auto [first_number, second_number] = _merge_sort_and_get_numbers(target, reverse);
121
1
            if (quantile == 1) {
122
0
                return second_number;
123
0
            }
124
1
            return first_number +
125
1
                   (u - static_cast<double>(index)) *
126
1
                           (static_cast<double>(second_number) - static_cast<double>(first_number));
127
1
        }
128
3
    }
Unexecuted instantiation: _ZN5doris6CountsIaE9terminateEd
Unexecuted instantiation: _ZN5doris6CountsIsE9terminateEd
Unexecuted instantiation: _ZN5doris6CountsIiE9terminateEd
_ZN5doris6CountsIlE9terminateEd
Line
Count
Source
75
3
    double terminate(double quantile) {
76
3
        if (_sorted_nums_vec.size() <= 1) {
77
2
            if (_sorted_nums_vec.size() == 1) {
78
0
                _nums = std::move(_sorted_nums_vec[0]);
79
0
            }
80
81
2
            if (_nums.empty()) {
82
                // Although the result is set to null here, the value is 0.0, and the caller
83
                // in aggregate_function_percentile_approx.h just reads the value.
84
0
                return 0.0;
85
0
            }
86
87
2
            if (UNLIKELY(!std::is_sorted(_nums.begin(), _nums.end()))) {
88
1
                pdqsort(_nums.begin(), _nums.end());
89
1
            }
90
91
2
            if (quantile == 1 || _nums.size() == 1) {
92
0
                return _nums.back();
93
0
            }
94
95
2
            double u = (_nums.size() - 1) * quantile;
96
2
            auto index = static_cast<uint32_t>(u);
97
2
            return _nums[index] +
98
2
                   (u - static_cast<double>(index)) * (static_cast<double>(_nums[index + 1]) -
99
2
                                                       static_cast<double>(_nums[index]));
100
2
        } else {
101
1
            DCHECK(_nums.empty());
102
1
            size_t rows = 0;
103
2
            for (const auto& i : _sorted_nums_vec) {
104
2
                rows += i.size();
105
2
            }
106
1
            const bool reverse = quantile > 0.5 && rows > 2;
107
1
            double u = (rows - 1) * quantile;
108
1
            auto index = static_cast<uint32_t>(u);
109
            // if reverse, the step of target should start 0 like not reverse
110
            // so here rows need to minus index + 2
111
            // eg: rows = 10, index = 5
112
            // if not reverse, so the first number loc is 5, the second number loc is 6
113
            // if reverse, so the second number is 3, the first number is 4
114
            // 5 + 4 = 3 + 6 = 9 = rows - 1.
115
            // the rows must GE 2 because `_sorted_nums_vec` size GE 2
116
1
            size_t target = reverse ? rows - index - 2 : index;
117
1
            if (quantile == 1) {
118
0
                target = 0;
119
0
            }
120
1
            auto [first_number, second_number] = _merge_sort_and_get_numbers(target, reverse);
121
1
            if (quantile == 1) {
122
0
                return second_number;
123
0
            }
124
1
            return first_number +
125
1
                   (u - static_cast<double>(index)) *
126
1
                           (static_cast<double>(second_number) - static_cast<double>(first_number));
127
1
        }
128
3
    }
Unexecuted instantiation: _ZN5doris6CountsInE9terminateEd
Unexecuted instantiation: _ZN5doris6CountsIfE9terminateEd
Unexecuted instantiation: _ZN5doris6CountsIdE9terminateEd
129
130
private:
131
    struct Node {
132
        Ty value;
133
        int array_index;
134
        int64_t element_index;
135
136
7
        auto operator<=>(const Node& other) const { return value <=> other.value; }
Unexecuted instantiation: _ZNK5doris6CountsIaE4NodessERKS2_
Unexecuted instantiation: _ZNK5doris6CountsIsE4NodessERKS2_
Unexecuted instantiation: _ZNK5doris6CountsIiE4NodessERKS2_
_ZNK5doris6CountsIlE4NodessERKS2_
Line
Count
Source
136
7
        auto operator<=>(const Node& other) const { return value <=> other.value; }
Unexecuted instantiation: _ZNK5doris6CountsInE4NodessERKS2_
Unexecuted instantiation: _ZNK5doris6CountsIfE4NodessERKS2_
Unexecuted instantiation: _ZNK5doris6CountsIdE4NodessERKS2_
137
    };
138
139
0
    void _convert_sorted_num_vec_to_nums() {
140
0
        size_t rows = 0;
141
0
        for (const auto& i : _sorted_nums_vec) {
142
0
            rows += i.size();
143
0
        }
144
0
        _nums.resize(rows);
145
0
        size_t count = 0;
146
147
0
        std::priority_queue<Node, std::vector<Node>, std::greater<Node>> min_heap;
148
0
        for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
149
0
            if (!_sorted_nums_vec[i].empty()) {
150
0
                min_heap.emplace(_sorted_nums_vec[i][0], i, 0);
151
0
            }
152
0
        }
153
154
0
        while (!min_heap.empty()) {
155
0
            Node node = min_heap.top();
156
0
            min_heap.pop();
157
0
            _nums[count++] = node.value;
158
0
            if (++node.element_index < _sorted_nums_vec[node.array_index].size()) {
159
0
                node.value = _sorted_nums_vec[node.array_index][node.element_index];
160
0
                min_heap.push(node);
161
0
            }
162
0
        }
163
0
        _sorted_nums_vec.clear();
164
0
    }
Unexecuted instantiation: _ZN5doris6CountsIaE31_convert_sorted_num_vec_to_numsEv
Unexecuted instantiation: _ZN5doris6CountsIsE31_convert_sorted_num_vec_to_numsEv
Unexecuted instantiation: _ZN5doris6CountsIiE31_convert_sorted_num_vec_to_numsEv
Unexecuted instantiation: _ZN5doris6CountsIlE31_convert_sorted_num_vec_to_numsEv
Unexecuted instantiation: _ZN5doris6CountsInE31_convert_sorted_num_vec_to_numsEv
Unexecuted instantiation: _ZN5doris6CountsIfE31_convert_sorted_num_vec_to_numsEv
Unexecuted instantiation: _ZN5doris6CountsIdE31_convert_sorted_num_vec_to_numsEv
165
166
1
    std::pair<Ty, Ty> _merge_sort_and_get_numbers(int64_t target, bool reverse) {
167
1
        Ty first_number = 0, second_number = 0;
168
1
        size_t count = 0;
169
1
        if (reverse) {
170
0
            std::priority_queue<Node> max_heap;
171
0
            for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
172
0
                if (!_sorted_nums_vec[i].empty()) {
173
0
                    max_heap.emplace(_sorted_nums_vec[i][_sorted_nums_vec[i].size() - 1], i,
174
0
                                     _sorted_nums_vec[i].size() - 1);
175
0
                }
176
0
            }
177
178
0
            while (!max_heap.empty()) {
179
0
                Node node = max_heap.top();
180
0
                max_heap.pop();
181
0
                if (count == target) {
182
0
                    second_number = node.value;
183
0
                } else if (count == target + 1) {
184
0
                    first_number = node.value;
185
0
                    break;
186
0
                }
187
0
                ++count;
188
0
                if (--node.element_index >= 0) {
189
0
                    node.value = _sorted_nums_vec[node.array_index][node.element_index];
190
0
                    max_heap.push(node);
191
0
                }
192
0
            }
193
194
1
        } else {
195
1
            std::priority_queue<Node, std::vector<Node>, std::greater<Node>> min_heap;
196
3
            for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
197
2
                if (!_sorted_nums_vec[i].empty()) {
198
2
                    min_heap.emplace(_sorted_nums_vec[i][0], i, 0);
199
2
                }
200
2
            }
201
202
7
            while (!min_heap.empty()) {
203
7
                Node node = min_heap.top();
204
7
                min_heap.pop();
205
7
                if (count == target) {
206
1
                    first_number = node.value;
207
6
                } else if (count == target + 1) {
208
1
                    second_number = node.value;
209
1
                    break;
210
1
                }
211
6
                ++count;
212
6
                if (++node.element_index < _sorted_nums_vec[node.array_index].size()) {
213
6
                    node.value = _sorted_nums_vec[node.array_index][node.element_index];
214
6
                    min_heap.push(node);
215
6
                }
216
6
            }
217
1
        }
218
219
1
        return {first_number, second_number};
220
1
    }
Unexecuted instantiation: _ZN5doris6CountsIaE27_merge_sort_and_get_numbersElb
Unexecuted instantiation: _ZN5doris6CountsIsE27_merge_sort_and_get_numbersElb
Unexecuted instantiation: _ZN5doris6CountsIiE27_merge_sort_and_get_numbersElb
_ZN5doris6CountsIlE27_merge_sort_and_get_numbersElb
Line
Count
Source
166
1
    std::pair<Ty, Ty> _merge_sort_and_get_numbers(int64_t target, bool reverse) {
167
1
        Ty first_number = 0, second_number = 0;
168
1
        size_t count = 0;
169
1
        if (reverse) {
170
0
            std::priority_queue<Node> max_heap;
171
0
            for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
172
0
                if (!_sorted_nums_vec[i].empty()) {
173
0
                    max_heap.emplace(_sorted_nums_vec[i][_sorted_nums_vec[i].size() - 1], i,
174
0
                                     _sorted_nums_vec[i].size() - 1);
175
0
                }
176
0
            }
177
178
0
            while (!max_heap.empty()) {
179
0
                Node node = max_heap.top();
180
0
                max_heap.pop();
181
0
                if (count == target) {
182
0
                    second_number = node.value;
183
0
                } else if (count == target + 1) {
184
0
                    first_number = node.value;
185
0
                    break;
186
0
                }
187
0
                ++count;
188
0
                if (--node.element_index >= 0) {
189
0
                    node.value = _sorted_nums_vec[node.array_index][node.element_index];
190
0
                    max_heap.push(node);
191
0
                }
192
0
            }
193
194
1
        } else {
195
1
            std::priority_queue<Node, std::vector<Node>, std::greater<Node>> min_heap;
196
3
            for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
197
2
                if (!_sorted_nums_vec[i].empty()) {
198
2
                    min_heap.emplace(_sorted_nums_vec[i][0], i, 0);
199
2
                }
200
2
            }
201
202
7
            while (!min_heap.empty()) {
203
7
                Node node = min_heap.top();
204
7
                min_heap.pop();
205
7
                if (count == target) {
206
1
                    first_number = node.value;
207
6
                } else if (count == target + 1) {
208
1
                    second_number = node.value;
209
1
                    break;
210
1
                }
211
6
                ++count;
212
6
                if (++node.element_index < _sorted_nums_vec[node.array_index].size()) {
213
6
                    node.value = _sorted_nums_vec[node.array_index][node.element_index];
214
6
                    min_heap.push(node);
215
6
                }
216
6
            }
217
1
        }
218
219
1
        return {first_number, second_number};
220
1
    }
Unexecuted instantiation: _ZN5doris6CountsInE27_merge_sort_and_get_numbersElb
Unexecuted instantiation: _ZN5doris6CountsIfE27_merge_sort_and_get_numbersElb
Unexecuted instantiation: _ZN5doris6CountsIdE27_merge_sort_and_get_numbersElb
221
222
    PODArray<Ty> _nums;
223
    std::vector<PODArray<Ty>> _sorted_nums_vec;
224
};
225
226
} // namespace doris