Coverage Report

Created: 2025-08-03 04:54

/root/doris/be/src/util/counts.h
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <pdqsort.h>
21
22
#include <algorithm>
23
#include <cmath>
24
#include <queue>
25
26
#include "udf/udf.h"
27
#include "vec/common/pod_array.h"
28
#include "vec/common/string_buffer.hpp"
29
#include "vec/io/io_helper.h"
30
31
namespace doris {
32
33
class OldCounts {
34
public:
35
    OldCounts() = default;
36
37
0
    inline void merge(const OldCounts* other) {
38
0
        if (other == nullptr || other->_counts.empty()) {
39
0
            return;
40
0
        }
41
0
42
0
        for (auto& cell : other->_counts) {
43
0
            increment(cell.first, cell.second);
44
0
        }
45
0
    }
46
47
0
    void increment(int64_t key, uint32_t i) {
48
0
        auto item = _counts.find(key);
49
0
        if (item != _counts.end()) {
50
0
            item->second += i;
51
0
        } else {
52
0
            _counts.emplace(std::make_pair(key, i));
53
0
        }
54
0
    }
55
56
0
    uint32_t serialized_size() const {
57
0
        return sizeof(uint32_t) + sizeof(int64_t) * _counts.size() +
58
0
               sizeof(uint32_t) * _counts.size();
59
0
    }
60
61
0
    void serialize(uint8_t* writer) const {
62
0
        uint32_t size = _counts.size();
63
0
        memcpy(writer, &size, sizeof(uint32_t));
64
0
        writer += sizeof(uint32_t);
65
0
        for (auto& cell : _counts) {
66
0
            memcpy(writer, &cell.first, sizeof(int64_t));
67
0
            writer += sizeof(int64_t);
68
0
            memcpy(writer, &cell.second, sizeof(uint32_t));
69
0
            writer += sizeof(uint32_t);
70
0
        }
71
0
    }
72
73
0
    void unserialize(const uint8_t* type_reader) {
74
0
        uint32_t size;
75
0
        memcpy(&size, type_reader, sizeof(uint32_t));
76
0
        type_reader += sizeof(uint32_t);
77
0
        for (uint32_t i = 0; i < size; ++i) {
78
0
            int64_t key;
79
0
            uint32_t count;
80
0
            memcpy(&key, type_reader, sizeof(int64_t));
81
0
            type_reader += sizeof(int64_t);
82
0
            memcpy(&count, type_reader, sizeof(uint32_t));
83
0
            type_reader += sizeof(uint32_t);
84
0
            _counts.emplace(std::make_pair(key, count));
85
0
        }
86
0
    }
87
88
    double get_percentile(std::vector<std::pair<int64_t, uint32_t>>& counts,
89
0
                          double position) const {
90
0
        long lower = long(std::floor(position));
91
0
        long higher = long(std::ceil(position));
92
0
93
0
        auto iter = counts.begin();
94
0
        for (; iter != counts.end() && iter->second < lower + 1; ++iter)
95
0
            ;
96
0
97
0
        int64_t lower_key = iter->first;
98
0
        if (higher == lower) {
99
0
            return lower_key;
100
0
        }
101
0
102
0
        if (iter->second < higher + 1) {
103
0
            iter++;
104
0
        }
105
0
106
0
        int64_t higher_key = iter->first;
107
0
        if (lower_key == higher_key) {
108
0
            return lower_key;
109
0
        }
110
0
111
0
        return (higher - position) * lower_key + (position - lower) * higher_key;
112
0
    }
113
114
0
    double terminate(double quantile) const {
115
0
        if (_counts.empty()) {
116
0
            // Although set null here, but the value is 0.0 and the call method just
117
0
            // get val in aggregate_function_percentile_approx.h
118
0
            return 0.0;
119
0
        }
120
0
121
0
        std::vector<std::pair<int64_t, uint32_t>> elems(_counts.begin(), _counts.end());
122
0
        sort(elems.begin(), elems.end(),
123
0
             [](const std::pair<int64_t, uint32_t> l, const std::pair<int64_t, uint32_t> r) {
124
0
                 return l.first < r.first;
125
0
             });
126
0
127
0
        long total = 0;
128
0
        for (auto& cell : elems) {
129
0
            total += cell.second;
130
0
            cell.second = total;
131
0
        }
132
0
133
0
        long max_position = total - 1;
134
0
        double position = max_position * quantile;
135
0
        return get_percentile(elems, position);
136
0
    }
137
138
private:
139
    std::unordered_map<int64_t, uint32_t> _counts;
140
};
141
template <typename Ty>
142
class Counts {
143
public:
144
7.00k
    Counts() = default;
Unexecuted instantiation: _ZN5doris6CountsIhEC2Ev
_ZN5doris6CountsIaEC2Ev
Line
Count
Source
144
68
    Counts() = default;
_ZN5doris6CountsIsEC2Ev
Line
Count
Source
144
450
    Counts() = default;
_ZN5doris6CountsIiEC2Ev
Line
Count
Source
144
2.36k
    Counts() = default;
_ZN5doris6CountsIlEC2Ev
Line
Count
Source
144
3.65k
    Counts() = default;
_ZN5doris6CountsInEC2Ev
Line
Count
Source
144
60
    Counts() = default;
Unexecuted instantiation: _ZN5doris6CountsIfEC2Ev
_ZN5doris6CountsIdEC2Ev
Line
Count
Source
144
418
    Counts() = default;
145
146
2.43k
    void merge(Counts* other) {
147
2.43k
        if (other != nullptr && !other->_nums.empty()) {
148
2.43k
            _sorted_nums_vec.emplace_back(std::move(other->_nums));
149
2.43k
        }
150
2.43k
    }
Unexecuted instantiation: _ZN5doris6CountsIhE5mergeEPS1_
Unexecuted instantiation: _ZN5doris6CountsIaE5mergeEPS1_
_ZN5doris6CountsIsE5mergeEPS1_
Line
Count
Source
146
6
    void merge(Counts* other) {
147
6
        if (other != nullptr && !other->_nums.empty()) {
148
6
            _sorted_nums_vec.emplace_back(std::move(other->_nums));
149
6
        }
150
6
    }
_ZN5doris6CountsIiE5mergeEPS1_
Line
Count
Source
146
750
    void merge(Counts* other) {
147
750
        if (other != nullptr && !other->_nums.empty()) {
148
750
            _sorted_nums_vec.emplace_back(std::move(other->_nums));
149
750
        }
150
750
    }
_ZN5doris6CountsIlE5mergeEPS1_
Line
Count
Source
146
1.55k
    void merge(Counts* other) {
147
1.55k
        if (other != nullptr && !other->_nums.empty()) {
148
1.55k
            _sorted_nums_vec.emplace_back(std::move(other->_nums));
149
1.55k
        }
150
1.55k
    }
Unexecuted instantiation: _ZN5doris6CountsInE5mergeEPS1_
Unexecuted instantiation: _ZN5doris6CountsIfE5mergeEPS1_
_ZN5doris6CountsIdE5mergeEPS1_
Line
Count
Source
146
124
    void merge(Counts* other) {
147
124
        if (other != nullptr && !other->_nums.empty()) {
148
124
            _sorted_nums_vec.emplace_back(std::move(other->_nums));
149
124
        }
150
124
    }
151
152
    void increment(Ty key, uint32_t i) {
153
        auto old_size = _nums.size();
154
        _nums.resize(_nums.size() + i);
155
        for (uint32_t j = 0; j < i; ++j) {
156
            _nums[old_size + j] = key;
157
        }
158
    }
159
160
6.10k
    void increment(Ty key) { _nums.push_back(key); }
Unexecuted instantiation: _ZN5doris6CountsIhE9incrementEh
_ZN5doris6CountsIaE9incrementEa
Line
Count
Source
160
128
    void increment(Ty key) { _nums.push_back(key); }
_ZN5doris6CountsIsE9incrementEs
Line
Count
Source
160
1.57k
    void increment(Ty key) { _nums.push_back(key); }
_ZN5doris6CountsIiE9incrementEi
Line
Count
Source
160
2.40k
    void increment(Ty key) { _nums.push_back(key); }
_ZN5doris6CountsIlE9incrementEl
Line
Count
Source
160
1.42k
    void increment(Ty key) { _nums.push_back(key); }
_ZN5doris6CountsInE9incrementEn
Line
Count
Source
160
72
    void increment(Ty key) { _nums.push_back(key); }
Unexecuted instantiation: _ZN5doris6CountsIfE9incrementEf
_ZN5doris6CountsIdE9incrementEd
Line
Count
Source
160
500
    void increment(Ty key) { _nums.push_back(key); }
161
162
10
    void increment_batch(const vectorized::PaddedPODArray<Ty>& keys) {
163
10
        _nums.insert(keys.begin(), keys.end());
164
10
    }
Unexecuted instantiation: _ZN5doris6CountsIhE15increment_batchERKNS_10vectorized8PODArrayIhLm4096E9AllocatorILb0ELb0ELb0E22DefaultMemoryAllocatorELm16ELm15EEE
Unexecuted instantiation: _ZN5doris6CountsIaE15increment_batchERKNS_10vectorized8PODArrayIaLm4096E9AllocatorILb0ELb0ELb0E22DefaultMemoryAllocatorELm16ELm15EEE
Unexecuted instantiation: _ZN5doris6CountsIsE15increment_batchERKNS_10vectorized8PODArrayIsLm4096E9AllocatorILb0ELb0ELb0E22DefaultMemoryAllocatorELm16ELm15EEE
Unexecuted instantiation: _ZN5doris6CountsIiE15increment_batchERKNS_10vectorized8PODArrayIiLm4096E9AllocatorILb0ELb0ELb0E22DefaultMemoryAllocatorELm16ELm15EEE
_ZN5doris6CountsIlE15increment_batchERKNS_10vectorized8PODArrayIlLm4096E9AllocatorILb0ELb0ELb0E22DefaultMemoryAllocatorELm16ELm15EEE
Line
Count
Source
162
10
    void increment_batch(const vectorized::PaddedPODArray<Ty>& keys) {
163
10
        _nums.insert(keys.begin(), keys.end());
164
10
    }
Unexecuted instantiation: _ZN5doris6CountsInE15increment_batchERKNS_10vectorized8PODArrayInLm4096E9AllocatorILb0ELb0ELb0E22DefaultMemoryAllocatorELm16ELm15EEE
Unexecuted instantiation: _ZN5doris6CountsIfE15increment_batchERKNS_10vectorized8PODArrayIfLm4096E9AllocatorILb0ELb0ELb0E22DefaultMemoryAllocatorELm16ELm15EEE
Unexecuted instantiation: _ZN5doris6CountsIdE15increment_batchERKNS_10vectorized8PODArrayIdLm4096E9AllocatorILb0ELb0ELb0E22DefaultMemoryAllocatorELm16ELm15EEE
165
166
3.38k
    void serialize(vectorized::BufferWritable& buf) {
167
3.38k
        if (!_nums.empty()) {
168
2.65k
            pdqsort(_nums.begin(), _nums.end());
169
2.65k
            size_t size = _nums.size();
170
2.65k
            write_binary(size, buf);
171
2.65k
            buf.write(reinterpret_cast<const char*>(_nums.data()), sizeof(Ty) * size);
172
2.65k
        } else {
173
            // convert _sorted_nums_vec to _nums and do seiralize again
174
724
            _convert_sorted_num_vec_to_nums();
175
724
            serialize(buf);
176
724
        }
177
3.38k
    }
Unexecuted instantiation: _ZN5doris6CountsIhE9serializeERNS_10vectorized14BufferWritableE
Unexecuted instantiation: _ZN5doris6CountsIaE9serializeERNS_10vectorized14BufferWritableE
_ZN5doris6CountsIsE9serializeERNS_10vectorized14BufferWritableE
Line
Count
Source
166
6
    void serialize(vectorized::BufferWritable& buf) {
167
6
        if (!_nums.empty()) {
168
6
            pdqsort(_nums.begin(), _nums.end());
169
6
            size_t size = _nums.size();
170
6
            write_binary(size, buf);
171
6
            buf.write(reinterpret_cast<const char*>(_nums.data()), sizeof(Ty) * size);
172
6
        } else {
173
            // convert _sorted_nums_vec to _nums and do seiralize again
174
0
            _convert_sorted_num_vec_to_nums();
175
0
            serialize(buf);
176
0
        }
177
6
    }
_ZN5doris6CountsIiE9serializeERNS_10vectorized14BufferWritableE
Line
Count
Source
166
750
    void serialize(vectorized::BufferWritable& buf) {
167
750
        if (!_nums.empty()) {
168
750
            pdqsort(_nums.begin(), _nums.end());
169
750
            size_t size = _nums.size();
170
750
            write_binary(size, buf);
171
750
            buf.write(reinterpret_cast<const char*>(_nums.data()), sizeof(Ty) * size);
172
750
        } else {
173
            // convert _sorted_nums_vec to _nums and do seiralize again
174
0
            _convert_sorted_num_vec_to_nums();
175
0
            serialize(buf);
176
0
        }
177
750
    }
_ZN5doris6CountsIlE9serializeERNS_10vectorized14BufferWritableE
Line
Count
Source
166
2.50k
    void serialize(vectorized::BufferWritable& buf) {
167
2.50k
        if (!_nums.empty()) {
168
1.77k
            pdqsort(_nums.begin(), _nums.end());
169
1.77k
            size_t size = _nums.size();
170
1.77k
            write_binary(size, buf);
171
1.77k
            buf.write(reinterpret_cast<const char*>(_nums.data()), sizeof(Ty) * size);
172
1.77k
        } else {
173
            // convert _sorted_nums_vec to _nums and do seiralize again
174
724
            _convert_sorted_num_vec_to_nums();
175
724
            serialize(buf);
176
724
        }
177
2.50k
    }
Unexecuted instantiation: _ZN5doris6CountsInE9serializeERNS_10vectorized14BufferWritableE
Unexecuted instantiation: _ZN5doris6CountsIfE9serializeERNS_10vectorized14BufferWritableE
_ZN5doris6CountsIdE9serializeERNS_10vectorized14BufferWritableE
Line
Count
Source
166
124
    void serialize(vectorized::BufferWritable& buf) {
167
124
        if (!_nums.empty()) {
168
124
            pdqsort(_nums.begin(), _nums.end());
169
124
            size_t size = _nums.size();
170
124
            write_binary(size, buf);
171
124
            buf.write(reinterpret_cast<const char*>(_nums.data()), sizeof(Ty) * size);
172
124
        } else {
173
            // convert _sorted_nums_vec to _nums and do seiralize again
174
0
            _convert_sorted_num_vec_to_nums();
175
0
            serialize(buf);
176
0
        }
177
124
    }
178
179
2.43k
    void unserialize(vectorized::BufferReadable& buf) {
180
2.43k
        size_t size;
181
2.43k
        read_binary(size, buf);
182
2.43k
        _nums.resize(size);
183
2.43k
        auto buff = buf.read(sizeof(Ty) * size);
184
2.43k
        memcpy(_nums.data(), buff.data, buff.size);
185
2.43k
    }
Unexecuted instantiation: _ZN5doris6CountsIhE11unserializeERNS_10vectorized14BufferReadableE
Unexecuted instantiation: _ZN5doris6CountsIaE11unserializeERNS_10vectorized14BufferReadableE
_ZN5doris6CountsIsE11unserializeERNS_10vectorized14BufferReadableE
Line
Count
Source
179
6
    void unserialize(vectorized::BufferReadable& buf) {
180
6
        size_t size;
181
6
        read_binary(size, buf);
182
6
        _nums.resize(size);
183
6
        auto buff = buf.read(sizeof(Ty) * size);
184
6
        memcpy(_nums.data(), buff.data, buff.size);
185
6
    }
_ZN5doris6CountsIiE11unserializeERNS_10vectorized14BufferReadableE
Line
Count
Source
179
750
    void unserialize(vectorized::BufferReadable& buf) {
180
750
        size_t size;
181
750
        read_binary(size, buf);
182
750
        _nums.resize(size);
183
750
        auto buff = buf.read(sizeof(Ty) * size);
184
750
        memcpy(_nums.data(), buff.data, buff.size);
185
750
    }
_ZN5doris6CountsIlE11unserializeERNS_10vectorized14BufferReadableE
Line
Count
Source
179
1.55k
    void unserialize(vectorized::BufferReadable& buf) {
180
1.55k
        size_t size;
181
1.55k
        read_binary(size, buf);
182
1.55k
        _nums.resize(size);
183
1.55k
        auto buff = buf.read(sizeof(Ty) * size);
184
1.55k
        memcpy(_nums.data(), buff.data, buff.size);
185
1.55k
    }
Unexecuted instantiation: _ZN5doris6CountsInE11unserializeERNS_10vectorized14BufferReadableE
Unexecuted instantiation: _ZN5doris6CountsIfE11unserializeERNS_10vectorized14BufferReadableE
_ZN5doris6CountsIdE11unserializeERNS_10vectorized14BufferReadableE
Line
Count
Source
179
124
    void unserialize(vectorized::BufferReadable& buf) {
180
124
        size_t size;
181
124
        read_binary(size, buf);
182
124
        _nums.resize(size);
183
124
        auto buff = buf.read(sizeof(Ty) * size);
184
124
        memcpy(_nums.data(), buff.data, buff.size);
185
124
    }
186
187
2.20k
    double terminate(double quantile) {
188
2.20k
        if (_sorted_nums_vec.size() <= 1) {
189
2.03k
            if (_sorted_nums_vec.size() == 1) {
190
752
                _nums = std::move(_sorted_nums_vec[0]);
191
752
            }
192
193
2.03k
            if (_nums.empty()) {
194
                // Although set null here, but the value is 0.0 and the call method just
195
                // get val in aggregate_function_percentile_approx.h
196
0
                return 0.0;
197
0
            }
198
199
2.03k
            if (UNLIKELY(!std::is_sorted(_nums.begin(), _nums.end()))) {
200
385
                pdqsort(_nums.begin(), _nums.end());
201
385
            }
202
203
2.03k
            if (quantile == 1 || _nums.size() == 1) {
204
954
                return _nums.back();
205
954
            }
206
207
1.08k
            double u = (_nums.size() - 1) * quantile;
208
1.08k
            auto index = static_cast<uint32_t>(u);
209
1.08k
            return _nums[index] +
210
1.08k
                   (u - static_cast<double>(index)) * (_nums[index + 1] - _nums[index]);
211
2.03k
        } else {
212
169
            DCHECK(_nums.empty());
213
169
            size_t rows = 0;
214
410
            for (const auto& i : _sorted_nums_vec) {
215
410
                rows += i.size();
216
410
            }
217
169
            const bool reverse = quantile > 0.5 && rows > 2;
218
169
            double u = (rows - 1) * quantile;
219
169
            auto index = static_cast<uint32_t>(u);
220
            // if reverse, the step of target should start 0 like not reverse
221
            // so here rows need to minus index + 2
222
            // eg: rows = 10, index = 5
223
            // if not reverse, so the first number loc is 5, the second number loc is 6
224
            // if reverse, so the second number is 3, the first number is 4
225
            // 5 + 4 = 3 + 6 = 9 = rows - 1.
226
            // the rows must GE 2 beacuse `_sorted_nums_vec` size GE 2
227
169
            size_t target = reverse ? rows - index - 2 : index;
228
169
            if (quantile == 1) {
229
0
                target = 0;
230
0
            }
231
169
            auto [first_number, second_number] = _merge_sort_and_get_numbers(target, reverse);
232
169
            if (quantile == 1) {
233
0
                return second_number;
234
0
            }
235
169
            return first_number + (u - static_cast<double>(index)) * (second_number - first_number);
236
169
        }
237
2.20k
    }
Unexecuted instantiation: _ZN5doris6CountsIhE9terminateEd
_ZN5doris6CountsIaE9terminateEd
Line
Count
Source
187
116
    double terminate(double quantile) {
188
116
        if (_sorted_nums_vec.size() <= 1) {
189
116
            if (_sorted_nums_vec.size() == 1) {
190
0
                _nums = std::move(_sorted_nums_vec[0]);
191
0
            }
192
193
116
            if (_nums.empty()) {
194
                // Although set null here, but the value is 0.0 and the call method just
195
                // get val in aggregate_function_percentile_approx.h
196
0
                return 0.0;
197
0
            }
198
199
116
            if (UNLIKELY(!std::is_sorted(_nums.begin(), _nums.end()))) {
200
48
                pdqsort(_nums.begin(), _nums.end());
201
48
            }
202
203
116
            if (quantile == 1 || _nums.size() == 1) {
204
68
                return _nums.back();
205
68
            }
206
207
48
            double u = (_nums.size() - 1) * quantile;
208
48
            auto index = static_cast<uint32_t>(u);
209
48
            return _nums[index] +
210
48
                   (u - static_cast<double>(index)) * (_nums[index + 1] - _nums[index]);
211
116
        } else {
212
0
            DCHECK(_nums.empty());
213
0
            size_t rows = 0;
214
0
            for (const auto& i : _sorted_nums_vec) {
215
0
                rows += i.size();
216
0
            }
217
0
            const bool reverse = quantile > 0.5 && rows > 2;
218
0
            double u = (rows - 1) * quantile;
219
0
            auto index = static_cast<uint32_t>(u);
220
            // if reverse, the step of target should start 0 like not reverse
221
            // so here rows need to minus index + 2
222
            // eg: rows = 10, index = 5
223
            // if not reverse, so the first number loc is 5, the second number loc is 6
224
            // if reverse, so the second number is 3, the first number is 4
225
            // 5 + 4 = 3 + 6 = 9 = rows - 1.
226
            // the rows must GE 2 beacuse `_sorted_nums_vec` size GE 2
227
0
            size_t target = reverse ? rows - index - 2 : index;
228
0
            if (quantile == 1) {
229
0
                target = 0;
230
0
            }
231
0
            auto [first_number, second_number] = _merge_sort_and_get_numbers(target, reverse);
232
0
            if (quantile == 1) {
233
0
                return second_number;
234
0
            }
235
0
            return first_number + (u - static_cast<double>(index)) * (second_number - first_number);
236
0
        }
237
116
    }
_ZN5doris6CountsIsE9terminateEd
Line
Count
Source
187
592
    double terminate(double quantile) {
188
592
        if (_sorted_nums_vec.size() <= 1) {
189
592
            if (_sorted_nums_vec.size() == 1) {
190
6
                _nums = std::move(_sorted_nums_vec[0]);
191
6
            }
192
193
592
            if (_nums.empty()) {
194
                // Although set null here, but the value is 0.0 and the call method just
195
                // get val in aggregate_function_percentile_approx.h
196
0
                return 0.0;
197
0
            }
198
199
592
            if (UNLIKELY(!std::is_sorted(_nums.begin(), _nums.end()))) {
200
330
                pdqsort(_nums.begin(), _nums.end());
201
330
            }
202
203
592
            if (quantile == 1 || _nums.size() == 1) {
204
174
                return _nums.back();
205
174
            }
206
207
418
            double u = (_nums.size() - 1) * quantile;
208
418
            auto index = static_cast<uint32_t>(u);
209
418
            return _nums[index] +
210
418
                   (u - static_cast<double>(index)) * (_nums[index + 1] - _nums[index]);
211
592
        } else {
212
0
            DCHECK(_nums.empty());
213
0
            size_t rows = 0;
214
0
            for (const auto& i : _sorted_nums_vec) {
215
0
                rows += i.size();
216
0
            }
217
0
            const bool reverse = quantile > 0.5 && rows > 2;
218
0
            double u = (rows - 1) * quantile;
219
0
            auto index = static_cast<uint32_t>(u);
220
            // if reverse, the step of target should start 0 like not reverse
221
            // so here rows need to minus index + 2
222
            // eg: rows = 10, index = 5
223
            // if not reverse, so the first number loc is 5, the second number loc is 6
224
            // if reverse, so the second number is 3, the first number is 4
225
            // 5 + 4 = 3 + 6 = 9 = rows - 1.
226
            // the rows must GE 2 beacuse `_sorted_nums_vec` size GE 2
227
0
            size_t target = reverse ? rows - index - 2 : index;
228
0
            if (quantile == 1) {
229
0
                target = 0;
230
0
            }
231
0
            auto [first_number, second_number] = _merge_sort_and_get_numbers(target, reverse);
232
0
            if (quantile == 1) {
233
0
                return second_number;
234
0
            }
235
0
            return first_number + (u - static_cast<double>(index)) * (second_number - first_number);
236
0
        }
237
592
    }
_ZN5doris6CountsIiE9terminateEd
Line
Count
Source
187
862
    double terminate(double quantile) {
188
862
        if (_sorted_nums_vec.size() <= 1) {
189
754
            if (_sorted_nums_vec.size() == 1) {
190
534
                _nums = std::move(_sorted_nums_vec[0]);
191
534
            }
192
193
754
            if (_nums.empty()) {
194
                // Although set null here, but the value is 0.0 and the call method just
195
                // get val in aggregate_function_percentile_approx.h
196
0
                return 0.0;
197
0
            }
198
199
754
            if (UNLIKELY(!std::is_sorted(_nums.begin(), _nums.end()))) {
200
6
                pdqsort(_nums.begin(), _nums.end());
201
6
            }
202
203
754
            if (quantile == 1 || _nums.size() == 1) {
204
478
                return _nums.back();
205
478
            }
206
207
276
            double u = (_nums.size() - 1) * quantile;
208
276
            auto index = static_cast<uint32_t>(u);
209
276
            return _nums[index] +
210
276
                   (u - static_cast<double>(index)) * (_nums[index + 1] - _nums[index]);
211
754
        } else {
212
108
            DCHECK(_nums.empty());
213
108
            size_t rows = 0;
214
216
            for (const auto& i : _sorted_nums_vec) {
215
216
                rows += i.size();
216
216
            }
217
108
            const bool reverse = quantile > 0.5 && rows > 2;
218
108
            double u = (rows - 1) * quantile;
219
108
            auto index = static_cast<uint32_t>(u);
220
            // if reverse, the step of target should start 0 like not reverse
221
            // so here rows need to minus index + 2
222
            // eg: rows = 10, index = 5
223
            // if not reverse, so the first number loc is 5, the second number loc is 6
224
            // if reverse, so the second number is 3, the first number is 4
225
            // 5 + 4 = 3 + 6 = 9 = rows - 1.
226
            // the rows must GE 2 beacuse `_sorted_nums_vec` size GE 2
227
108
            size_t target = reverse ? rows - index - 2 : index;
228
108
            if (quantile == 1) {
229
0
                target = 0;
230
0
            }
231
108
            auto [first_number, second_number] = _merge_sort_and_get_numbers(target, reverse);
232
108
            if (quantile == 1) {
233
0
                return second_number;
234
0
            }
235
108
            return first_number + (u - static_cast<double>(index)) * (second_number - first_number);
236
108
        }
237
862
    }
_ZN5doris6CountsIlE9terminateEd
Line
Count
Source
187
311
    double terminate(double quantile) {
188
311
        if (_sorted_nums_vec.size() <= 1) {
189
274
            if (_sorted_nums_vec.size() == 1) {
190
136
                _nums = std::move(_sorted_nums_vec[0]);
191
136
            }
192
193
274
            if (_nums.empty()) {
194
                // Although set null here, but the value is 0.0 and the call method just
195
                // get val in aggregate_function_percentile_approx.h
196
0
                return 0.0;
197
0
            }
198
199
274
            if (UNLIKELY(!std::is_sorted(_nums.begin(), _nums.end()))) {
200
1
                pdqsort(_nums.begin(), _nums.end());
201
1
            }
202
203
274
            if (quantile == 1 || _nums.size() == 1) {
204
126
                return _nums.back();
205
126
            }
206
207
148
            double u = (_nums.size() - 1) * quantile;
208
148
            auto index = static_cast<uint32_t>(u);
209
148
            return _nums[index] +
210
148
                   (u - static_cast<double>(index)) * (_nums[index + 1] - _nums[index]);
211
274
        } else {
212
37
            DCHECK(_nums.empty());
213
37
            size_t rows = 0;
214
146
            for (const auto& i : _sorted_nums_vec) {
215
146
                rows += i.size();
216
146
            }
217
37
            const bool reverse = quantile > 0.5 && rows > 2;
218
37
            double u = (rows - 1) * quantile;
219
37
            auto index = static_cast<uint32_t>(u);
220
            // if reverse, the step of target should start 0 like not reverse
221
            // so here rows need to minus index + 2
222
            // eg: rows = 10, index = 5
223
            // if not reverse, so the first number loc is 5, the second number loc is 6
224
            // if reverse, so the second number is 3, the first number is 4
225
            // 5 + 4 = 3 + 6 = 9 = rows - 1.
226
            // the rows must GE 2 beacuse `_sorted_nums_vec` size GE 2
227
37
            size_t target = reverse ? rows - index - 2 : index;
228
37
            if (quantile == 1) {
229
0
                target = 0;
230
0
            }
231
37
            auto [first_number, second_number] = _merge_sort_and_get_numbers(target, reverse);
232
37
            if (quantile == 1) {
233
0
                return second_number;
234
0
            }
235
37
            return first_number + (u - static_cast<double>(index)) * (second_number - first_number);
236
37
        }
237
311
    }
_ZN5doris6CountsInE9terminateEd
Line
Count
Source
187
60
    double terminate(double quantile) {
188
60
        if (_sorted_nums_vec.size() <= 1) {
189
60
            if (_sorted_nums_vec.size() == 1) {
190
0
                _nums = std::move(_sorted_nums_vec[0]);
191
0
            }
192
193
60
            if (_nums.empty()) {
194
                // Although set null here, but the value is 0.0 and the call method just
195
                // get val in aggregate_function_percentile_approx.h
196
0
                return 0.0;
197
0
            }
198
199
60
            if (UNLIKELY(!std::is_sorted(_nums.begin(), _nums.end()))) {
200
0
                pdqsort(_nums.begin(), _nums.end());
201
0
            }
202
203
60
            if (quantile == 1 || _nums.size() == 1) {
204
48
                return _nums.back();
205
48
            }
206
207
12
            double u = (_nums.size() - 1) * quantile;
208
12
            auto index = static_cast<uint32_t>(u);
209
12
            return _nums[index] +
210
12
                   (u - static_cast<double>(index)) * (_nums[index + 1] - _nums[index]);
211
60
        } else {
212
0
            DCHECK(_nums.empty());
213
0
            size_t rows = 0;
214
0
            for (const auto& i : _sorted_nums_vec) {
215
0
                rows += i.size();
216
0
            }
217
0
            const bool reverse = quantile > 0.5 && rows > 2;
218
0
            double u = (rows - 1) * quantile;
219
0
            auto index = static_cast<uint32_t>(u);
220
            // if reverse, the step of target should start 0 like not reverse
221
            // so here rows need to minus index + 2
222
            // eg: rows = 10, index = 5
223
            // if not reverse, so the first number loc is 5, the second number loc is 6
224
            // if reverse, so the second number is 3, the first number is 4
225
            // 5 + 4 = 3 + 6 = 9 = rows - 1.
226
            // the rows must GE 2 beacuse `_sorted_nums_vec` size GE 2
227
0
            size_t target = reverse ? rows - index - 2 : index;
228
0
            if (quantile == 1) {
229
0
                target = 0;
230
0
            }
231
0
            auto [first_number, second_number] = _merge_sort_and_get_numbers(target, reverse);
232
0
            if (quantile == 1) {
233
0
                return second_number;
234
0
            }
235
0
            return first_number + (u - static_cast<double>(index)) * (second_number - first_number);
236
0
        }
237
60
    }
Unexecuted instantiation: _ZN5doris6CountsIfE9terminateEd
_ZN5doris6CountsIdE9terminateEd
Line
Count
Source
187
266
    double terminate(double quantile) {
188
266
        if (_sorted_nums_vec.size() <= 1) {
189
242
            if (_sorted_nums_vec.size() == 1) {
190
76
                _nums = std::move(_sorted_nums_vec[0]);
191
76
            }
192
193
242
            if (_nums.empty()) {
194
                // Although set null here, but the value is 0.0 and the call method just
195
                // get val in aggregate_function_percentile_approx.h
196
0
                return 0.0;
197
0
            }
198
199
242
            if (UNLIKELY(!std::is_sorted(_nums.begin(), _nums.end()))) {
200
0
                pdqsort(_nums.begin(), _nums.end());
201
0
            }
202
203
242
            if (quantile == 1 || _nums.size() == 1) {
204
60
                return _nums.back();
205
60
            }
206
207
182
            double u = (_nums.size() - 1) * quantile;
208
182
            auto index = static_cast<uint32_t>(u);
209
182
            return _nums[index] +
210
182
                   (u - static_cast<double>(index)) * (_nums[index + 1] - _nums[index]);
211
242
        } else {
212
24
            DCHECK(_nums.empty());
213
24
            size_t rows = 0;
214
48
            for (const auto& i : _sorted_nums_vec) {
215
48
                rows += i.size();
216
48
            }
217
24
            const bool reverse = quantile > 0.5 && rows > 2;
218
24
            double u = (rows - 1) * quantile;
219
24
            auto index = static_cast<uint32_t>(u);
220
            // if reverse, the step of target should start 0 like not reverse
221
            // so here rows need to minus index + 2
222
            // eg: rows = 10, index = 5
223
            // if not reverse, so the first number loc is 5, the second number loc is 6
224
            // if reverse, so the second number is 3, the first number is 4
225
            // 5 + 4 = 3 + 6 = 9 = rows - 1.
226
            // the rows must GE 2 beacuse `_sorted_nums_vec` size GE 2
227
24
            size_t target = reverse ? rows - index - 2 : index;
228
24
            if (quantile == 1) {
229
0
                target = 0;
230
0
            }
231
24
            auto [first_number, second_number] = _merge_sort_and_get_numbers(target, reverse);
232
24
            if (quantile == 1) {
233
0
                return second_number;
234
0
            }
235
24
            return first_number + (u - static_cast<double>(index)) * (second_number - first_number);
236
24
        }
237
266
    }
238
239
private:
240
    struct Node {
241
        Ty value;
242
        int array_index;
243
        int64_t element_index;
244
245
1.81k
        auto operator<=>(const Node& other) const { return value <=> other.value; }
Unexecuted instantiation: _ZNK5doris6CountsIhE4NodessERKS2_
Unexecuted instantiation: _ZNK5doris6CountsIaE4NodessERKS2_
Unexecuted instantiation: _ZNK5doris6CountsIsE4NodessERKS2_
_ZNK5doris6CountsIiE4NodessERKS2_
Line
Count
Source
245
380
        auto operator<=>(const Node& other) const { return value <=> other.value; }
_ZNK5doris6CountsIlE4NodessERKS2_
Line
Count
Source
245
1.37k
        auto operator<=>(const Node& other) const { return value <=> other.value; }
Unexecuted instantiation: _ZNK5doris6CountsInE4NodessERKS2_
Unexecuted instantiation: _ZNK5doris6CountsIfE4NodessERKS2_
_ZNK5doris6CountsIdE4NodessERKS2_
Line
Count
Source
245
54
        auto operator<=>(const Node& other) const { return value <=> other.value; }
246
    };
247
248
724
    void _convert_sorted_num_vec_to_nums() {
249
724
        size_t rows = 0;
250
1.27k
        for (const auto& i : _sorted_nums_vec) {
251
1.27k
            rows += i.size();
252
1.27k
        }
253
724
        _nums.resize(rows);
254
724
        size_t count = 0;
255
256
724
        std::priority_queue<Node, std::vector<Node>, std::greater<Node>> min_heap;
257
2.00k
        for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
258
1.27k
            if (!_sorted_nums_vec[i].empty()) {
259
1.27k
                min_heap.emplace(_sorted_nums_vec[i][0], i, 0);
260
1.27k
            }
261
1.27k
        }
262
263
2.19k
        while (!min_heap.empty()) {
264
1.46k
            Node node = min_heap.top();
265
1.46k
            min_heap.pop();
266
1.46k
            _nums[count++] = node.value;
267
1.46k
            if (++node.element_index < _sorted_nums_vec[node.array_index].size()) {
268
192
                node.value = _sorted_nums_vec[node.array_index][node.element_index];
269
192
                min_heap.push(node);
270
192
            }
271
1.46k
        }
272
724
        _sorted_nums_vec.clear();
273
724
    }
Unexecuted instantiation: _ZN5doris6CountsIhE31_convert_sorted_num_vec_to_numsEv
Unexecuted instantiation: _ZN5doris6CountsIaE31_convert_sorted_num_vec_to_numsEv
Unexecuted instantiation: _ZN5doris6CountsIsE31_convert_sorted_num_vec_to_numsEv
Unexecuted instantiation: _ZN5doris6CountsIiE31_convert_sorted_num_vec_to_numsEv
_ZN5doris6CountsIlE31_convert_sorted_num_vec_to_numsEv
Line
Count
Source
248
724
    void _convert_sorted_num_vec_to_nums() {
249
724
        size_t rows = 0;
250
1.27k
        for (const auto& i : _sorted_nums_vec) {
251
1.27k
            rows += i.size();
252
1.27k
        }
253
724
        _nums.resize(rows);
254
724
        size_t count = 0;
255
256
724
        std::priority_queue<Node, std::vector<Node>, std::greater<Node>> min_heap;
257
2.00k
        for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
258
1.27k
            if (!_sorted_nums_vec[i].empty()) {
259
1.27k
                min_heap.emplace(_sorted_nums_vec[i][0], i, 0);
260
1.27k
            }
261
1.27k
        }
262
263
2.19k
        while (!min_heap.empty()) {
264
1.46k
            Node node = min_heap.top();
265
1.46k
            min_heap.pop();
266
1.46k
            _nums[count++] = node.value;
267
1.46k
            if (++node.element_index < _sorted_nums_vec[node.array_index].size()) {
268
192
                node.value = _sorted_nums_vec[node.array_index][node.element_index];
269
192
                min_heap.push(node);
270
192
            }
271
1.46k
        }
272
724
        _sorted_nums_vec.clear();
273
724
    }
Unexecuted instantiation: _ZN5doris6CountsInE31_convert_sorted_num_vec_to_numsEv
Unexecuted instantiation: _ZN5doris6CountsIfE31_convert_sorted_num_vec_to_numsEv
Unexecuted instantiation: _ZN5doris6CountsIdE31_convert_sorted_num_vec_to_numsEv
274
275
169
    std::pair<Ty, Ty> _merge_sort_and_get_numbers(int64_t target, bool reverse) {
276
169
        Ty first_number = 0, second_number = 0;
277
169
        size_t count = 0;
278
169
        if (reverse) {
279
52
            std::priority_queue<Node> max_heap;
280
228
            for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
281
176
                if (!_sorted_nums_vec[i].empty()) {
282
176
                    max_heap.emplace(_sorted_nums_vec[i][_sorted_nums_vec[i].size() - 1], i,
283
176
                                     _sorted_nums_vec[i].size() - 1);
284
176
                }
285
176
            }
286
287
230
            while (!max_heap.empty()) {
288
230
                Node node = max_heap.top();
289
230
                max_heap.pop();
290
230
                if (count == target) {
291
52
                    second_number = node.value;
292
178
                } else if (count == target + 1) {
293
52
                    first_number = node.value;
294
52
                    break;
295
52
                }
296
178
                ++count;
297
178
                if (--node.element_index >= 0) {
298
144
                    node.value = _sorted_nums_vec[node.array_index][node.element_index];
299
144
                    max_heap.push(node);
300
144
                }
301
178
            }
302
303
117
        } else {
304
117
            std::priority_queue<Node, std::vector<Node>, std::greater<Node>> min_heap;
305
351
            for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
306
234
                if (!_sorted_nums_vec[i].empty()) {
307
234
                    min_heap.emplace(_sorted_nums_vec[i][0], i, 0);
308
234
                }
309
234
            }
310
311
393
            while (!min_heap.empty()) {
312
393
                Node node = min_heap.top();
313
393
                min_heap.pop();
314
393
                if (count == target) {
315
117
                    first_number = node.value;
316
276
                } else if (count == target + 1) {
317
117
                    second_number = node.value;
318
117
                    break;
319
117
                }
320
276
                ++count;
321
276
                if (++node.element_index < _sorted_nums_vec[node.array_index].size()) {
322
266
                    node.value = _sorted_nums_vec[node.array_index][node.element_index];
323
266
                    min_heap.push(node);
324
266
                }
325
276
            }
326
117
        }
327
328
169
        return {first_number, second_number};
329
169
    }
Unexecuted instantiation: _ZN5doris6CountsIhE27_merge_sort_and_get_numbersElb
Unexecuted instantiation: _ZN5doris6CountsIaE27_merge_sort_and_get_numbersElb
Unexecuted instantiation: _ZN5doris6CountsIsE27_merge_sort_and_get_numbersElb
_ZN5doris6CountsIiE27_merge_sort_and_get_numbersElb
Line
Count
Source
275
108
    std::pair<Ty, Ty> _merge_sort_and_get_numbers(int64_t target, bool reverse) {
276
108
        Ty first_number = 0, second_number = 0;
277
108
        size_t count = 0;
278
108
        if (reverse) {
279
16
            std::priority_queue<Node> max_heap;
280
48
            for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
281
32
                if (!_sorted_nums_vec[i].empty()) {
282
32
                    max_heap.emplace(_sorted_nums_vec[i][_sorted_nums_vec[i].size() - 1], i,
283
32
                                     _sorted_nums_vec[i].size() - 1);
284
32
                }
285
32
            }
286
287
70
            while (!max_heap.empty()) {
288
70
                Node node = max_heap.top();
289
70
                max_heap.pop();
290
70
                if (count == target) {
291
16
                    second_number = node.value;
292
54
                } else if (count == target + 1) {
293
16
                    first_number = node.value;
294
16
                    break;
295
16
                }
296
54
                ++count;
297
54
                if (--node.element_index >= 0) {
298
46
                    node.value = _sorted_nums_vec[node.array_index][node.element_index];
299
46
                    max_heap.push(node);
300
46
                }
301
54
            }
302
303
92
        } else {
304
92
            std::priority_queue<Node, std::vector<Node>, std::greater<Node>> min_heap;
305
276
            for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
306
184
                if (!_sorted_nums_vec[i].empty()) {
307
184
                    min_heap.emplace(_sorted_nums_vec[i][0], i, 0);
308
184
                }
309
184
            }
310
311
320
            while (!min_heap.empty()) {
312
320
                Node node = min_heap.top();
313
320
                min_heap.pop();
314
320
                if (count == target) {
315
92
                    first_number = node.value;
316
228
                } else if (count == target + 1) {
317
92
                    second_number = node.value;
318
92
                    break;
319
92
                }
320
228
                ++count;
321
228
                if (++node.element_index < _sorted_nums_vec[node.array_index].size()) {
322
226
                    node.value = _sorted_nums_vec[node.array_index][node.element_index];
323
226
                    min_heap.push(node);
324
226
                }
325
228
            }
326
92
        }
327
328
108
        return {first_number, second_number};
329
108
    }
_ZN5doris6CountsIlE27_merge_sort_and_get_numbersElb
Line
Count
Source
275
37
    std::pair<Ty, Ty> _merge_sort_and_get_numbers(int64_t target, bool reverse) {
276
37
        Ty first_number = 0, second_number = 0;
277
37
        size_t count = 0;
278
37
        if (reverse) {
279
32
            std::priority_queue<Node> max_heap;
280
168
            for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
281
136
                if (!_sorted_nums_vec[i].empty()) {
282
136
                    max_heap.emplace(_sorted_nums_vec[i][_sorted_nums_vec[i].size() - 1], i,
283
136
                                     _sorted_nums_vec[i].size() - 1);
284
136
                }
285
136
            }
286
287
150
            while (!max_heap.empty()) {
288
150
                Node node = max_heap.top();
289
150
                max_heap.pop();
290
150
                if (count == target) {
291
32
                    second_number = node.value;
292
118
                } else if (count == target + 1) {
293
32
                    first_number = node.value;
294
32
                    break;
295
32
                }
296
118
                ++count;
297
118
                if (--node.element_index >= 0) {
298
92
                    node.value = _sorted_nums_vec[node.array_index][node.element_index];
299
92
                    max_heap.push(node);
300
92
                }
301
118
            }
302
303
32
        } else {
304
5
            std::priority_queue<Node, std::vector<Node>, std::greater<Node>> min_heap;
305
15
            for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
306
10
                if (!_sorted_nums_vec[i].empty()) {
307
10
                    min_heap.emplace(_sorted_nums_vec[i][0], i, 0);
308
10
                }
309
10
            }
310
311
17
            while (!min_heap.empty()) {
312
17
                Node node = min_heap.top();
313
17
                min_heap.pop();
314
17
                if (count == target) {
315
5
                    first_number = node.value;
316
12
                } else if (count == target + 1) {
317
5
                    second_number = node.value;
318
5
                    break;
319
5
                }
320
12
                ++count;
321
12
                if (++node.element_index < _sorted_nums_vec[node.array_index].size()) {
322
12
                    node.value = _sorted_nums_vec[node.array_index][node.element_index];
323
12
                    min_heap.push(node);
324
12
                }
325
12
            }
326
5
        }
327
328
37
        return {first_number, second_number};
329
37
    }
Unexecuted instantiation: _ZN5doris6CountsInE27_merge_sort_and_get_numbersElb
Unexecuted instantiation: _ZN5doris6CountsIfE27_merge_sort_and_get_numbersElb
_ZN5doris6CountsIdE27_merge_sort_and_get_numbersElb
Line
Count
Source
275
24
    std::pair<Ty, Ty> _merge_sort_and_get_numbers(int64_t target, bool reverse) {
276
24
        Ty first_number = 0, second_number = 0;
277
24
        size_t count = 0;
278
24
        if (reverse) {
279
4
            std::priority_queue<Node> max_heap;
280
12
            for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
281
8
                if (!_sorted_nums_vec[i].empty()) {
282
8
                    max_heap.emplace(_sorted_nums_vec[i][_sorted_nums_vec[i].size() - 1], i,
283
8
                                     _sorted_nums_vec[i].size() - 1);
284
8
                }
285
8
            }
286
287
10
            while (!max_heap.empty()) {
288
10
                Node node = max_heap.top();
289
10
                max_heap.pop();
290
10
                if (count == target) {
291
4
                    second_number = node.value;
292
6
                } else if (count == target + 1) {
293
4
                    first_number = node.value;
294
4
                    break;
295
4
                }
296
6
                ++count;
297
6
                if (--node.element_index >= 0) {
298
6
                    node.value = _sorted_nums_vec[node.array_index][node.element_index];
299
6
                    max_heap.push(node);
300
6
                }
301
6
            }
302
303
20
        } else {
304
20
            std::priority_queue<Node, std::vector<Node>, std::greater<Node>> min_heap;
305
60
            for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
306
40
                if (!_sorted_nums_vec[i].empty()) {
307
40
                    min_heap.emplace(_sorted_nums_vec[i][0], i, 0);
308
40
                }
309
40
            }
310
311
56
            while (!min_heap.empty()) {
312
56
                Node node = min_heap.top();
313
56
                min_heap.pop();
314
56
                if (count == target) {
315
20
                    first_number = node.value;
316
36
                } else if (count == target + 1) {
317
20
                    second_number = node.value;
318
20
                    break;
319
20
                }
320
36
                ++count;
321
36
                if (++node.element_index < _sorted_nums_vec[node.array_index].size()) {
322
28
                    node.value = _sorted_nums_vec[node.array_index][node.element_index];
323
28
                    min_heap.push(node);
324
28
                }
325
36
            }
326
20
        }
327
328
24
        return {first_number, second_number};
329
24
    }
330
331
    vectorized::PODArray<Ty> _nums;
332
    std::vector<vectorized::PODArray<Ty>> _sorted_nums_vec;
333
};
334
335
} // namespace doris