be/src/util/percentile_util.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <pdqsort.h> |
21 | | |
22 | | #include <algorithm> |
23 | | #include <cmath> |
24 | | #include <cstdint> |
25 | | #include <queue> |
26 | | #include <string> |
27 | | #include <vector> |
28 | | |
29 | | #include "common/cast_set.h" |
30 | | #include "common/exception.h" |
31 | | #include "core/column/column_nullable.h" |
32 | | #include "core/pod_array.h" |
33 | | #include "core/string_buffer.hpp" |
34 | | |
35 | | namespace doris { |
36 | | |
37 | 2.00k | inline void check_quantile(double quantile) { |
38 | 2.00k | if (quantile < 0 || quantile > 1) { |
39 | 26 | throw Exception(ErrorCode::INVALID_ARGUMENT, |
40 | 26 | "quantile in func percentile should in [0, 1], but real data is:" + |
41 | 26 | std::to_string(quantile)); |
42 | 26 | } |
43 | 2.00k | } |
44 | | |
45 | | template <typename Ty> |
46 | | class Counts { |
47 | | public: |
48 | 1.30k | Counts() = default; Unexecuted instantiation: _ZN5doris6CountsIaEC2Ev Unexecuted instantiation: _ZN5doris6CountsIsEC2Ev Line | Count | Source | 48 | 795 | Counts() = default; |
Line | Count | Source | 48 | 509 | Counts() = default; |
Unexecuted instantiation: _ZN5doris6CountsInEC2Ev Unexecuted instantiation: _ZN5doris6CountsIfEC2Ev Unexecuted instantiation: _ZN5doris6CountsIdEC2Ev |
49 | | |
50 | 579 | void merge(Counts* other) { |
51 | 579 | if (other != nullptr && !other->_nums.empty()) { |
52 | 579 | _sorted_nums_vec.emplace_back(std::move(other->_nums)); |
53 | 579 | } |
54 | 579 | } Unexecuted instantiation: _ZN5doris6CountsIaE5mergeEPS1_ Unexecuted instantiation: _ZN5doris6CountsIsE5mergeEPS1_ _ZN5doris6CountsIiE5mergeEPS1_ Line | Count | Source | 50 | 339 | void merge(Counts* other) { | 51 | 339 | if (other != nullptr && !other->_nums.empty()) { | 52 | 339 | _sorted_nums_vec.emplace_back(std::move(other->_nums)); | 53 | 339 | } | 54 | 339 | } |
_ZN5doris6CountsIlE5mergeEPS1_ Line | Count | Source | 50 | 240 | void merge(Counts* other) { | 51 | 240 | if (other != nullptr && !other->_nums.empty()) { | 52 | 240 | _sorted_nums_vec.emplace_back(std::move(other->_nums)); | 53 | 240 | } | 54 | 240 | } |
Unexecuted instantiation: _ZN5doris6CountsInE5mergeEPS1_ Unexecuted instantiation: _ZN5doris6CountsIfE5mergeEPS1_ Unexecuted instantiation: _ZN5doris6CountsIdE5mergeEPS1_ |
55 | | |
56 | | void increment(Ty key, uint32_t i) { |
57 | | if constexpr (std::is_floating_point_v<Ty>) { |
58 | | if (std::isnan(key)) { |
59 | | return; |
60 | | } |
61 | | } |
62 | | auto old_size = _nums.size(); |
63 | | _nums.resize(_nums.size() + i); |
64 | | for (uint32_t j = 0; j < i; ++j) { |
65 | | _nums[old_size + j] = key; |
66 | | } |
67 | | } |
68 | | |
69 | 427 | void increment(Ty key) { |
70 | 427 | if constexpr (std::is_floating_point_v<Ty>) { |
71 | 0 | if (std::isnan(key)) { |
72 | 0 | return; |
73 | 0 | } |
74 | 0 | } |
75 | 0 | _nums.push_back(key); |
76 | 427 | } Unexecuted instantiation: _ZN5doris6CountsIaE9incrementEa Unexecuted instantiation: _ZN5doris6CountsIsE9incrementEs _ZN5doris6CountsIiE9incrementEi Line | Count | Source | 69 | 300 | void increment(Ty key) { | 70 | | if constexpr (std::is_floating_point_v<Ty>) { | 71 | | if (std::isnan(key)) { | 72 | | return; | 73 | | } | 74 | | } | 75 | 300 | _nums.push_back(key); | 76 | 300 | } |
_ZN5doris6CountsIlE9incrementEl Line | Count | Source | 69 | 127 | void increment(Ty key) { | 70 | | if constexpr (std::is_floating_point_v<Ty>) { | 71 | | if (std::isnan(key)) { | 72 | | return; | 73 | | } | 74 | | } | 75 | 127 | _nums.push_back(key); | 76 | 127 | } |
Unexecuted instantiation: _ZN5doris6CountsInE9incrementEn Unexecuted instantiation: _ZN5doris6CountsIfE9incrementEf Unexecuted instantiation: _ZN5doris6CountsIdE9incrementEd |
77 | | |
78 | 0 | void increment_batch(const PaddedPODArray<Ty>& keys) { |
79 | 0 | if constexpr (std::is_floating_point_v<Ty>) { |
80 | 0 | _nums.reserve(_nums.size() + keys.size()); |
81 | 0 | for (auto key : keys) { |
82 | 0 | if (!std::isnan(key)) { |
83 | 0 | _nums.push_back(key); |
84 | 0 | } |
85 | 0 | } |
86 | 0 | } else { |
87 | 0 | _nums.insert(keys.begin(), keys.end()); |
88 | 0 | } |
89 | 0 | } Unexecuted instantiation: _ZN5doris6CountsIaE15increment_batchERKNS_8PODArrayIaLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEE Unexecuted instantiation: _ZN5doris6CountsIsE15increment_batchERKNS_8PODArrayIsLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEE Unexecuted instantiation: _ZN5doris6CountsIiE15increment_batchERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEE Unexecuted instantiation: _ZN5doris6CountsIlE15increment_batchERKNS_8PODArrayIlLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEE Unexecuted instantiation: _ZN5doris6CountsInE15increment_batchERKNS_8PODArrayInLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEE Unexecuted instantiation: _ZN5doris6CountsIfE15increment_batchERKNS_8PODArrayIfLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEE Unexecuted instantiation: _ZN5doris6CountsIdE15increment_batchERKNS_8PODArrayIdLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEE |
90 | | |
91 | 968 | void serialize(BufferWritable& buf) { |
92 | 968 | if (!_nums.empty()) { |
93 | 697 | pdqsort(_nums.begin(), _nums.end()); |
94 | 697 | size_t size = _nums.size(); |
95 | 697 | buf.write_binary(size); |
96 | 697 | buf.write(reinterpret_cast<const char*>(_nums.data()), sizeof(Ty) * size); |
97 | 697 | } else { |
98 | | // convert _sorted_nums_vec to _nums and do seiralize again |
99 | 271 | _convert_sorted_num_vec_to_nums(); |
100 | 271 | serialize(buf); |
101 | 271 | } |
102 | 968 | } Unexecuted instantiation: _ZN5doris6CountsIaE9serializeERNS_14BufferWritableE Unexecuted instantiation: _ZN5doris6CountsIsE9serializeERNS_14BufferWritableE _ZN5doris6CountsIiE9serializeERNS_14BufferWritableE Line | Count | Source | 91 | 612 | void serialize(BufferWritable& buf) { | 92 | 612 | if (!_nums.empty()) { | 93 | 456 | pdqsort(_nums.begin(), _nums.end()); | 94 | 456 | size_t size = _nums.size(); | 95 | 456 | buf.write_binary(size); | 96 | 456 | buf.write(reinterpret_cast<const char*>(_nums.data()), sizeof(Ty) * size); | 97 | 456 | } else { | 98 | | // convert _sorted_nums_vec to _nums and do seiralize again | 99 | 156 | _convert_sorted_num_vec_to_nums(); | 100 | 156 | serialize(buf); | 101 | 156 | } | 102 | 612 | } |
_ZN5doris6CountsIlE9serializeERNS_14BufferWritableE Line | Count | Source | 91 | 356 | void serialize(BufferWritable& buf) { | 92 | 356 | if (!_nums.empty()) { | 93 | 241 | pdqsort(_nums.begin(), _nums.end()); | 94 | 241 | size_t size = _nums.size(); | 95 | 241 | buf.write_binary(size); | 96 | 241 | buf.write(reinterpret_cast<const char*>(_nums.data()), sizeof(Ty) * size); | 97 | 241 | } else { | 98 | | // convert _sorted_nums_vec to _nums and do seiralize again | 99 | 115 | _convert_sorted_num_vec_to_nums(); | 100 | 115 | serialize(buf); | 101 | 115 | } | 102 | 356 | } |
Unexecuted instantiation: _ZN5doris6CountsInE9serializeERNS_14BufferWritableE Unexecuted instantiation: _ZN5doris6CountsIfE9serializeERNS_14BufferWritableE Unexecuted instantiation: _ZN5doris6CountsIdE9serializeERNS_14BufferWritableE |
103 | | |
104 | 579 | void unserialize(BufferReadable& buf) { |
105 | 579 | size_t size; |
106 | 579 | buf.read_binary(size); |
107 | 579 | _nums.resize(size); |
108 | 579 | auto buff = buf.read(sizeof(Ty) * size); |
109 | 579 | memcpy(_nums.data(), buff.data, buff.size); |
110 | 579 | } Unexecuted instantiation: _ZN5doris6CountsIaE11unserializeERNS_14BufferReadableE Unexecuted instantiation: _ZN5doris6CountsIsE11unserializeERNS_14BufferReadableE _ZN5doris6CountsIiE11unserializeERNS_14BufferReadableE Line | Count | Source | 104 | 338 | void unserialize(BufferReadable& buf) { | 105 | 338 | size_t size; | 106 | 338 | buf.read_binary(size); | 107 | 338 | _nums.resize(size); | 108 | 338 | auto buff = buf.read(sizeof(Ty) * size); | 109 | 338 | memcpy(_nums.data(), buff.data, buff.size); | 110 | 338 | } |
_ZN5doris6CountsIlE11unserializeERNS_14BufferReadableE Line | Count | Source | 104 | 241 | void unserialize(BufferReadable& buf) { | 105 | 241 | size_t size; | 106 | 241 | buf.read_binary(size); | 107 | 241 | _nums.resize(size); | 108 | 241 | auto buff = buf.read(sizeof(Ty) * size); | 109 | 241 | memcpy(_nums.data(), buff.data, buff.size); | 110 | 241 | } |
Unexecuted instantiation: _ZN5doris6CountsInE11unserializeERNS_14BufferReadableE Unexecuted instantiation: _ZN5doris6CountsIfE11unserializeERNS_14BufferReadableE Unexecuted instantiation: _ZN5doris6CountsIdE11unserializeERNS_14BufferReadableE |
111 | | |
112 | 34 | double terminate(double quantile) { |
113 | 34 | if (_sorted_nums_vec.size() <= 1) { |
114 | 30 | if (_sorted_nums_vec.size() == 1) { |
115 | 21 | _nums = std::move(_sorted_nums_vec[0]); |
116 | 21 | } |
117 | | |
118 | 30 | if (_nums.empty()) { |
119 | | // Although set null here, but the value is 0.0 and the call method just |
120 | | // get val in aggregate_function_percentile_approx.h |
121 | 1 | return 0.0; |
122 | 1 | } |
123 | | |
124 | 29 | if (UNLIKELY(!std::is_sorted(_nums.begin(), _nums.end()))) { |
125 | 1 | pdqsort(_nums.begin(), _nums.end()); |
126 | 1 | } |
127 | | |
128 | 29 | if (quantile == 1 || _nums.size() == 1) { |
129 | 4 | return _nums.back(); |
130 | 4 | } |
131 | | |
132 | 25 | double u = (_nums.size() - 1) * quantile; |
133 | 25 | auto index = static_cast<uint32_t>(u); |
134 | 25 | return _nums[index] + |
135 | 25 | (u - static_cast<double>(index)) * (static_cast<double>(_nums[index + 1]) - |
136 | 25 | static_cast<double>(_nums[index])); |
137 | 29 | } else { |
138 | 4 | DCHECK(_nums.empty()); |
139 | 4 | size_t rows = 0; |
140 | 8 | for (const auto& i : _sorted_nums_vec) { |
141 | 8 | rows += i.size(); |
142 | 8 | } |
143 | 4 | const bool reverse = quantile > 0.5 && rows > 2; |
144 | 4 | double u = (rows - 1) * quantile; |
145 | 4 | auto index = static_cast<uint32_t>(u); |
146 | | // if reverse, the step of target should start 0 like not reverse |
147 | | // so here rows need to minus index + 2 |
148 | | // eg: rows = 10, index = 5 |
149 | | // if not reverse, so the first number loc is 5, the second number loc is 6 |
150 | | // if reverse, so the second number is 3, the first number is 4 |
151 | | // 5 + 4 = 3 + 6 = 9 = rows - 1. |
152 | | // the rows must GE 2 beacuse `_sorted_nums_vec` size GE 2 |
153 | 4 | size_t target = reverse ? rows - index - 2 : index; |
154 | 4 | if (quantile == 1) { |
155 | 0 | target = 0; |
156 | 0 | } |
157 | 4 | auto [first_number, second_number] = _merge_sort_and_get_numbers(target, reverse); |
158 | 4 | if (quantile == 1) { |
159 | 0 | return second_number; |
160 | 0 | } |
161 | 4 | return first_number + |
162 | 4 | (u - static_cast<double>(index)) * |
163 | 4 | (static_cast<double>(second_number) - static_cast<double>(first_number)); |
164 | 4 | } |
165 | 34 | } Unexecuted instantiation: _ZN5doris6CountsIaE9terminateEd Unexecuted instantiation: _ZN5doris6CountsIsE9terminateEd Unexecuted instantiation: _ZN5doris6CountsIiE9terminateEd _ZN5doris6CountsIlE9terminateEd Line | Count | Source | 112 | 34 | double terminate(double quantile) { | 113 | 34 | if (_sorted_nums_vec.size() <= 1) { | 114 | 30 | if (_sorted_nums_vec.size() == 1) { | 115 | 21 | _nums = std::move(_sorted_nums_vec[0]); | 116 | 21 | } | 117 | | | 118 | 30 | if (_nums.empty()) { | 119 | | // Although set null here, but the value is 0.0 and the call method just | 120 | | // get val in aggregate_function_percentile_approx.h | 121 | 1 | return 0.0; | 122 | 1 | } | 123 | | | 124 | 29 | if (UNLIKELY(!std::is_sorted(_nums.begin(), _nums.end()))) { | 125 | 1 | pdqsort(_nums.begin(), _nums.end()); | 126 | 1 | } | 127 | | | 128 | 29 | if (quantile == 1 || _nums.size() == 1) { | 129 | 4 | return _nums.back(); | 130 | 4 | } | 131 | | | 132 | 25 | double u = (_nums.size() - 1) * quantile; | 133 | 25 | auto index = static_cast<uint32_t>(u); | 134 | 25 | return _nums[index] + | 135 | 25 | (u - static_cast<double>(index)) * (static_cast<double>(_nums[index + 1]) - | 136 | 25 | static_cast<double>(_nums[index])); | 137 | 29 | } else { | 138 | 4 | DCHECK(_nums.empty()); | 139 | 4 | size_t rows = 0; | 140 | 8 | for (const auto& i : _sorted_nums_vec) { | 141 | 8 | rows += i.size(); | 142 | 8 | } | 143 | 4 | const bool reverse = quantile > 0.5 && rows > 2; | 144 | 4 | double u = (rows - 1) * quantile; | 145 | 4 | auto index = static_cast<uint32_t>(u); | 146 | | // if reverse, the step of target should start 0 like not reverse | 147 | | // so here rows need to minus index + 2 | 148 | | // eg: rows = 10, index = 5 | 149 | | // if not reverse, so the first number loc is 5, the second number loc is 6 | 150 | | // if reverse, so the second number is 3, the first number is 4 | 151 | | // 5 + 4 = 3 + 6 = 9 = rows - 1. | 152 | | // the rows must GE 2 beacuse `_sorted_nums_vec` size GE 2 | 153 | 4 | size_t target = reverse ? rows - index - 2 : index; | 154 | 4 | if (quantile == 1) { | 155 | 0 | target = 0; | 156 | 0 | } | 157 | 4 | auto [first_number, second_number] = _merge_sort_and_get_numbers(target, reverse); | 158 | 4 | if (quantile == 1) { | 159 | 0 | return second_number; | 160 | 0 | } | 161 | 4 | return first_number + | 162 | 4 | (u - static_cast<double>(index)) * | 163 | 4 | (static_cast<double>(second_number) - static_cast<double>(first_number)); | 164 | 4 | } | 165 | 34 | } |
Unexecuted instantiation: _ZN5doris6CountsInE9terminateEd Unexecuted instantiation: _ZN5doris6CountsIfE9terminateEd Unexecuted instantiation: _ZN5doris6CountsIdE9terminateEd |
166 | | |
167 | | private: |
168 | | struct Node { |
169 | | Ty value; |
170 | | int array_index; |
171 | | int64_t element_index; |
172 | | |
173 | 477 | auto operator<=>(const Node& other) const { return value <=> other.value; }Unexecuted instantiation: _ZNK5doris6CountsIaE4NodessERKS2_ Unexecuted instantiation: _ZNK5doris6CountsIsE4NodessERKS2_ _ZNK5doris6CountsIiE4NodessERKS2_ Line | Count | Source | 173 | 291 | auto operator<=>(const Node& other) const { return value <=> other.value; } |
_ZNK5doris6CountsIlE4NodessERKS2_ Line | Count | Source | 173 | 186 | auto operator<=>(const Node& other) const { return value <=> other.value; } |
Unexecuted instantiation: _ZNK5doris6CountsInE4NodessERKS2_ Unexecuted instantiation: _ZNK5doris6CountsIfE4NodessERKS2_ Unexecuted instantiation: _ZNK5doris6CountsIdE4NodessERKS2_ |
174 | | }; |
175 | | |
176 | 271 | void _convert_sorted_num_vec_to_nums() { |
177 | 271 | size_t rows = 0; |
178 | 550 | for (const auto& i : _sorted_nums_vec) { |
179 | 550 | rows += i.size(); |
180 | 550 | } |
181 | 271 | _nums.resize(rows); |
182 | 271 | size_t count = 0; |
183 | | |
184 | 271 | std::priority_queue<Node, std::vector<Node>, std::greater<Node>> min_heap; |
185 | 821 | for (int i = 0; i < _sorted_nums_vec.size(); ++i) { |
186 | 550 | if (!_sorted_nums_vec[i].empty()) { |
187 | 550 | min_heap.emplace(_sorted_nums_vec[i][0], i, 0); |
188 | 550 | } |
189 | 550 | } |
190 | | |
191 | 970 | while (!min_heap.empty()) { |
192 | 699 | Node node = min_heap.top(); |
193 | 699 | min_heap.pop(); |
194 | 699 | _nums[count++] = node.value; |
195 | 699 | if (++node.element_index < _sorted_nums_vec[node.array_index].size()) { |
196 | 149 | node.value = _sorted_nums_vec[node.array_index][node.element_index]; |
197 | 149 | min_heap.push(node); |
198 | 149 | } |
199 | 699 | } |
200 | 271 | _sorted_nums_vec.clear(); |
201 | 271 | } Unexecuted instantiation: _ZN5doris6CountsIaE31_convert_sorted_num_vec_to_numsEv Unexecuted instantiation: _ZN5doris6CountsIsE31_convert_sorted_num_vec_to_numsEv _ZN5doris6CountsIiE31_convert_sorted_num_vec_to_numsEv Line | Count | Source | 176 | 156 | void _convert_sorted_num_vec_to_nums() { | 177 | 156 | size_t rows = 0; | 178 | 339 | for (const auto& i : _sorted_nums_vec) { | 179 | 339 | rows += i.size(); | 180 | 339 | } | 181 | 156 | _nums.resize(rows); | 182 | 156 | size_t count = 0; | 183 | | | 184 | 156 | std::priority_queue<Node, std::vector<Node>, std::greater<Node>> min_heap; | 185 | 495 | for (int i = 0; i < _sorted_nums_vec.size(); ++i) { | 186 | 339 | if (!_sorted_nums_vec[i].empty()) { | 187 | 339 | min_heap.emplace(_sorted_nums_vec[i][0], i, 0); | 188 | 339 | } | 189 | 339 | } | 190 | | | 191 | 531 | while (!min_heap.empty()) { | 192 | 375 | Node node = min_heap.top(); | 193 | 375 | min_heap.pop(); | 194 | 375 | _nums[count++] = node.value; | 195 | 375 | if (++node.element_index < _sorted_nums_vec[node.array_index].size()) { | 196 | 36 | node.value = _sorted_nums_vec[node.array_index][node.element_index]; | 197 | 36 | min_heap.push(node); | 198 | 36 | } | 199 | 375 | } | 200 | 156 | _sorted_nums_vec.clear(); | 201 | 156 | } |
_ZN5doris6CountsIlE31_convert_sorted_num_vec_to_numsEv Line | Count | Source | 176 | 115 | void _convert_sorted_num_vec_to_nums() { | 177 | 115 | size_t rows = 0; | 178 | 211 | for (const auto& i : _sorted_nums_vec) { | 179 | 211 | rows += i.size(); | 180 | 211 | } | 181 | 115 | _nums.resize(rows); | 182 | 115 | size_t count = 0; | 183 | | | 184 | 115 | std::priority_queue<Node, std::vector<Node>, std::greater<Node>> min_heap; | 185 | 326 | for (int i = 0; i < _sorted_nums_vec.size(); ++i) { | 186 | 211 | if (!_sorted_nums_vec[i].empty()) { | 187 | 211 | min_heap.emplace(_sorted_nums_vec[i][0], i, 0); | 188 | 211 | } | 189 | 211 | } | 190 | | | 191 | 439 | while (!min_heap.empty()) { | 192 | 324 | Node node = min_heap.top(); | 193 | 324 | min_heap.pop(); | 194 | 324 | _nums[count++] = node.value; | 195 | 324 | if (++node.element_index < _sorted_nums_vec[node.array_index].size()) { | 196 | 113 | node.value = _sorted_nums_vec[node.array_index][node.element_index]; | 197 | 113 | min_heap.push(node); | 198 | 113 | } | 199 | 324 | } | 200 | 115 | _sorted_nums_vec.clear(); | 201 | 115 | } |
Unexecuted instantiation: _ZN5doris6CountsInE31_convert_sorted_num_vec_to_numsEv Unexecuted instantiation: _ZN5doris6CountsIfE31_convert_sorted_num_vec_to_numsEv Unexecuted instantiation: _ZN5doris6CountsIdE31_convert_sorted_num_vec_to_numsEv |
202 | | |
203 | 4 | std::pair<Ty, Ty> _merge_sort_and_get_numbers(int64_t target, bool reverse) { |
204 | 4 | Ty first_number = 0, second_number = 0; |
205 | 4 | size_t count = 0; |
206 | 4 | if (reverse) { |
207 | 1 | std::priority_queue<Node> max_heap; |
208 | 3 | for (int i = 0; i < _sorted_nums_vec.size(); ++i) { |
209 | 2 | if (!_sorted_nums_vec[i].empty()) { |
210 | 2 | max_heap.emplace(_sorted_nums_vec[i][_sorted_nums_vec[i].size() - 1], i, |
211 | 2 | _sorted_nums_vec[i].size() - 1); |
212 | 2 | } |
213 | 2 | } |
214 | | |
215 | 2 | while (!max_heap.empty()) { |
216 | 2 | Node node = max_heap.top(); |
217 | 2 | max_heap.pop(); |
218 | 2 | if (count == target) { |
219 | 1 | second_number = node.value; |
220 | 1 | } else if (count == target + 1) { |
221 | 1 | first_number = node.value; |
222 | 1 | break; |
223 | 1 | } |
224 | 1 | ++count; |
225 | 1 | if (--node.element_index >= 0) { |
226 | 1 | node.value = _sorted_nums_vec[node.array_index][node.element_index]; |
227 | 1 | max_heap.push(node); |
228 | 1 | } |
229 | 1 | } |
230 | | |
231 | 3 | } else { |
232 | 3 | std::priority_queue<Node, std::vector<Node>, std::greater<Node>> min_heap; |
233 | 9 | for (int i = 0; i < _sorted_nums_vec.size(); ++i) { |
234 | 6 | if (!_sorted_nums_vec[i].empty()) { |
235 | 6 | min_heap.emplace(_sorted_nums_vec[i][0], i, 0); |
236 | 6 | } |
237 | 6 | } |
238 | | |
239 | 13 | while (!min_heap.empty()) { |
240 | 13 | Node node = min_heap.top(); |
241 | 13 | min_heap.pop(); |
242 | 13 | if (count == target) { |
243 | 3 | first_number = node.value; |
244 | 10 | } else if (count == target + 1) { |
245 | 3 | second_number = node.value; |
246 | 3 | break; |
247 | 3 | } |
248 | 10 | ++count; |
249 | 10 | if (++node.element_index < _sorted_nums_vec[node.array_index].size()) { |
250 | 10 | node.value = _sorted_nums_vec[node.array_index][node.element_index]; |
251 | 10 | min_heap.push(node); |
252 | 10 | } |
253 | 10 | } |
254 | 3 | } |
255 | | |
256 | 4 | return {first_number, second_number}; |
257 | 4 | } Unexecuted instantiation: _ZN5doris6CountsIaE27_merge_sort_and_get_numbersElb Unexecuted instantiation: _ZN5doris6CountsIsE27_merge_sort_and_get_numbersElb Unexecuted instantiation: _ZN5doris6CountsIiE27_merge_sort_and_get_numbersElb _ZN5doris6CountsIlE27_merge_sort_and_get_numbersElb Line | Count | Source | 203 | 4 | std::pair<Ty, Ty> _merge_sort_and_get_numbers(int64_t target, bool reverse) { | 204 | 4 | Ty first_number = 0, second_number = 0; | 205 | 4 | size_t count = 0; | 206 | 4 | if (reverse) { | 207 | 1 | std::priority_queue<Node> max_heap; | 208 | 3 | for (int i = 0; i < _sorted_nums_vec.size(); ++i) { | 209 | 2 | if (!_sorted_nums_vec[i].empty()) { | 210 | 2 | max_heap.emplace(_sorted_nums_vec[i][_sorted_nums_vec[i].size() - 1], i, | 211 | 2 | _sorted_nums_vec[i].size() - 1); | 212 | 2 | } | 213 | 2 | } | 214 | | | 215 | 2 | while (!max_heap.empty()) { | 216 | 2 | Node node = max_heap.top(); | 217 | 2 | max_heap.pop(); | 218 | 2 | if (count == target) { | 219 | 1 | second_number = node.value; | 220 | 1 | } else if (count == target + 1) { | 221 | 1 | first_number = node.value; | 222 | 1 | break; | 223 | 1 | } | 224 | 1 | ++count; | 225 | 1 | if (--node.element_index >= 0) { | 226 | 1 | node.value = _sorted_nums_vec[node.array_index][node.element_index]; | 227 | 1 | max_heap.push(node); | 228 | 1 | } | 229 | 1 | } | 230 | | | 231 | 3 | } else { | 232 | 3 | std::priority_queue<Node, std::vector<Node>, std::greater<Node>> min_heap; | 233 | 9 | for (int i = 0; i < _sorted_nums_vec.size(); ++i) { | 234 | 6 | if (!_sorted_nums_vec[i].empty()) { | 235 | 6 | min_heap.emplace(_sorted_nums_vec[i][0], i, 0); | 236 | 6 | } | 237 | 6 | } | 238 | | | 239 | 13 | while (!min_heap.empty()) { | 240 | 13 | Node node = min_heap.top(); | 241 | 13 | min_heap.pop(); | 242 | 13 | if (count == target) { | 243 | 3 | first_number = node.value; | 244 | 10 | } else if (count == target + 1) { | 245 | 3 | second_number = node.value; | 246 | 3 | break; | 247 | 3 | } | 248 | 10 | ++count; | 249 | 10 | if (++node.element_index < _sorted_nums_vec[node.array_index].size()) { | 250 | 10 | node.value = _sorted_nums_vec[node.array_index][node.element_index]; | 251 | 10 | min_heap.push(node); | 252 | 10 | } | 253 | 10 | } | 254 | 3 | } | 255 | | | 256 | 4 | return {first_number, second_number}; | 257 | 4 | } |
Unexecuted instantiation: _ZN5doris6CountsInE27_merge_sort_and_get_numbersElb Unexecuted instantiation: _ZN5doris6CountsIfE27_merge_sort_and_get_numbersElb Unexecuted instantiation: _ZN5doris6CountsIdE27_merge_sort_and_get_numbersElb |
258 | | |
259 | | PODArray<Ty> _nums; |
260 | | std::vector<PODArray<Ty>> _sorted_nums_vec; |
261 | | }; |
262 | | |
263 | | class PercentileLevels { |
264 | | public: |
265 | 165 | void merge(const PercentileLevels& rhs) { |
266 | 165 | if (rhs.empty()) { |
267 | 1 | return; |
268 | 1 | } |
269 | | |
270 | 164 | if (empty()) { |
271 | 1 | quantiles = rhs.quantiles; |
272 | 1 | permutation = rhs.permutation; |
273 | 1 | return; |
274 | 1 | } |
275 | | |
276 | 164 | DCHECK_EQ(quantiles.size(), rhs.quantiles.size()); |
277 | 425 | for (size_t i = 0; i < quantiles.size(); ++i) { |
278 | 262 | DCHECK_EQ(quantiles[i], rhs.quantiles[i]); |
279 | 262 | } |
280 | 163 | } |
281 | | |
282 | 415 | void write(BufferWritable& buf) const { |
283 | 415 | int size_num = cast_set<int>(quantiles.size()); |
284 | 415 | buf.write_binary(size_num); |
285 | 714 | for (const auto& quantile : quantiles) { |
286 | 714 | buf.write_binary(quantile); |
287 | 714 | } |
288 | 415 | } |
289 | | |
290 | 416 | void read(BufferReadable& buf) { |
291 | 416 | int size_num = 0; |
292 | 416 | buf.read_binary(size_num); |
293 | | |
294 | 416 | quantiles.resize(size_num); |
295 | 416 | permutation.resize(size_num); |
296 | 1.13k | for (int i = 0; i < size_num; ++i) { |
297 | 718 | buf.read_binary(quantiles[i]); |
298 | 718 | permutation[i] = cast_set<size_t>(i); |
299 | 718 | } |
300 | 416 | } |
301 | | |
302 | 533 | void clear() { |
303 | 533 | quantiles.clear(); |
304 | 533 | permutation.clear(); |
305 | 533 | } |
306 | | |
307 | 2.28k | bool empty() const { return quantiles.empty(); } |
308 | | |
309 | 130 | const std::vector<size_t>& get_permutation() const { |
310 | 130 | sort_permutation(); |
311 | 130 | return permutation; |
312 | 130 | } |
313 | | |
314 | 130 | void sort_permutation() const { |
315 | 130 | pdqsort(permutation.begin(), permutation.end(), |
316 | 211 | [this](size_t lhs, size_t rhs) { return quantiles[lhs] < quantiles[rhs]; }); |
317 | 130 | } |
318 | | |
319 | | std::vector<double> quantiles; |
320 | | mutable std::vector<size_t> permutation; |
321 | | }; |
322 | | |
323 | | } // namespace doris |