be/src/exec/common/histogram_helpers.hpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
#include <rapidjson/document.h>
#include <rapidjson/prettywriter.h>
#include <rapidjson/stringbuffer.h>

#include <boost/dynamic_bitset.hpp>

#include <cassert>
#include <cstddef>
#include <map>
#include <numeric>

#include "common/cast_set.h"
#include "core/data_type/data_type_decimal.h"
28 | | |
29 | | namespace doris { |
/**
 * A single histogram bucket covering the value range [lower, upper],
 * together with its statistics.
 *
 * @tparam T the value type stored in the histogram
 */
template <typename T>
struct Bucket {
public:
    // Default-construct with zero statistics and value-initialized bounds
    // (relies on the in-class member initializers below).
    Bucket() = default;

    /**
     * @param lower   smallest value contained in this bucket (inclusive)
     * @param upper   largest value contained in this bucket (inclusive)
     * @param ndv     number of distinct values in this bucket
     * @param count   total number of values in this bucket
     * @param pre_sum cumulative value count of all preceding buckets
     */
    Bucket(T lower, T upper, size_t ndv, size_t count, size_t pre_sum)
            : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {}

    // In-class initializers so a default-constructed Bucket does not hold
    // indeterminate values for primitive T / the size_t counters.
    T lower {};
    T upper {};
    size_t ndv = 0;     // number of distinct values
    size_t count = 0;   // total value count in this bucket
    size_t pre_sum = 0; // cumulative count of all previous buckets
};
43 | | |
/**
 * Checks if it is possible to assign the provided value_map to the given
 * number of buckets such that no bucket has a size larger than max_bucket_size.
 *
 * Values are scanned in ascending key order (std::map iteration order) and
 * packed greedily. Note that a single value whose count alone exceeds
 * max_bucket_size still occupies one bucket of its own — distinct values are
 * never split across buckets.
 *
 * @param value_map A mapping of values to their counts.
 * @param max_bucket_size The maximum size that any bucket is allowed to have.
 * @param num_buckets The number of buckets that we want to assign values to.
 *
 * @return true if the values can be assigned to the buckets, false otherwise.
 */
template <typename T>
bool can_assign_into_buckets(const std::map<T, size_t>& value_map, const size_t max_bucket_size,
                             const size_t num_buckets) {
    // An empty map can never be distributed.
    if (value_map.empty()) {
        return false;
    }

    size_t used_buckets = 1;
    size_t current_bucket_size = 0;

    for (const auto& [value, count] : value_map) {
        current_bucket_size += count;

        // If adding the current value to the current bucket would exceed max_bucket_size,
        // then we start a new bucket holding only this value.
        if (current_bucket_size > max_bucket_size) {
            ++used_buckets;
            current_bucket_size = count;
        }

        // Once we need more buckets than allowed, the assignment is impossible.
        if (used_buckets > num_buckets) {
            return false;
        }
    }

    return true;
}
|
82 | | |
/**
 * Calculates the maximum number of values that can fit into each bucket given a set of values
 * and the desired number of buckets.
 *
 * The binary search is capped at a fixed number of steps, so the returned
 * bound may be slightly loose (too large) but never underestimates.
 *
 * @tparam T the type of the values in the value map
 * @param value_map the map of values and their counts; must not be empty
 * @param num_buckets the desired number of buckets; must be at least 1
 * @return the maximum number of values that can fit into each bucket
 */
template <typename T>
size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) {
    // Ensure that the value map is not empty
    assert(!value_map.empty());

    // Calculate the total number of values in the map using std::accumulate()
    const size_t total_values =
            std::accumulate(value_map.begin(), value_map.end(), size_t(0),
                            [](size_t sum, const auto& entry) { return sum + entry.second; });

    // If there is only one bucket, then all values will be assigned to that bucket
    if (num_buckets == 1) {
        return total_values;
    }

    // To calculate the maximum value count in each bucket, we first calculate a conservative upper
    // bound, which is equal to 2 * total_values / (num_buckets - 1) + 1. This upper bound may
    // exceed the actual maximum value count, but it does not underestimate it. The subsequent
    // binary search will approach the actual maximum value count.
    size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1;

    // Initialize the lower bound to 0
    size_t lower_bucket_values = 0;

    // Perform a binary search to find the maximum number of values that can fit into each bucket
    int search_step = 0;
    const int max_search_steps =
            10; // Limit the number of search steps to avoid excessive iteration

    while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) {
        // Calculate the midpoint of the upper and lower bounds
        const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2;

        // Check if the given number of values can be assigned to the desired number of buckets
        if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) {
            // If it can, then tighten the upper bound to the midpoint
            upper_bucket_values = bucket_values;
        } else {
            // If it can't, then raise the lower bound to the midpoint
            lower_bucket_values = bucket_values;
        }
        // Increment the search step counter
        ++search_step;
    }

    return upper_bucket_values;
}
_ZN5doris27calculate_bucket_max_valuesIfEEmRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEm Line | Count | Source | 93 | 41 | size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) { | 94 | | // Ensure that the value map is not empty | 95 | 41 | assert(!value_map.empty()); | 96 | | | 97 | | // Calculate the total number of values in the map using std::accumulate() | 98 | 41 | size_t total_values = 0; | 99 | 328 | for (const auto& [value, count] : value_map) { | 100 | 328 | total_values += count; | 101 | 328 | } | 102 | | | 103 | | // If there is only one bucket, then all values will be assigned to that bucket | 104 | 41 | if (num_buckets == 1) { | 105 | 0 | return total_values; | 106 | 0 | } | 107 | | | 108 | | // To calculate the maximum value count in each bucket, we first calculate a conservative upper | 109 | | // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed | 110 | | // the actual maximum value count, but it does not underestimate it. The subsequent binary search | 111 | | // algorithm will approach the actual maximum value count. 
| 112 | 41 | size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1; | 113 | | | 114 | | // Initialize the lower bound to 0 | 115 | 41 | size_t lower_bucket_values = 0; | 116 | | | 117 | | // Perform a binary search to find the maximum number of values that can fit into each bucket | 118 | 41 | int search_step = 0; | 119 | 41 | const int max_search_steps = | 120 | 41 | 10; // Limit the number of search steps to avoid excessive iteration | 121 | | | 122 | 72 | while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) { | 123 | | // Calculate the midpoint of the upper and lower bounds | 124 | 31 | const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2; | 125 | | | 126 | | // Check if the given number of values can be assigned to the desired number of buckets | 127 | 31 | if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) { | 128 | | // If it can, then set the upper bound to the midpoint | 129 | 16 | upper_bucket_values = bucket_values; | 130 | 16 | } else { | 131 | | // If it can't, then set the lower bound to the midpoint | 132 | 15 | lower_bucket_values = bucket_values; | 133 | 15 | } | 134 | | // Increment the search step counter | 135 | 31 | ++search_step; | 136 | 31 | } | 137 | | | 138 | 41 | return upper_bucket_values; | 139 | 41 | } |
_ZN5doris27calculate_bucket_max_valuesIdEEmRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEm Line | Count | Source | 93 | 41 | size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) { | 94 | | // Ensure that the value map is not empty | 95 | 41 | assert(!value_map.empty()); | 96 | | | 97 | | // Calculate the total number of values in the map using std::accumulate() | 98 | 41 | size_t total_values = 0; | 99 | 328 | for (const auto& [value, count] : value_map) { | 100 | 328 | total_values += count; | 101 | 328 | } | 102 | | | 103 | | // If there is only one bucket, then all values will be assigned to that bucket | 104 | 41 | if (num_buckets == 1) { | 105 | 0 | return total_values; | 106 | 0 | } | 107 | | | 108 | | // To calculate the maximum value count in each bucket, we first calculate a conservative upper | 109 | | // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed | 110 | | // the actual maximum value count, but it does not underestimate it. The subsequent binary search | 111 | | // algorithm will approach the actual maximum value count. 
| 112 | 41 | size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1; | 113 | | | 114 | | // Initialize the lower bound to 0 | 115 | 41 | size_t lower_bucket_values = 0; | 116 | | | 117 | | // Perform a binary search to find the maximum number of values that can fit into each bucket | 118 | 41 | int search_step = 0; | 119 | 41 | const int max_search_steps = | 120 | 41 | 10; // Limit the number of search steps to avoid excessive iteration | 121 | | | 122 | 72 | while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) { | 123 | | // Calculate the midpoint of the upper and lower bounds | 124 | 31 | const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2; | 125 | | | 126 | | // Check if the given number of values can be assigned to the desired number of buckets | 127 | 31 | if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) { | 128 | | // If it can, then set the upper bound to the midpoint | 129 | 16 | upper_bucket_values = bucket_values; | 130 | 16 | } else { | 131 | | // If it can't, then set the lower bound to the midpoint | 132 | 15 | lower_bucket_values = bucket_values; | 133 | 15 | } | 134 | | // Increment the search step counter | 135 | 31 | ++search_step; | 136 | 31 | } | 137 | | | 138 | 41 | return upper_bucket_values; | 139 | 41 | } |
_ZN5doris27calculate_bucket_max_valuesINS_7DecimalIiEEEEmRKSt3mapIT_mSt4lessIS4_ESaISt4pairIKS4_mEEEm Line | Count | Source | 93 | 45 | size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) { | 94 | | // Ensure that the value map is not empty | 95 | 45 | assert(!value_map.empty()); | 96 | | | 97 | | // Calculate the total number of values in the map using std::accumulate() | 98 | 45 | size_t total_values = 0; | 99 | 161 | for (const auto& [value, count] : value_map) { | 100 | 161 | total_values += count; | 101 | 161 | } | 102 | | | 103 | | // If there is only one bucket, then all values will be assigned to that bucket | 104 | 45 | if (num_buckets == 1) { | 105 | 0 | return total_values; | 106 | 0 | } | 107 | | | 108 | | // To calculate the maximum value count in each bucket, we first calculate a conservative upper | 109 | | // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed | 110 | | // the actual maximum value count, but it does not underestimate it. The subsequent binary search | 111 | | // algorithm will approach the actual maximum value count. 
| 112 | 45 | size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1; | 113 | | | 114 | | // Initialize the lower bound to 0 | 115 | 45 | size_t lower_bucket_values = 0; | 116 | | | 117 | | // Perform a binary search to find the maximum number of values that can fit into each bucket | 118 | 45 | int search_step = 0; | 119 | 45 | const int max_search_steps = | 120 | 45 | 10; // Limit the number of search steps to avoid excessive iteration | 121 | | | 122 | 84 | while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) { | 123 | | // Calculate the midpoint of the upper and lower bounds | 124 | 39 | const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2; | 125 | | | 126 | | // Check if the given number of values can be assigned to the desired number of buckets | 127 | 39 | if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) { | 128 | | // If it can, then set the upper bound to the midpoint | 129 | 22 | upper_bucket_values = bucket_values; | 130 | 22 | } else { | 131 | | // If it can't, then set the lower bound to the midpoint | 132 | 17 | lower_bucket_values = bucket_values; | 133 | 17 | } | 134 | | // Increment the search step counter | 135 | 39 | ++search_step; | 136 | 39 | } | 137 | | | 138 | 45 | return upper_bucket_values; | 139 | 45 | } |
Unexecuted instantiation: _ZN5doris27calculate_bucket_max_valuesINS_7DecimalIlEEEEmRKSt3mapIT_mSt4lessIS4_ESaISt4pairIKS4_mEEEm Unexecuted instantiation: _ZN5doris27calculate_bucket_max_valuesINS_12Decimal128V3EEEmRKSt3mapIT_mSt4lessIS3_ESaISt4pairIKS3_mEEEm Unexecuted instantiation: _ZN5doris27calculate_bucket_max_valuesINS_7DecimalIN4wide7integerILm256EiEEEEEEmRKSt3mapIT_mSt4lessIS7_ESaISt4pairIKS7_mEEEm _ZN5doris27calculate_bucket_max_valuesINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEmRKSt3mapIT_mSt4lessIS8_ESaISt4pairIKS8_mEEEm Line | Count | Source | 93 | 100 | size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) { | 94 | | // Ensure that the value map is not empty | 95 | 100 | assert(!value_map.empty()); | 96 | | | 97 | | // Calculate the total number of values in the map using std::accumulate() | 98 | 100 | size_t total_values = 0; | 99 | 2.20k | for (const auto& [value, count] : value_map) { | 100 | 2.20k | total_values += count; | 101 | 2.20k | } | 102 | | | 103 | | // If there is only one bucket, then all values will be assigned to that bucket | 104 | 100 | if (num_buckets == 1) { | 105 | 0 | return total_values; | 106 | 0 | } | 107 | | | 108 | | // To calculate the maximum value count in each bucket, we first calculate a conservative upper | 109 | | // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed | 110 | | // the actual maximum value count, but it does not underestimate it. The subsequent binary search | 111 | | // algorithm will approach the actual maximum value count. 
| 112 | 100 | size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1; | 113 | | | 114 | | // Initialize the lower bound to 0 | 115 | 100 | size_t lower_bucket_values = 0; | 116 | | | 117 | | // Perform a binary search to find the maximum number of values that can fit into each bucket | 118 | 100 | int search_step = 0; | 119 | 100 | const int max_search_steps = | 120 | 100 | 10; // Limit the number of search steps to avoid excessive iteration | 121 | | | 122 | 200 | while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) { | 123 | | // Calculate the midpoint of the upper and lower bounds | 124 | 100 | const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2; | 125 | | | 126 | | // Check if the given number of values can be assigned to the desired number of buckets | 127 | 100 | if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) { | 128 | | // If it can, then set the upper bound to the midpoint | 129 | 62 | upper_bucket_values = bucket_values; | 130 | 62 | } else { | 131 | | // If it can't, then set the lower bound to the midpoint | 132 | 38 | lower_bucket_values = bucket_values; | 133 | 38 | } | 134 | | // Increment the search step counter | 135 | 100 | ++search_step; | 136 | 100 | } | 137 | | | 138 | 100 | return upper_bucket_values; | 139 | 100 | } |
_ZN5doris27calculate_bucket_max_valuesINS_11DateV2ValueINS_15DateV2ValueTypeEEEEEmRKSt3mapIT_mSt4lessIS5_ESaISt4pairIKS5_mEEEm Line | Count | Source | 93 | 81 | size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) { | 94 | | // Ensure that the value map is not empty | 95 | 81 | assert(!value_map.empty()); | 96 | | | 97 | | // Calculate the total number of values in the map using std::accumulate() | 98 | 81 | size_t total_values = 0; | 99 | 458 | for (const auto& [value, count] : value_map) { | 100 | 458 | total_values += count; | 101 | 458 | } | 102 | | | 103 | | // If there is only one bucket, then all values will be assigned to that bucket | 104 | 81 | if (num_buckets == 1) { | 105 | 0 | return total_values; | 106 | 0 | } | 107 | | | 108 | | // To calculate the maximum value count in each bucket, we first calculate a conservative upper | 109 | | // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed | 110 | | // the actual maximum value count, but it does not underestimate it. The subsequent binary search | 111 | | // algorithm will approach the actual maximum value count. 
| 112 | 81 | size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1; | 113 | | | 114 | | // Initialize the lower bound to 0 | 115 | 81 | size_t lower_bucket_values = 0; | 116 | | | 117 | | // Perform a binary search to find the maximum number of values that can fit into each bucket | 118 | 81 | int search_step = 0; | 119 | 81 | const int max_search_steps = | 120 | 81 | 10; // Limit the number of search steps to avoid excessive iteration | 121 | | | 122 | 132 | while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) { | 123 | | // Calculate the midpoint of the upper and lower bounds | 124 | 51 | const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2; | 125 | | | 126 | | // Check if the given number of values can be assigned to the desired number of buckets | 127 | 51 | if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) { | 128 | | // If it can, then set the upper bound to the midpoint | 129 | 28 | upper_bucket_values = bucket_values; | 130 | 28 | } else { | 131 | | // If it can't, then set the lower bound to the midpoint | 132 | 23 | lower_bucket_values = bucket_values; | 133 | 23 | } | 134 | | // Increment the search step counter | 135 | 51 | ++search_step; | 136 | 51 | } | 137 | | | 138 | 81 | return upper_bucket_values; | 139 | 81 | } |
_ZN5doris27calculate_bucket_max_valuesINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEEEEmRKSt3mapIT_mSt4lessIS5_ESaISt4pairIKS5_mEEEm Line | Count | Source | 93 | 81 | size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) { | 94 | | // Ensure that the value map is not empty | 95 | 81 | assert(!value_map.empty()); | 96 | | | 97 | | // Calculate the total number of values in the map using std::accumulate() | 98 | 81 | size_t total_values = 0; | 99 | 457 | for (const auto& [value, count] : value_map) { | 100 | 457 | total_values += count; | 101 | 457 | } | 102 | | | 103 | | // If there is only one bucket, then all values will be assigned to that bucket | 104 | 81 | if (num_buckets == 1) { | 105 | 0 | return total_values; | 106 | 0 | } | 107 | | | 108 | | // To calculate the maximum value count in each bucket, we first calculate a conservative upper | 109 | | // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed | 110 | | // the actual maximum value count, but it does not underestimate it. The subsequent binary search | 111 | | // algorithm will approach the actual maximum value count. 
| 112 | 81 | size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1; | 113 | | | 114 | | // Initialize the lower bound to 0 | 115 | 81 | size_t lower_bucket_values = 0; | 116 | | | 117 | | // Perform a binary search to find the maximum number of values that can fit into each bucket | 118 | 81 | int search_step = 0; | 119 | 81 | const int max_search_steps = | 120 | 81 | 10; // Limit the number of search steps to avoid excessive iteration | 121 | | | 122 | 132 | while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) { | 123 | | // Calculate the midpoint of the upper and lower bounds | 124 | 51 | const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2; | 125 | | | 126 | | // Check if the given number of values can be assigned to the desired number of buckets | 127 | 51 | if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) { | 128 | | // If it can, then set the upper bound to the midpoint | 129 | 28 | upper_bucket_values = bucket_values; | 130 | 28 | } else { | 131 | | // If it can't, then set the lower bound to the midpoint | 132 | 23 | lower_bucket_values = bucket_values; | 133 | 23 | } | 134 | | // Increment the search step counter | 135 | 51 | ++search_step; | 136 | 51 | } | 137 | | | 138 | 81 | return upper_bucket_values; | 139 | 81 | } |
|
140 | | |
141 | | /** |
142 | | * Greedy equi-height histogram construction algorithm, inspired by the MySQL |
143 | | * equi_height implementation(https://dev.mysql.com/doc/dev/mysql-server/latest/equi__height_8h.html). |
144 | | * |
145 | | * Given an ordered collection of [value, count] pairs and a maximum bucket |
146 | | * size, construct a histogram by inserting values into a bucket while keeping |
147 | | * track of its size. If the insertion of a value into a non-empty bucket |
148 | | * causes the bucket to exceed the maximum size, create a new empty bucket and |
149 | | * continue. |
150 | | * |
151 | | * The algorithm guarantees a selectivity estimation error of at most ~2 * |
152 | | * #values / #buckets, often less. Values with a higher relative frequency are |
153 | | * guaranteed to be placed in singleton buckets. |
154 | | * |
155 | | * The minimum composite bucket size is used to minimize the worst case |
156 | | * selectivity estimation error. In general, the algorithm will adapt to the |
157 | | * data distribution to minimize the size of composite buckets. The heavy values |
158 | | * can be placed in singleton buckets and the remaining values will be evenly |
159 | | * spread across the remaining buckets, leading to a lower composite bucket size. |
160 | | * |
161 | | * Note: The term "value" refers to an entry in a column and the actual value |
162 | | * of an entry. The ordered_map is an ordered collection of [distinct value, |
163 | | * value count] pairs. For example, a Value_map<String> could contain the pairs ["a", 1], ["b", 2] |
164 | | * to represent one "a" value and two "b" values. |
165 | | * |
166 | | * @param buckets A vector of empty buckets that will be populated with data. |
167 | | * @param ordered_map An ordered map of distinct values and their counts. |
168 | | * @param max_num_buckets The maximum number of buckets that can be used. |
169 | | * |
170 | | * @return True if the buckets were successfully built, false otherwise. |
171 | | */ |
172 | | template <typename T> |
173 | | bool build_histogram(std::vector<Bucket<T>>& buckets, const std::map<T, size_t>& ordered_map, |
174 | 707 | const size_t max_num_buckets) { |
175 | | // If the input map is empty, there is nothing to build. |
176 | 707 | if (ordered_map.empty()) { |
177 | 66 | return false; |
178 | 66 | } |
179 | | |
180 | | // Calculate the maximum number of values that can be assigned to each bucket. |
181 | 641 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); |
182 | | |
183 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional |
184 | | // allocations when inserting buckets. |
185 | 641 | buckets.clear(); |
186 | 641 | buckets.reserve(max_num_buckets); |
187 | | |
188 | | // Initialize bucket variables. |
189 | 641 | size_t distinct_values_count = 0; |
190 | 641 | size_t values_count = 0; |
191 | 641 | size_t cumulative_values = 0; |
192 | | |
193 | | // Record how many values still need to be assigned. |
194 | 641 | auto remaining_distinct_values = ordered_map.size(); |
195 | | |
196 | 641 | auto it = ordered_map.begin(); |
197 | | |
198 | | // Lower value of the current bucket. |
199 | 641 | const T* lower_value = &it->first; |
200 | | |
201 | | // Iterate over the ordered map of distinct values and their counts. |
202 | 6.39k | for (; it != ordered_map.end(); ++it) { |
203 | 5.74k | const auto count = it->second; |
204 | 5.74k | const auto current_value = it->first; |
205 | | |
206 | | // Update the bucket counts and track the number of distinct values assigned. |
207 | 5.74k | distinct_values_count++; |
208 | 5.74k | remaining_distinct_values--; |
209 | 5.74k | values_count += count; |
210 | 5.74k | cumulative_values += count; |
211 | | |
212 | | // Check whether the current value should be added to the current bucket. |
213 | 5.74k | auto next = std::next(it); |
214 | 5.74k | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; |
215 | | |
216 | 5.74k | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && |
217 | 5.74k | values_count + next->second <= bucket_max_values) { |
218 | | // If the current value is the last in the input map and there are more remaining |
219 | | // distinct values than empty buckets and adding the value does not cause the bucket |
220 | | // to exceed its max size, skip adding the value to the current bucket. |
221 | 3.89k | continue; |
222 | 3.89k | } |
223 | | |
224 | | // Finalize the current bucket and add it to our collection of buckets. |
225 | 1.85k | auto pre_sum = cumulative_values - values_count; |
226 | | |
227 | 1.85k | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, |
228 | 1.85k | pre_sum); |
229 | 1.85k | buckets.push_back(new_bucket); |
230 | | |
231 | | // Reset variables for the next bucket. |
232 | 1.85k | if (next != ordered_map.end()) { |
233 | 1.21k | lower_value = &next->first; |
234 | 1.21k | } |
235 | 1.85k | values_count = 0; |
236 | 1.85k | distinct_values_count = 0; |
237 | 1.85k | } |
238 | | |
239 | 641 | return true; |
240 | 707 | } _ZN5doris15build_histogramIhEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm Line | Count | Source | 174 | 2 | const size_t max_num_buckets) { | 175 | | // If the input map is empty, there is nothing to build. | 176 | 2 | if (ordered_map.empty()) { | 177 | 1 | return false; | 178 | 1 | } | 179 | | | 180 | | // Calculate the maximum number of values that can be assigned to each bucket. | 181 | 1 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 182 | | | 183 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 184 | | // allocations when inserting buckets. | 185 | 1 | buckets.clear(); | 186 | 1 | buckets.reserve(max_num_buckets); | 187 | | | 188 | | // Initialize bucket variables. | 189 | 1 | size_t distinct_values_count = 0; | 190 | 1 | size_t values_count = 0; | 191 | 1 | size_t cumulative_values = 0; | 192 | | | 193 | | // Record how many values still need to be assigned. | 194 | 1 | auto remaining_distinct_values = ordered_map.size(); | 195 | | | 196 | 1 | auto it = ordered_map.begin(); | 197 | | | 198 | | // Lower value of the current bucket. | 199 | 1 | const T* lower_value = &it->first; | 200 | | | 201 | | // Iterate over the ordered map of distinct values and their counts. | 202 | 3 | for (; it != ordered_map.end(); ++it) { | 203 | 2 | const auto count = it->second; | 204 | 2 | const auto current_value = it->first; | 205 | | | 206 | | // Update the bucket counts and track the number of distinct values assigned. | 207 | 2 | distinct_values_count++; | 208 | 2 | remaining_distinct_values--; | 209 | 2 | values_count += count; | 210 | 2 | cumulative_values += count; | 211 | | | 212 | | // Check whether the current value should be added to the current bucket. 
| 213 | 2 | auto next = std::next(it); | 214 | 2 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 215 | | | 216 | 2 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 217 | 2 | values_count + next->second <= bucket_max_values) { | 218 | | // If the current value is the last in the input map and there are more remaining | 219 | | // distinct values than empty buckets and adding the value does not cause the bucket | 220 | | // to exceed its max size, skip adding the value to the current bucket. | 221 | 0 | continue; | 222 | 0 | } | 223 | | | 224 | | // Finalize the current bucket and add it to our collection of buckets. | 225 | 2 | auto pre_sum = cumulative_values - values_count; | 226 | | | 227 | 2 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 228 | 2 | pre_sum); | 229 | 2 | buckets.push_back(new_bucket); | 230 | | | 231 | | // Reset variables for the next bucket. | 232 | 2 | if (next != ordered_map.end()) { | 233 | 1 | lower_value = &next->first; | 234 | 1 | } | 235 | 2 | values_count = 0; | 236 | 2 | distinct_values_count = 0; | 237 | 2 | } | 238 | | | 239 | 1 | return true; | 240 | 2 | } |
_ZN5doris15build_histogramIaEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm Line | Count | Source | 174 | 48 | const size_t max_num_buckets) { | 175 | | // If the input map is empty, there is nothing to build. | 176 | 48 | if (ordered_map.empty()) { | 177 | 6 | return false; | 178 | 6 | } | 179 | | | 180 | | // Calculate the maximum number of values that can be assigned to each bucket. | 181 | 42 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 182 | | | 183 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 184 | | // allocations when inserting buckets. | 185 | 42 | buckets.clear(); | 186 | 42 | buckets.reserve(max_num_buckets); | 187 | | | 188 | | // Initialize bucket variables. | 189 | 42 | size_t distinct_values_count = 0; | 190 | 42 | size_t values_count = 0; | 191 | 42 | size_t cumulative_values = 0; | 192 | | | 193 | | // Record how many values still need to be assigned. | 194 | 42 | auto remaining_distinct_values = ordered_map.size(); | 195 | | | 196 | 42 | auto it = ordered_map.begin(); | 197 | | | 198 | | // Lower value of the current bucket. | 199 | 42 | const T* lower_value = &it->first; | 200 | | | 201 | | // Iterate over the ordered map of distinct values and their counts. | 202 | 382 | for (; it != ordered_map.end(); ++it) { | 203 | 340 | const auto count = it->second; | 204 | 340 | const auto current_value = it->first; | 205 | | | 206 | | // Update the bucket counts and track the number of distinct values assigned. | 207 | 340 | distinct_values_count++; | 208 | 340 | remaining_distinct_values--; | 209 | 340 | values_count += count; | 210 | 340 | cumulative_values += count; | 211 | | | 212 | | // Check whether the current value should be added to the current bucket. 
| 213 | 340 | auto next = std::next(it); | 214 | 340 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 215 | | | 216 | 340 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 217 | 340 | values_count + next->second <= bucket_max_values) { | 218 | | // If the current value is the last in the input map and there are more remaining | 219 | | // distinct values than empty buckets and adding the value does not cause the bucket | 220 | | // to exceed its max size, skip adding the value to the current bucket. | 221 | 211 | continue; | 222 | 211 | } | 223 | | | 224 | | // Finalize the current bucket and add it to our collection of buckets. | 225 | 129 | auto pre_sum = cumulative_values - values_count; | 226 | | | 227 | 129 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 228 | 129 | pre_sum); | 229 | 129 | buckets.push_back(new_bucket); | 230 | | | 231 | | // Reset variables for the next bucket. | 232 | 129 | if (next != ordered_map.end()) { | 233 | 87 | lower_value = &next->first; | 234 | 87 | } | 235 | 129 | values_count = 0; | 236 | 129 | distinct_values_count = 0; | 237 | 129 | } | 238 | | | 239 | 42 | return true; | 240 | 48 | } |
_ZN5doris15build_histogramIsEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm Line | Count | Source | 174 | 48 | const size_t max_num_buckets) { | 175 | | // If the input map is empty, there is nothing to build. | 176 | 48 | if (ordered_map.empty()) { | 177 | 5 | return false; | 178 | 5 | } | 179 | | | 180 | | // Calculate the maximum number of values that can be assigned to each bucket. | 181 | 43 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 182 | | | 183 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 184 | | // allocations when inserting buckets. | 185 | 43 | buckets.clear(); | 186 | 43 | buckets.reserve(max_num_buckets); | 187 | | | 188 | | // Initialize bucket variables. | 189 | 43 | size_t distinct_values_count = 0; | 190 | 43 | size_t values_count = 0; | 191 | 43 | size_t cumulative_values = 0; | 192 | | | 193 | | // Record how many values still need to be assigned. | 194 | 43 | auto remaining_distinct_values = ordered_map.size(); | 195 | | | 196 | 43 | auto it = ordered_map.begin(); | 197 | | | 198 | | // Lower value of the current bucket. | 199 | 43 | const T* lower_value = &it->first; | 200 | | | 201 | | // Iterate over the ordered map of distinct values and their counts. | 202 | 392 | for (; it != ordered_map.end(); ++it) { | 203 | 349 | const auto count = it->second; | 204 | 349 | const auto current_value = it->first; | 205 | | | 206 | | // Update the bucket counts and track the number of distinct values assigned. | 207 | 349 | distinct_values_count++; | 208 | 349 | remaining_distinct_values--; | 209 | 349 | values_count += count; | 210 | 349 | cumulative_values += count; | 211 | | | 212 | | // Check whether the current value should be added to the current bucket. 
| 213 | 349 | auto next = std::next(it); | 214 | 349 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 215 | | | 216 | 349 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 217 | 349 | values_count + next->second <= bucket_max_values) { | 218 | | // If the current value is the last in the input map and there are more remaining | 219 | | // distinct values than empty buckets and adding the value does not cause the bucket | 220 | | // to exceed its max size, skip adding the value to the current bucket. | 221 | 217 | continue; | 222 | 217 | } | 223 | | | 224 | | // Finalize the current bucket and add it to our collection of buckets. | 225 | 132 | auto pre_sum = cumulative_values - values_count; | 226 | | | 227 | 132 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 228 | 132 | pre_sum); | 229 | 132 | buckets.push_back(new_bucket); | 230 | | | 231 | | // Reset variables for the next bucket. | 232 | 132 | if (next != ordered_map.end()) { | 233 | 89 | lower_value = &next->first; | 234 | 89 | } | 235 | 132 | values_count = 0; | 236 | 132 | distinct_values_count = 0; | 237 | 132 | } | 238 | | | 239 | 43 | return true; | 240 | 48 | } |
_ZN5doris15build_histogramIiEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm Line | Count | Source | 174 | 74 | const size_t max_num_buckets) { | 175 | | // If the input map is empty, there is nothing to build. | 176 | 74 | if (ordered_map.empty()) { | 177 | 9 | return false; | 178 | 9 | } | 179 | | | 180 | | // Calculate the maximum number of values that can be assigned to each bucket. | 181 | 65 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 182 | | | 183 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 184 | | // allocations when inserting buckets. | 185 | 65 | buckets.clear(); | 186 | 65 | buckets.reserve(max_num_buckets); | 187 | | | 188 | | // Initialize bucket variables. | 189 | 65 | size_t distinct_values_count = 0; | 190 | 65 | size_t values_count = 0; | 191 | 65 | size_t cumulative_values = 0; | 192 | | | 193 | | // Record how many values still need to be assigned. | 194 | 65 | auto remaining_distinct_values = ordered_map.size(); | 195 | | | 196 | 65 | auto it = ordered_map.begin(); | 197 | | | 198 | | // Lower value of the current bucket. | 199 | 65 | const T* lower_value = &it->first; | 200 | | | 201 | | // Iterate over the ordered map of distinct values and their counts. | 202 | 428 | for (; it != ordered_map.end(); ++it) { | 203 | 363 | const auto count = it->second; | 204 | 363 | const auto current_value = it->first; | 205 | | | 206 | | // Update the bucket counts and track the number of distinct values assigned. | 207 | 363 | distinct_values_count++; | 208 | 363 | remaining_distinct_values--; | 209 | 363 | values_count += count; | 210 | 363 | cumulative_values += count; | 211 | | | 212 | | // Check whether the current value should be added to the current bucket. 
| 213 | 363 | auto next = std::next(it); | 214 | 363 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 215 | | | 216 | 363 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 217 | 363 | values_count + next->second <= bucket_max_values) { | 218 | | // If the current value is the last in the input map and there are more remaining | 219 | | // distinct values than empty buckets and adding the value does not cause the bucket | 220 | | // to exceed its max size, skip adding the value to the current bucket. | 221 | 202 | continue; | 222 | 202 | } | 223 | | | 224 | | // Finalize the current bucket and add it to our collection of buckets. | 225 | 161 | auto pre_sum = cumulative_values - values_count; | 226 | | | 227 | 161 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 228 | 161 | pre_sum); | 229 | 161 | buckets.push_back(new_bucket); | 230 | | | 231 | | // Reset variables for the next bucket. | 232 | 161 | if (next != ordered_map.end()) { | 233 | 96 | lower_value = &next->first; | 234 | 96 | } | 235 | 161 | values_count = 0; | 236 | 161 | distinct_values_count = 0; | 237 | 161 | } | 238 | | | 239 | 65 | return true; | 240 | 74 | } |
_ZN5doris15build_histogramIlEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm Line | Count | Source | 174 | 65 | const size_t max_num_buckets) { | 175 | | // If the input map is empty, there is nothing to build. | 176 | 65 | if (ordered_map.empty()) { | 177 | 6 | return false; | 178 | 6 | } | 179 | | | 180 | | // Calculate the maximum number of values that can be assigned to each bucket. | 181 | 59 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 182 | | | 183 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 184 | | // allocations when inserting buckets. | 185 | 59 | buckets.clear(); | 186 | 59 | buckets.reserve(max_num_buckets); | 187 | | | 188 | | // Initialize bucket variables. | 189 | 59 | size_t distinct_values_count = 0; | 190 | 59 | size_t values_count = 0; | 191 | 59 | size_t cumulative_values = 0; | 192 | | | 193 | | // Record how many values still need to be assigned. | 194 | 59 | auto remaining_distinct_values = ordered_map.size(); | 195 | | | 196 | 59 | auto it = ordered_map.begin(); | 197 | | | 198 | | // Lower value of the current bucket. | 199 | 59 | const T* lower_value = &it->first; | 200 | | | 201 | | // Iterate over the ordered map of distinct values and their counts. | 202 | 477 | for (; it != ordered_map.end(); ++it) { | 203 | 418 | const auto count = it->second; | 204 | 418 | const auto current_value = it->first; | 205 | | | 206 | | // Update the bucket counts and track the number of distinct values assigned. | 207 | 418 | distinct_values_count++; | 208 | 418 | remaining_distinct_values--; | 209 | 418 | values_count += count; | 210 | 418 | cumulative_values += count; | 211 | | | 212 | | // Check whether the current value should be added to the current bucket. 
| 213 | 418 | auto next = std::next(it); | 214 | 418 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 215 | | | 216 | 418 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 217 | 418 | values_count + next->second <= bucket_max_values) { | 218 | | // If the current value is the last in the input map and there are more remaining | 219 | | // distinct values than empty buckets and adding the value does not cause the bucket | 220 | | // to exceed its max size, skip adding the value to the current bucket. | 221 | 206 | continue; | 222 | 206 | } | 223 | | | 224 | | // Finalize the current bucket and add it to our collection of buckets. | 225 | 212 | auto pre_sum = cumulative_values - values_count; | 226 | | | 227 | 212 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 228 | 212 | pre_sum); | 229 | 212 | buckets.push_back(new_bucket); | 230 | | | 231 | | // Reset variables for the next bucket. | 232 | 212 | if (next != ordered_map.end()) { | 233 | 153 | lower_value = &next->first; | 234 | 153 | } | 235 | 212 | values_count = 0; | 236 | 212 | distinct_values_count = 0; | 237 | 212 | } | 238 | | | 239 | 59 | return true; | 240 | 65 | } |
_ZN5doris15build_histogramInEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm Line | Count | Source | 174 | 48 | const size_t max_num_buckets) { | 175 | | // If the input map is empty, there is nothing to build. | 176 | 48 | if (ordered_map.empty()) { | 177 | 6 | return false; | 178 | 6 | } | 179 | | | 180 | | // Calculate the maximum number of values that can be assigned to each bucket. | 181 | 42 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 182 | | | 183 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 184 | | // allocations when inserting buckets. | 185 | 42 | buckets.clear(); | 186 | 42 | buckets.reserve(max_num_buckets); | 187 | | | 188 | | // Initialize bucket variables. | 189 | 42 | size_t distinct_values_count = 0; | 190 | 42 | size_t values_count = 0; | 191 | 42 | size_t cumulative_values = 0; | 192 | | | 193 | | // Record how many values still need to be assigned. | 194 | 42 | auto remaining_distinct_values = ordered_map.size(); | 195 | | | 196 | 42 | auto it = ordered_map.begin(); | 197 | | | 198 | | // Lower value of the current bucket. | 199 | 42 | const T* lower_value = &it->first; | 200 | | | 201 | | // Iterate over the ordered map of distinct values and their counts. | 202 | 385 | for (; it != ordered_map.end(); ++it) { | 203 | 343 | const auto count = it->second; | 204 | 343 | const auto current_value = it->first; | 205 | | | 206 | | // Update the bucket counts and track the number of distinct values assigned. | 207 | 343 | distinct_values_count++; | 208 | 343 | remaining_distinct_values--; | 209 | 343 | values_count += count; | 210 | 343 | cumulative_values += count; | 211 | | | 212 | | // Check whether the current value should be added to the current bucket. 
| 213 | 343 | auto next = std::next(it); | 214 | 343 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 215 | | | 216 | 343 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 217 | 343 | values_count + next->second <= bucket_max_values) { | 218 | | // If the current value is the last in the input map and there are more remaining | 219 | | // distinct values than empty buckets and adding the value does not cause the bucket | 220 | | // to exceed its max size, skip adding the value to the current bucket. | 221 | 202 | continue; | 222 | 202 | } | 223 | | | 224 | | // Finalize the current bucket and add it to our collection of buckets. | 225 | 141 | auto pre_sum = cumulative_values - values_count; | 226 | | | 227 | 141 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 228 | 141 | pre_sum); | 229 | 141 | buckets.push_back(new_bucket); | 230 | | | 231 | | // Reset variables for the next bucket. | 232 | 141 | if (next != ordered_map.end()) { | 233 | 99 | lower_value = &next->first; | 234 | 99 | } | 235 | 141 | values_count = 0; | 236 | 141 | distinct_values_count = 0; | 237 | 141 | } | 238 | | | 239 | 42 | return true; | 240 | 48 | } |
_ZN5doris15build_histogramIfEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm Line | Count | Source | 174 | 46 | const size_t max_num_buckets) { | 175 | | // If the input map is empty, there is nothing to build. | 176 | 46 | if (ordered_map.empty()) { | 177 | 5 | return false; | 178 | 5 | } | 179 | | | 180 | | // Calculate the maximum number of values that can be assigned to each bucket. | 181 | 41 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 182 | | | 183 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 184 | | // allocations when inserting buckets. | 185 | 41 | buckets.clear(); | 186 | 41 | buckets.reserve(max_num_buckets); | 187 | | | 188 | | // Initialize bucket variables. | 189 | 41 | size_t distinct_values_count = 0; | 190 | 41 | size_t values_count = 0; | 191 | 41 | size_t cumulative_values = 0; | 192 | | | 193 | | // Record how many values still need to be assigned. | 194 | 41 | auto remaining_distinct_values = ordered_map.size(); | 195 | | | 196 | 41 | auto it = ordered_map.begin(); | 197 | | | 198 | | // Lower value of the current bucket. | 199 | 41 | const T* lower_value = &it->first; | 200 | | | 201 | | // Iterate over the ordered map of distinct values and their counts. | 202 | 369 | for (; it != ordered_map.end(); ++it) { | 203 | 328 | const auto count = it->second; | 204 | 328 | const auto current_value = it->first; | 205 | | | 206 | | // Update the bucket counts and track the number of distinct values assigned. | 207 | 328 | distinct_values_count++; | 208 | 328 | remaining_distinct_values--; | 209 | 328 | values_count += count; | 210 | 328 | cumulative_values += count; | 211 | | | 212 | | // Check whether the current value should be added to the current bucket. 
| 213 | 328 | auto next = std::next(it); | 214 | 328 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 215 | | | 216 | 328 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 217 | 328 | values_count + next->second <= bucket_max_values) { | 218 | | // If the current value is the last in the input map and there are more remaining | 219 | | // distinct values than empty buckets and adding the value does not cause the bucket | 220 | | // to exceed its max size, skip adding the value to the current bucket. | 221 | 200 | continue; | 222 | 200 | } | 223 | | | 224 | | // Finalize the current bucket and add it to our collection of buckets. | 225 | 128 | auto pre_sum = cumulative_values - values_count; | 226 | | | 227 | 128 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 228 | 128 | pre_sum); | 229 | 128 | buckets.push_back(new_bucket); | 230 | | | 231 | | // Reset variables for the next bucket. | 232 | 128 | if (next != ordered_map.end()) { | 233 | 87 | lower_value = &next->first; | 234 | 87 | } | 235 | 128 | values_count = 0; | 236 | 128 | distinct_values_count = 0; | 237 | 128 | } | 238 | | | 239 | 41 | return true; | 240 | 46 | } |
_ZN5doris15build_histogramIdEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm Line | Count | Source | 174 | 46 | const size_t max_num_buckets) { | 175 | | // If the input map is empty, there is nothing to build. | 176 | 46 | if (ordered_map.empty()) { | 177 | 5 | return false; | 178 | 5 | } | 179 | | | 180 | | // Calculate the maximum number of values that can be assigned to each bucket. | 181 | 41 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 182 | | | 183 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 184 | | // allocations when inserting buckets. | 185 | 41 | buckets.clear(); | 186 | 41 | buckets.reserve(max_num_buckets); | 187 | | | 188 | | // Initialize bucket variables. | 189 | 41 | size_t distinct_values_count = 0; | 190 | 41 | size_t values_count = 0; | 191 | 41 | size_t cumulative_values = 0; | 192 | | | 193 | | // Record how many values still need to be assigned. | 194 | 41 | auto remaining_distinct_values = ordered_map.size(); | 195 | | | 196 | 41 | auto it = ordered_map.begin(); | 197 | | | 198 | | // Lower value of the current bucket. | 199 | 41 | const T* lower_value = &it->first; | 200 | | | 201 | | // Iterate over the ordered map of distinct values and their counts. | 202 | 369 | for (; it != ordered_map.end(); ++it) { | 203 | 328 | const auto count = it->second; | 204 | 328 | const auto current_value = it->first; | 205 | | | 206 | | // Update the bucket counts and track the number of distinct values assigned. | 207 | 328 | distinct_values_count++; | 208 | 328 | remaining_distinct_values--; | 209 | 328 | values_count += count; | 210 | 328 | cumulative_values += count; | 211 | | | 212 | | // Check whether the current value should be added to the current bucket. 
| 213 | 328 | auto next = std::next(it); | 214 | 328 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 215 | | | 216 | 328 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 217 | 328 | values_count + next->second <= bucket_max_values) { | 218 | | // If the current value is the last in the input map and there are more remaining | 219 | | // distinct values than empty buckets and adding the value does not cause the bucket | 220 | | // to exceed its max size, skip adding the value to the current bucket. | 221 | 200 | continue; | 222 | 200 | } | 223 | | | 224 | | // Finalize the current bucket and add it to our collection of buckets. | 225 | 128 | auto pre_sum = cumulative_values - values_count; | 226 | | | 227 | 128 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 228 | 128 | pre_sum); | 229 | 128 | buckets.push_back(new_bucket); | 230 | | | 231 | | // Reset variables for the next bucket. | 232 | 128 | if (next != ordered_map.end()) { | 233 | 87 | lower_value = &next->first; | 234 | 87 | } | 235 | 128 | values_count = 0; | 236 | 128 | distinct_values_count = 0; | 237 | 128 | } | 238 | | | 239 | 41 | return true; | 240 | 46 | } |
_ZN5doris15build_histogramINS_7DecimalIiEEEEbRSt6vectorINS_6BucketIT_EESaIS6_EERKSt3mapIS5_mSt4lessIS5_ESaISt4pairIKS5_mEEEm Line | Count | Source | 174 | 49 | const size_t max_num_buckets) { | 175 | | // If the input map is empty, there is nothing to build. | 176 | 49 | if (ordered_map.empty()) { | 177 | 4 | return false; | 178 | 4 | } | 179 | | | 180 | | // Calculate the maximum number of values that can be assigned to each bucket. | 181 | 45 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 182 | | | 183 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 184 | | // allocations when inserting buckets. | 185 | 45 | buckets.clear(); | 186 | 45 | buckets.reserve(max_num_buckets); | 187 | | | 188 | | // Initialize bucket variables. | 189 | 45 | size_t distinct_values_count = 0; | 190 | 45 | size_t values_count = 0; | 191 | 45 | size_t cumulative_values = 0; | 192 | | | 193 | | // Record how many values still need to be assigned. | 194 | 45 | auto remaining_distinct_values = ordered_map.size(); | 195 | | | 196 | 45 | auto it = ordered_map.begin(); | 197 | | | 198 | | // Lower value of the current bucket. | 199 | 45 | const T* lower_value = &it->first; | 200 | | | 201 | | // Iterate over the ordered map of distinct values and their counts. | 202 | 206 | for (; it != ordered_map.end(); ++it) { | 203 | 161 | const auto count = it->second; | 204 | 161 | const auto current_value = it->first; | 205 | | | 206 | | // Update the bucket counts and track the number of distinct values assigned. | 207 | 161 | distinct_values_count++; | 208 | 161 | remaining_distinct_values--; | 209 | 161 | values_count += count; | 210 | 161 | cumulative_values += count; | 211 | | | 212 | | // Check whether the current value should be added to the current bucket. 
| 213 | 161 | auto next = std::next(it); | 214 | 161 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 215 | | | 216 | 161 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 217 | 161 | values_count + next->second <= bucket_max_values) { | 218 | | // If the current value is the last in the input map and there are more remaining | 219 | | // distinct values than empty buckets and adding the value does not cause the bucket | 220 | | // to exceed its max size, skip adding the value to the current bucket. | 221 | 28 | continue; | 222 | 28 | } | 223 | | | 224 | | // Finalize the current bucket and add it to our collection of buckets. | 225 | 133 | auto pre_sum = cumulative_values - values_count; | 226 | | | 227 | 133 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 228 | 133 | pre_sum); | 229 | 133 | buckets.push_back(new_bucket); | 230 | | | 231 | | // Reset variables for the next bucket. | 232 | 133 | if (next != ordered_map.end()) { | 233 | 88 | lower_value = &next->first; | 234 | 88 | } | 235 | 133 | values_count = 0; | 236 | 133 | distinct_values_count = 0; | 237 | 133 | } | 238 | | | 239 | 45 | return true; | 240 | 49 | } |
Unexecuted instantiation: _ZN5doris15build_histogramINS_7DecimalIlEEEEbRSt6vectorINS_6BucketIT_EESaIS6_EERKSt3mapIS5_mSt4lessIS5_ESaISt4pairIKS5_mEEEm Unexecuted instantiation: _ZN5doris15build_histogramINS_12Decimal128V3EEEbRSt6vectorINS_6BucketIT_EESaIS5_EERKSt3mapIS4_mSt4lessIS4_ESaISt4pairIKS4_mEEEm Unexecuted instantiation: _ZN5doris15build_histogramINS_7DecimalIN4wide7integerILm256EiEEEEEEbRSt6vectorINS_6BucketIT_EESaIS9_EERKSt3mapIS8_mSt4lessIS8_ESaISt4pairIKS8_mEEEm _ZN5doris15build_histogramINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEbRSt6vectorINS_6BucketIT_EESaISA_EERKSt3mapIS9_mSt4lessIS9_ESaISt4pairIKS9_mEEEm Line | Count | Source | 174 | 105 | const size_t max_num_buckets) { | 175 | | // If the input map is empty, there is nothing to build. | 176 | 105 | if (ordered_map.empty()) { | 177 | 5 | return false; | 178 | 5 | } | 179 | | | 180 | | // Calculate the maximum number of values that can be assigned to each bucket. | 181 | 100 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 182 | | | 183 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 184 | | // allocations when inserting buckets. | 185 | 100 | buckets.clear(); | 186 | 100 | buckets.reserve(max_num_buckets); | 187 | | | 188 | | // Initialize bucket variables. | 189 | 100 | size_t distinct_values_count = 0; | 190 | 100 | size_t values_count = 0; | 191 | 100 | size_t cumulative_values = 0; | 192 | | | 193 | | // Record how many values still need to be assigned. | 194 | 100 | auto remaining_distinct_values = ordered_map.size(); | 195 | | | 196 | 100 | auto it = ordered_map.begin(); | 197 | | | 198 | | // Lower value of the current bucket. | 199 | 100 | const T* lower_value = &it->first; | 200 | | | 201 | | // Iterate over the ordered map of distinct values and their counts. 
| 202 | 2.30k | for (; it != ordered_map.end(); ++it) { | 203 | 2.20k | const auto count = it->second; | 204 | 2.20k | const auto current_value = it->first; | 205 | | | 206 | | // Update the bucket counts and track the number of distinct values assigned. | 207 | 2.20k | distinct_values_count++; | 208 | 2.20k | remaining_distinct_values--; | 209 | 2.20k | values_count += count; | 210 | 2.20k | cumulative_values += count; | 211 | | | 212 | | // Check whether the current value should be added to the current bucket. | 213 | 2.20k | auto next = std::next(it); | 214 | 2.20k | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 215 | | | 216 | 2.20k | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 217 | 2.20k | values_count + next->second <= bucket_max_values) { | 218 | | // If the current value is the last in the input map and there are more remaining | 219 | | // distinct values than empty buckets and adding the value does not cause the bucket | 220 | | // to exceed its max size, skip adding the value to the current bucket. | 221 | 2.02k | continue; | 222 | 2.02k | } | 223 | | | 224 | | // Finalize the current bucket and add it to our collection of buckets. | 225 | 178 | auto pre_sum = cumulative_values - values_count; | 226 | | | 227 | 178 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 228 | 178 | pre_sum); | 229 | 178 | buckets.push_back(new_bucket); | 230 | | | 231 | | // Reset variables for the next bucket. | 232 | 178 | if (next != ordered_map.end()) { | 233 | 78 | lower_value = &next->first; | 234 | 78 | } | 235 | 178 | values_count = 0; | 236 | 178 | distinct_values_count = 0; | 237 | 178 | } | 238 | | | 239 | 100 | return true; | 240 | 105 | } |
_ZN5doris15build_histogramINS_11DateV2ValueINS_15DateV2ValueTypeEEEEEbRSt6vectorINS_6BucketIT_EESaIS7_EERKSt3mapIS6_mSt4lessIS6_ESaISt4pairIKS6_mEEEm Line | Count | Source | 174 | 88 | const size_t max_num_buckets) { | 175 | | // If the input map is empty, there is nothing to build. | 176 | 88 | if (ordered_map.empty()) { | 177 | 7 | return false; | 178 | 7 | } | 179 | | | 180 | | // Calculate the maximum number of values that can be assigned to each bucket. | 181 | 81 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 182 | | | 183 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 184 | | // allocations when inserting buckets. | 185 | 81 | buckets.clear(); | 186 | 81 | buckets.reserve(max_num_buckets); | 187 | | | 188 | | // Initialize bucket variables. | 189 | 81 | size_t distinct_values_count = 0; | 190 | 81 | size_t values_count = 0; | 191 | 81 | size_t cumulative_values = 0; | 192 | | | 193 | | // Record how many values still need to be assigned. | 194 | 81 | auto remaining_distinct_values = ordered_map.size(); | 195 | | | 196 | 81 | auto it = ordered_map.begin(); | 197 | | | 198 | | // Lower value of the current bucket. | 199 | 81 | const T* lower_value = &it->first; | 200 | | | 201 | | // Iterate over the ordered map of distinct values and their counts. | 202 | 539 | for (; it != ordered_map.end(); ++it) { | 203 | 458 | const auto count = it->second; | 204 | 458 | const auto current_value = it->first; | 205 | | | 206 | | // Update the bucket counts and track the number of distinct values assigned. | 207 | 458 | distinct_values_count++; | 208 | 458 | remaining_distinct_values--; | 209 | 458 | values_count += count; | 210 | 458 | cumulative_values += count; | 211 | | | 212 | | // Check whether the current value should be added to the current bucket. 
| 213 | 458 | auto next = std::next(it); | 214 | 458 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 215 | | | 216 | 458 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 217 | 458 | values_count + next->second <= bucket_max_values) { | 218 | | // If the current value is the last in the input map and there are more remaining | 219 | | // distinct values than empty buckets and adding the value does not cause the bucket | 220 | | // to exceed its max size, skip adding the value to the current bucket. | 221 | 202 | continue; | 222 | 202 | } | 223 | | | 224 | | // Finalize the current bucket and add it to our collection of buckets. | 225 | 256 | auto pre_sum = cumulative_values - values_count; | 226 | | | 227 | 256 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 228 | 256 | pre_sum); | 229 | 256 | buckets.push_back(new_bucket); | 230 | | | 231 | | // Reset variables for the next bucket. | 232 | 256 | if (next != ordered_map.end()) { | 233 | 175 | lower_value = &next->first; | 234 | 175 | } | 235 | 256 | values_count = 0; | 236 | 256 | distinct_values_count = 0; | 237 | 256 | } | 238 | | | 239 | 81 | return true; | 240 | 88 | } |
_ZN5doris15build_histogramINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEEEEbRSt6vectorINS_6BucketIT_EESaIS7_EERKSt3mapIS6_mSt4lessIS6_ESaISt4pairIKS6_mEEEm Line | Count | Source | 174 | 88 | const size_t max_num_buckets) { | 175 | | // If the input map is empty, there is nothing to build. | 176 | 88 | if (ordered_map.empty()) { | 177 | 7 | return false; | 178 | 7 | } | 179 | | | 180 | | // Calculate the maximum number of values that can be assigned to each bucket. | 181 | 81 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 182 | | | 183 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 184 | | // allocations when inserting buckets. | 185 | 81 | buckets.clear(); | 186 | 81 | buckets.reserve(max_num_buckets); | 187 | | | 188 | | // Initialize bucket variables. | 189 | 81 | size_t distinct_values_count = 0; | 190 | 81 | size_t values_count = 0; | 191 | 81 | size_t cumulative_values = 0; | 192 | | | 193 | | // Record how many values still need to be assigned. | 194 | 81 | auto remaining_distinct_values = ordered_map.size(); | 195 | | | 196 | 81 | auto it = ordered_map.begin(); | 197 | | | 198 | | // Lower value of the current bucket. | 199 | 81 | const T* lower_value = &it->first; | 200 | | | 201 | | // Iterate over the ordered map of distinct values and their counts. | 202 | 538 | for (; it != ordered_map.end(); ++it) { | 203 | 457 | const auto count = it->second; | 204 | 457 | const auto current_value = it->first; | 205 | | | 206 | | // Update the bucket counts and track the number of distinct values assigned. | 207 | 457 | distinct_values_count++; | 208 | 457 | remaining_distinct_values--; | 209 | 457 | values_count += count; | 210 | 457 | cumulative_values += count; | 211 | | | 212 | | // Check whether the current value should be added to the current bucket. 
| 213 | 457 | auto next = std::next(it); | 214 | 457 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 215 | | | 216 | 457 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 217 | 457 | values_count + next->second <= bucket_max_values) { | 218 | | // If the current value is the last in the input map and there are more remaining | 219 | | // distinct values than empty buckets and adding the value does not cause the bucket | 220 | | // to exceed its max size, skip adding the value to the current bucket. | 221 | 202 | continue; | 222 | 202 | } | 223 | | | 224 | | // Finalize the current bucket and add it to our collection of buckets. | 225 | 255 | auto pre_sum = cumulative_values - values_count; | 226 | | | 227 | 255 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 228 | 255 | pre_sum); | 229 | 255 | buckets.push_back(new_bucket); | 230 | | | 231 | | // Reset variables for the next bucket. | 232 | 255 | if (next != ordered_map.end()) { | 233 | 174 | lower_value = &next->first; | 234 | 174 | } | 235 | 255 | values_count = 0; | 236 | 255 | distinct_values_count = 0; | 237 | 255 | } | 238 | | | 239 | 81 | return true; | 240 | 88 | } |
|
241 | | |
242 | | template <typename T> |
243 | | bool histogram_to_json(rapidjson::StringBuffer& buffer, const std::vector<Bucket<T>>& buckets, |
244 | 702 | const DataTypePtr& data_type) { |
245 | 702 | rapidjson::Document doc; |
246 | 702 | doc.SetObject(); |
247 | 702 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); |
248 | | |
249 | 702 | int num_buckets = cast_set<int>(buckets.size()); |
250 | 702 | doc.AddMember("num_buckets", num_buckets, allocator); |
251 | | |
252 | 702 | rapidjson::Value bucket_arr(rapidjson::kArrayType); |
253 | 702 | bucket_arr.Reserve(num_buckets, allocator); |
254 | | |
255 | 702 | std::stringstream ss1; |
256 | 702 | std::stringstream ss2; |
257 | | |
258 | 702 | rapidjson::Value lower_val; |
259 | 702 | rapidjson::Value upper_val; |
260 | | |
261 | | // Convert bucket's lower and upper to 2 columns |
262 | 702 | MutableColumnPtr lower_column = data_type->create_column(); |
263 | 702 | MutableColumnPtr upper_column = data_type->create_column(); |
264 | 1.84k | for (const auto& bucket : buckets) { |
265 | | // String type is different, it has to pass in length |
266 | | // if it is string type , directly use string value |
267 | 1.84k | if constexpr (!std::is_same_v<T, std::string>) { |
268 | 1.66k | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); |
269 | 1.66k | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); |
270 | 1.66k | } |
271 | 1.84k | } |
272 | 702 | size_t row_num = 0; |
273 | | |
274 | 702 | auto format_options = DataTypeSerDe::get_default_format_options(); |
275 | 702 | auto time_zone = cctz::utc_time_zone(); |
276 | 702 | format_options.timezone = &time_zone; |
277 | | |
278 | 1.84k | for (const auto& bucket : buckets) { |
279 | 1.84k | if constexpr (std::is_same_v<T, std::string>) { |
280 | 178 | lower_val.SetString(bucket.lower.data(), |
281 | 178 | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); |
282 | 178 | upper_val.SetString(bucket.upper.data(), |
283 | 178 | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); |
284 | 1.66k | } else { |
285 | 1.66k | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); |
286 | 1.66k | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); |
287 | 1.66k | ++row_num; |
288 | 1.66k | lower_val.SetString(lower_str.data(), |
289 | 1.66k | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); |
290 | 1.66k | upper_val.SetString(upper_str.data(), |
291 | 1.66k | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); |
292 | 1.66k | } |
293 | 1.84k | rapidjson::Value bucket_json(rapidjson::kObjectType); |
294 | 1.84k | bucket_json.AddMember("lower", lower_val, allocator); |
295 | 1.84k | bucket_json.AddMember("upper", upper_val, allocator); |
296 | 1.84k | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); |
297 | 1.84k | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); |
298 | 1.84k | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); |
299 | | |
300 | 1.84k | bucket_arr.PushBack(bucket_json, allocator); |
301 | 1.84k | } |
302 | | |
303 | 702 | doc.AddMember("buckets", bucket_arr, allocator); |
304 | 702 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); |
305 | 702 | doc.Accept(writer); |
306 | | |
307 | 702 | return !buckets.empty() && buffer.GetSize() > 0; |
308 | 702 | } _ZN5doris17histogram_to_jsonIhEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 244 | 2 | const DataTypePtr& data_type) { | 245 | 2 | rapidjson::Document doc; | 246 | 2 | doc.SetObject(); | 247 | 2 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 248 | | | 249 | 2 | int num_buckets = cast_set<int>(buckets.size()); | 250 | 2 | doc.AddMember("num_buckets", num_buckets, allocator); | 251 | | | 252 | 2 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 253 | 2 | bucket_arr.Reserve(num_buckets, allocator); | 254 | | | 255 | 2 | std::stringstream ss1; | 256 | 2 | std::stringstream ss2; | 257 | | | 258 | 2 | rapidjson::Value lower_val; | 259 | 2 | rapidjson::Value upper_val; | 260 | | | 261 | | // Convert bucket's lower and upper to 2 columns | 262 | 2 | MutableColumnPtr lower_column = data_type->create_column(); | 263 | 2 | MutableColumnPtr upper_column = data_type->create_column(); | 264 | 2 | for (const auto& bucket : buckets) { | 265 | | // String type is different, it has to pass in length | 266 | | // if it is string type , directly use string value | 267 | 2 | if constexpr (!std::is_same_v<T, std::string>) { | 268 | 2 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 269 | 2 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 270 | 2 | } | 271 | 2 | } | 272 | 2 | size_t row_num = 0; | 273 | | | 274 | 2 | auto format_options = DataTypeSerDe::get_default_format_options(); | 275 | 2 | auto time_zone = cctz::utc_time_zone(); | 276 | 2 | format_options.timezone = &time_zone; | 277 | | | 278 | 2 | for (const auto& bucket : buckets) { | 279 | | if constexpr (std::is_same_v<T, std::string>) { | 280 | | lower_val.SetString(bucket.lower.data(), | 281 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 282 | | 
upper_val.SetString(bucket.upper.data(), | 283 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 284 | 2 | } else { | 285 | 2 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 286 | 2 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 287 | 2 | ++row_num; | 288 | 2 | lower_val.SetString(lower_str.data(), | 289 | 2 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 290 | 2 | upper_val.SetString(upper_str.data(), | 291 | 2 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 292 | 2 | } | 293 | 2 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 294 | 2 | bucket_json.AddMember("lower", lower_val, allocator); | 295 | 2 | bucket_json.AddMember("upper", upper_val, allocator); | 296 | 2 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 297 | 2 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 298 | 2 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 299 | | | 300 | 2 | bucket_arr.PushBack(bucket_json, allocator); | 301 | 2 | } | 302 | | | 303 | 2 | doc.AddMember("buckets", bucket_arr, allocator); | 304 | 2 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 305 | 2 | doc.Accept(writer); | 306 | | | 307 | 2 | return !buckets.empty() && buffer.GetSize() > 0; | 308 | 2 | } |
_ZN5doris17histogram_to_jsonIaEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 244 | 48 | const DataTypePtr& data_type) { | 245 | 48 | rapidjson::Document doc; | 246 | 48 | doc.SetObject(); | 247 | 48 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 248 | | | 249 | 48 | int num_buckets = cast_set<int>(buckets.size()); | 250 | 48 | doc.AddMember("num_buckets", num_buckets, allocator); | 251 | | | 252 | 48 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 253 | 48 | bucket_arr.Reserve(num_buckets, allocator); | 254 | | | 255 | 48 | std::stringstream ss1; | 256 | 48 | std::stringstream ss2; | 257 | | | 258 | 48 | rapidjson::Value lower_val; | 259 | 48 | rapidjson::Value upper_val; | 260 | | | 261 | | // Convert bucket's lower and upper to 2 columns | 262 | 48 | MutableColumnPtr lower_column = data_type->create_column(); | 263 | 48 | MutableColumnPtr upper_column = data_type->create_column(); | 264 | 129 | for (const auto& bucket : buckets) { | 265 | | // String type is different, it has to pass in length | 266 | | // if it is string type , directly use string value | 267 | 129 | if constexpr (!std::is_same_v<T, std::string>) { | 268 | 129 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 269 | 129 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 270 | 129 | } | 271 | 129 | } | 272 | 48 | size_t row_num = 0; | 273 | | | 274 | 48 | auto format_options = DataTypeSerDe::get_default_format_options(); | 275 | 48 | auto time_zone = cctz::utc_time_zone(); | 276 | 48 | format_options.timezone = &time_zone; | 277 | | | 278 | 129 | for (const auto& bucket : buckets) { | 279 | | if constexpr (std::is_same_v<T, std::string>) { | 280 | | lower_val.SetString(bucket.lower.data(), | 281 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 282 | | 
upper_val.SetString(bucket.upper.data(), | 283 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 284 | 129 | } else { | 285 | 129 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 286 | 129 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 287 | 129 | ++row_num; | 288 | 129 | lower_val.SetString(lower_str.data(), | 289 | 129 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 290 | 129 | upper_val.SetString(upper_str.data(), | 291 | 129 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 292 | 129 | } | 293 | 129 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 294 | 129 | bucket_json.AddMember("lower", lower_val, allocator); | 295 | 129 | bucket_json.AddMember("upper", upper_val, allocator); | 296 | 129 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 297 | 129 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 298 | 129 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 299 | | | 300 | 129 | bucket_arr.PushBack(bucket_json, allocator); | 301 | 129 | } | 302 | | | 303 | 48 | doc.AddMember("buckets", bucket_arr, allocator); | 304 | 48 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 305 | 48 | doc.Accept(writer); | 306 | | | 307 | 48 | return !buckets.empty() && buffer.GetSize() > 0; | 308 | 48 | } |
_ZN5doris17histogram_to_jsonIsEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 244 | 48 | const DataTypePtr& data_type) { | 245 | 48 | rapidjson::Document doc; | 246 | 48 | doc.SetObject(); | 247 | 48 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 248 | | | 249 | 48 | int num_buckets = cast_set<int>(buckets.size()); | 250 | 48 | doc.AddMember("num_buckets", num_buckets, allocator); | 251 | | | 252 | 48 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 253 | 48 | bucket_arr.Reserve(num_buckets, allocator); | 254 | | | 255 | 48 | std::stringstream ss1; | 256 | 48 | std::stringstream ss2; | 257 | | | 258 | 48 | rapidjson::Value lower_val; | 259 | 48 | rapidjson::Value upper_val; | 260 | | | 261 | | // Convert bucket's lower and upper to 2 columns | 262 | 48 | MutableColumnPtr lower_column = data_type->create_column(); | 263 | 48 | MutableColumnPtr upper_column = data_type->create_column(); | 264 | 132 | for (const auto& bucket : buckets) { | 265 | | // String type is different, it has to pass in length | 266 | | // if it is string type , directly use string value | 267 | 132 | if constexpr (!std::is_same_v<T, std::string>) { | 268 | 132 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 269 | 132 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 270 | 132 | } | 271 | 132 | } | 272 | 48 | size_t row_num = 0; | 273 | | | 274 | 48 | auto format_options = DataTypeSerDe::get_default_format_options(); | 275 | 48 | auto time_zone = cctz::utc_time_zone(); | 276 | 48 | format_options.timezone = &time_zone; | 277 | | | 278 | 132 | for (const auto& bucket : buckets) { | 279 | | if constexpr (std::is_same_v<T, std::string>) { | 280 | | lower_val.SetString(bucket.lower.data(), | 281 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 282 | | 
upper_val.SetString(bucket.upper.data(), | 283 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 284 | 132 | } else { | 285 | 132 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 286 | 132 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 287 | 132 | ++row_num; | 288 | 132 | lower_val.SetString(lower_str.data(), | 289 | 132 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 290 | 132 | upper_val.SetString(upper_str.data(), | 291 | 132 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 292 | 132 | } | 293 | 132 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 294 | 132 | bucket_json.AddMember("lower", lower_val, allocator); | 295 | 132 | bucket_json.AddMember("upper", upper_val, allocator); | 296 | 132 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 297 | 132 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 298 | 132 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 299 | | | 300 | 132 | bucket_arr.PushBack(bucket_json, allocator); | 301 | 132 | } | 302 | | | 303 | 48 | doc.AddMember("buckets", bucket_arr, allocator); | 304 | 48 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 305 | 48 | doc.Accept(writer); | 306 | | | 307 | 48 | return !buckets.empty() && buffer.GetSize() > 0; | 308 | 48 | } |
_ZN5doris17histogram_to_jsonIiEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 244 | 69 | const DataTypePtr& data_type) { | 245 | 69 | rapidjson::Document doc; | 246 | 69 | doc.SetObject(); | 247 | 69 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 248 | | | 249 | 69 | int num_buckets = cast_set<int>(buckets.size()); | 250 | 69 | doc.AddMember("num_buckets", num_buckets, allocator); | 251 | | | 252 | 69 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 253 | 69 | bucket_arr.Reserve(num_buckets, allocator); | 254 | | | 255 | 69 | std::stringstream ss1; | 256 | 69 | std::stringstream ss2; | 257 | | | 258 | 69 | rapidjson::Value lower_val; | 259 | 69 | rapidjson::Value upper_val; | 260 | | | 261 | | // Convert bucket's lower and upper to 2 columns | 262 | 69 | MutableColumnPtr lower_column = data_type->create_column(); | 263 | 69 | MutableColumnPtr upper_column = data_type->create_column(); | 264 | 149 | for (const auto& bucket : buckets) { | 265 | | // String type is different, it has to pass in length | 266 | | // if it is string type , directly use string value | 267 | 149 | if constexpr (!std::is_same_v<T, std::string>) { | 268 | 149 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 269 | 149 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 270 | 149 | } | 271 | 149 | } | 272 | 69 | size_t row_num = 0; | 273 | | | 274 | 69 | auto format_options = DataTypeSerDe::get_default_format_options(); | 275 | 69 | auto time_zone = cctz::utc_time_zone(); | 276 | 69 | format_options.timezone = &time_zone; | 277 | | | 278 | 149 | for (const auto& bucket : buckets) { | 279 | | if constexpr (std::is_same_v<T, std::string>) { | 280 | | lower_val.SetString(bucket.lower.data(), | 281 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 282 | | 
upper_val.SetString(bucket.upper.data(), | 283 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 284 | 149 | } else { | 285 | 149 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 286 | 149 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 287 | 149 | ++row_num; | 288 | 149 | lower_val.SetString(lower_str.data(), | 289 | 149 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 290 | 149 | upper_val.SetString(upper_str.data(), | 291 | 149 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 292 | 149 | } | 293 | 149 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 294 | 149 | bucket_json.AddMember("lower", lower_val, allocator); | 295 | 149 | bucket_json.AddMember("upper", upper_val, allocator); | 296 | 149 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 297 | 149 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 298 | 149 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 299 | | | 300 | 149 | bucket_arr.PushBack(bucket_json, allocator); | 301 | 149 | } | 302 | | | 303 | 69 | doc.AddMember("buckets", bucket_arr, allocator); | 304 | 69 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 305 | 69 | doc.Accept(writer); | 306 | | | 307 | 69 | return !buckets.empty() && buffer.GetSize() > 0; | 308 | 69 | } |
_ZN5doris17histogram_to_jsonIlEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 244 | 65 | const DataTypePtr& data_type) { | 245 | 65 | rapidjson::Document doc; | 246 | 65 | doc.SetObject(); | 247 | 65 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 248 | | | 249 | 65 | int num_buckets = cast_set<int>(buckets.size()); | 250 | 65 | doc.AddMember("num_buckets", num_buckets, allocator); | 251 | | | 252 | 65 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 253 | 65 | bucket_arr.Reserve(num_buckets, allocator); | 254 | | | 255 | 65 | std::stringstream ss1; | 256 | 65 | std::stringstream ss2; | 257 | | | 258 | 65 | rapidjson::Value lower_val; | 259 | 65 | rapidjson::Value upper_val; | 260 | | | 261 | | // Convert bucket's lower and upper to 2 columns | 262 | 65 | MutableColumnPtr lower_column = data_type->create_column(); | 263 | 65 | MutableColumnPtr upper_column = data_type->create_column(); | 264 | 212 | for (const auto& bucket : buckets) { | 265 | | // String type is different, it has to pass in length | 266 | | // if it is string type , directly use string value | 267 | 212 | if constexpr (!std::is_same_v<T, std::string>) { | 268 | 212 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 269 | 212 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 270 | 212 | } | 271 | 212 | } | 272 | 65 | size_t row_num = 0; | 273 | | | 274 | 65 | auto format_options = DataTypeSerDe::get_default_format_options(); | 275 | 65 | auto time_zone = cctz::utc_time_zone(); | 276 | 65 | format_options.timezone = &time_zone; | 277 | | | 278 | 212 | for (const auto& bucket : buckets) { | 279 | | if constexpr (std::is_same_v<T, std::string>) { | 280 | | lower_val.SetString(bucket.lower.data(), | 281 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 282 | | 
upper_val.SetString(bucket.upper.data(), | 283 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 284 | 212 | } else { | 285 | 212 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 286 | 212 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 287 | 212 | ++row_num; | 288 | 212 | lower_val.SetString(lower_str.data(), | 289 | 212 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 290 | 212 | upper_val.SetString(upper_str.data(), | 291 | 212 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 292 | 212 | } | 293 | 212 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 294 | 212 | bucket_json.AddMember("lower", lower_val, allocator); | 295 | 212 | bucket_json.AddMember("upper", upper_val, allocator); | 296 | 212 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 297 | 212 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 298 | 212 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 299 | | | 300 | 212 | bucket_arr.PushBack(bucket_json, allocator); | 301 | 212 | } | 302 | | | 303 | 65 | doc.AddMember("buckets", bucket_arr, allocator); | 304 | 65 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 305 | 65 | doc.Accept(writer); | 306 | | | 307 | 65 | return !buckets.empty() && buffer.GetSize() > 0; | 308 | 65 | } |
_ZN5doris17histogram_to_jsonInEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 244 | 48 | const DataTypePtr& data_type) { | 245 | 48 | rapidjson::Document doc; | 246 | 48 | doc.SetObject(); | 247 | 48 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 248 | | | 249 | 48 | int num_buckets = cast_set<int>(buckets.size()); | 250 | 48 | doc.AddMember("num_buckets", num_buckets, allocator); | 251 | | | 252 | 48 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 253 | 48 | bucket_arr.Reserve(num_buckets, allocator); | 254 | | | 255 | 48 | std::stringstream ss1; | 256 | 48 | std::stringstream ss2; | 257 | | | 258 | 48 | rapidjson::Value lower_val; | 259 | 48 | rapidjson::Value upper_val; | 260 | | | 261 | | // Convert bucket's lower and upper to 2 columns | 262 | 48 | MutableColumnPtr lower_column = data_type->create_column(); | 263 | 48 | MutableColumnPtr upper_column = data_type->create_column(); | 264 | 141 | for (const auto& bucket : buckets) { | 265 | | // String type is different, it has to pass in length | 266 | | // if it is string type , directly use string value | 267 | 141 | if constexpr (!std::is_same_v<T, std::string>) { | 268 | 141 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 269 | 141 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 270 | 141 | } | 271 | 141 | } | 272 | 48 | size_t row_num = 0; | 273 | | | 274 | 48 | auto format_options = DataTypeSerDe::get_default_format_options(); | 275 | 48 | auto time_zone = cctz::utc_time_zone(); | 276 | 48 | format_options.timezone = &time_zone; | 277 | | | 278 | 141 | for (const auto& bucket : buckets) { | 279 | | if constexpr (std::is_same_v<T, std::string>) { | 280 | | lower_val.SetString(bucket.lower.data(), | 281 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 282 | | 
upper_val.SetString(bucket.upper.data(), | 283 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 284 | 141 | } else { | 285 | 141 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 286 | 141 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 287 | 141 | ++row_num; | 288 | 141 | lower_val.SetString(lower_str.data(), | 289 | 141 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 290 | 141 | upper_val.SetString(upper_str.data(), | 291 | 141 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 292 | 141 | } | 293 | 141 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 294 | 141 | bucket_json.AddMember("lower", lower_val, allocator); | 295 | 141 | bucket_json.AddMember("upper", upper_val, allocator); | 296 | 141 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 297 | 141 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 298 | 141 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 299 | | | 300 | 141 | bucket_arr.PushBack(bucket_json, allocator); | 301 | 141 | } | 302 | | | 303 | 48 | doc.AddMember("buckets", bucket_arr, allocator); | 304 | 48 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 305 | 48 | doc.Accept(writer); | 306 | | | 307 | 48 | return !buckets.empty() && buffer.GetSize() > 0; | 308 | 48 | } |
_ZN5doris17histogram_to_jsonIfEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 244 | 46 | const DataTypePtr& data_type) { | 245 | 46 | rapidjson::Document doc; | 246 | 46 | doc.SetObject(); | 247 | 46 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 248 | | | 249 | 46 | int num_buckets = cast_set<int>(buckets.size()); | 250 | 46 | doc.AddMember("num_buckets", num_buckets, allocator); | 251 | | | 252 | 46 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 253 | 46 | bucket_arr.Reserve(num_buckets, allocator); | 254 | | | 255 | 46 | std::stringstream ss1; | 256 | 46 | std::stringstream ss2; | 257 | | | 258 | 46 | rapidjson::Value lower_val; | 259 | 46 | rapidjson::Value upper_val; | 260 | | | 261 | | // Convert bucket's lower and upper to 2 columns | 262 | 46 | MutableColumnPtr lower_column = data_type->create_column(); | 263 | 46 | MutableColumnPtr upper_column = data_type->create_column(); | 264 | 128 | for (const auto& bucket : buckets) { | 265 | | // String type is different, it has to pass in length | 266 | | // if it is string type , directly use string value | 267 | 128 | if constexpr (!std::is_same_v<T, std::string>) { | 268 | 128 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 269 | 128 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 270 | 128 | } | 271 | 128 | } | 272 | 46 | size_t row_num = 0; | 273 | | | 274 | 46 | auto format_options = DataTypeSerDe::get_default_format_options(); | 275 | 46 | auto time_zone = cctz::utc_time_zone(); | 276 | 46 | format_options.timezone = &time_zone; | 277 | | | 278 | 128 | for (const auto& bucket : buckets) { | 279 | | if constexpr (std::is_same_v<T, std::string>) { | 280 | | lower_val.SetString(bucket.lower.data(), | 281 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 282 | | 
upper_val.SetString(bucket.upper.data(), | 283 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 284 | 128 | } else { | 285 | 128 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 286 | 128 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 287 | 128 | ++row_num; | 288 | 128 | lower_val.SetString(lower_str.data(), | 289 | 128 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 290 | 128 | upper_val.SetString(upper_str.data(), | 291 | 128 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 292 | 128 | } | 293 | 128 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 294 | 128 | bucket_json.AddMember("lower", lower_val, allocator); | 295 | 128 | bucket_json.AddMember("upper", upper_val, allocator); | 296 | 128 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 297 | 128 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 298 | 128 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 299 | | | 300 | 128 | bucket_arr.PushBack(bucket_json, allocator); | 301 | 128 | } | 302 | | | 303 | 46 | doc.AddMember("buckets", bucket_arr, allocator); | 304 | 46 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 305 | 46 | doc.Accept(writer); | 306 | | | 307 | 46 | return !buckets.empty() && buffer.GetSize() > 0; | 308 | 46 | } |
_ZN5doris17histogram_to_jsonIdEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 244 | 46 | const DataTypePtr& data_type) { | 245 | 46 | rapidjson::Document doc; | 246 | 46 | doc.SetObject(); | 247 | 46 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 248 | | | 249 | 46 | int num_buckets = cast_set<int>(buckets.size()); | 250 | 46 | doc.AddMember("num_buckets", num_buckets, allocator); | 251 | | | 252 | 46 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 253 | 46 | bucket_arr.Reserve(num_buckets, allocator); | 254 | | | 255 | 46 | std::stringstream ss1; | 256 | 46 | std::stringstream ss2; | 257 | | | 258 | 46 | rapidjson::Value lower_val; | 259 | 46 | rapidjson::Value upper_val; | 260 | | | 261 | | // Convert bucket's lower and upper to 2 columns | 262 | 46 | MutableColumnPtr lower_column = data_type->create_column(); | 263 | 46 | MutableColumnPtr upper_column = data_type->create_column(); | 264 | 128 | for (const auto& bucket : buckets) { | 265 | | // String type is different, it has to pass in length | 266 | | // if it is string type , directly use string value | 267 | 128 | if constexpr (!std::is_same_v<T, std::string>) { | 268 | 128 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 269 | 128 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 270 | 128 | } | 271 | 128 | } | 272 | 46 | size_t row_num = 0; | 273 | | | 274 | 46 | auto format_options = DataTypeSerDe::get_default_format_options(); | 275 | 46 | auto time_zone = cctz::utc_time_zone(); | 276 | 46 | format_options.timezone = &time_zone; | 277 | | | 278 | 128 | for (const auto& bucket : buckets) { | 279 | | if constexpr (std::is_same_v<T, std::string>) { | 280 | | lower_val.SetString(bucket.lower.data(), | 281 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 282 | | 
upper_val.SetString(bucket.upper.data(), | 283 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 284 | 128 | } else { | 285 | 128 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 286 | 128 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 287 | 128 | ++row_num; | 288 | 128 | lower_val.SetString(lower_str.data(), | 289 | 128 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 290 | 128 | upper_val.SetString(upper_str.data(), | 291 | 128 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 292 | 128 | } | 293 | 128 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 294 | 128 | bucket_json.AddMember("lower", lower_val, allocator); | 295 | 128 | bucket_json.AddMember("upper", upper_val, allocator); | 296 | 128 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 297 | 128 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 298 | 128 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 299 | | | 300 | 128 | bucket_arr.PushBack(bucket_json, allocator); | 301 | 128 | } | 302 | | | 303 | 46 | doc.AddMember("buckets", bucket_arr, allocator); | 304 | 46 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 305 | 46 | doc.Accept(writer); | 306 | | | 307 | 46 | return !buckets.empty() && buffer.GetSize() > 0; | 308 | 46 | } |
_ZN5doris17histogram_to_jsonINS_7DecimalIiEEEEbRN9rapidjson19GenericStringBufferINS3_4UTF8IcEENS3_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISD_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 244 | 49 | const DataTypePtr& data_type) { | 245 | 49 | rapidjson::Document doc; | 246 | 49 | doc.SetObject(); | 247 | 49 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 248 | | | 249 | 49 | int num_buckets = cast_set<int>(buckets.size()); | 250 | 49 | doc.AddMember("num_buckets", num_buckets, allocator); | 251 | | | 252 | 49 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 253 | 49 | bucket_arr.Reserve(num_buckets, allocator); | 254 | | | 255 | 49 | std::stringstream ss1; | 256 | 49 | std::stringstream ss2; | 257 | | | 258 | 49 | rapidjson::Value lower_val; | 259 | 49 | rapidjson::Value upper_val; | 260 | | | 261 | | // Convert bucket's lower and upper to 2 columns | 262 | 49 | MutableColumnPtr lower_column = data_type->create_column(); | 263 | 49 | MutableColumnPtr upper_column = data_type->create_column(); | 264 | 133 | for (const auto& bucket : buckets) { | 265 | | // String type is different, it has to pass in length | 266 | | // if it is string type , directly use string value | 267 | 133 | if constexpr (!std::is_same_v<T, std::string>) { | 268 | 133 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 269 | 133 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 270 | 133 | } | 271 | 133 | } | 272 | 49 | size_t row_num = 0; | 273 | | | 274 | 49 | auto format_options = DataTypeSerDe::get_default_format_options(); | 275 | 49 | auto time_zone = cctz::utc_time_zone(); | 276 | 49 | format_options.timezone = &time_zone; | 277 | | | 278 | 133 | for (const auto& bucket : buckets) { | 279 | | if constexpr (std::is_same_v<T, std::string>) { | 280 | | lower_val.SetString(bucket.lower.data(), | 281 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 282 | | 
upper_val.SetString(bucket.upper.data(), | 283 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 284 | 133 | } else { | 285 | 133 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 286 | 133 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 287 | 133 | ++row_num; | 288 | 133 | lower_val.SetString(lower_str.data(), | 289 | 133 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 290 | 133 | upper_val.SetString(upper_str.data(), | 291 | 133 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 292 | 133 | } | 293 | 133 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 294 | 133 | bucket_json.AddMember("lower", lower_val, allocator); | 295 | 133 | bucket_json.AddMember("upper", upper_val, allocator); | 296 | 133 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 297 | 133 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 298 | 133 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 299 | | | 300 | 133 | bucket_arr.PushBack(bucket_json, allocator); | 301 | 133 | } | 302 | | | 303 | 49 | doc.AddMember("buckets", bucket_arr, allocator); | 304 | 49 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 305 | 49 | doc.Accept(writer); | 306 | | | 307 | 49 | return !buckets.empty() && buffer.GetSize() > 0; | 308 | 49 | } |
Unexecuted instantiation: _ZN5doris17histogram_to_jsonINS_7DecimalIlEEEEbRN9rapidjson19GenericStringBufferINS3_4UTF8IcEENS3_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISD_EERKSt10shared_ptrIKNS_9IDataTypeEE Unexecuted instantiation: _ZN5doris17histogram_to_jsonINS_12Decimal128V3EEEbRN9rapidjson19GenericStringBufferINS2_4UTF8IcEENS2_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISC_EERKSt10shared_ptrIKNS_9IDataTypeEE Unexecuted instantiation: _ZN5doris17histogram_to_jsonINS_7DecimalIN4wide7integerILm256EiEEEEEEbRN9rapidjson19GenericStringBufferINS6_4UTF8IcEENS6_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISG_EERKSt10shared_ptrIKNS_9IDataTypeEE _ZN5doris17histogram_to_jsonINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEbRN9rapidjson19GenericStringBufferINS7_4UTF8IcEENS7_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISH_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 244 | 105 | const DataTypePtr& data_type) { | 245 | 105 | rapidjson::Document doc; | 246 | 105 | doc.SetObject(); | 247 | 105 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 248 | | | 249 | 105 | int num_buckets = cast_set<int>(buckets.size()); | 250 | 105 | doc.AddMember("num_buckets", num_buckets, allocator); | 251 | | | 252 | 105 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 253 | 105 | bucket_arr.Reserve(num_buckets, allocator); | 254 | | | 255 | 105 | std::stringstream ss1; | 256 | 105 | std::stringstream ss2; | 257 | | | 258 | 105 | rapidjson::Value lower_val; | 259 | 105 | rapidjson::Value upper_val; | 260 | | | 261 | | // Convert bucket's lower and upper to 2 columns | 262 | 105 | MutableColumnPtr lower_column = data_type->create_column(); | 263 | 105 | MutableColumnPtr upper_column = data_type->create_column(); | 264 | 178 | for (const auto& bucket : buckets) { | 265 | | // String type is different, it has to pass in length | 266 | | // if it is string type , directly use string value | 267 | | if constexpr (!std::is_same_v<T, 
std::string>) { | 268 | | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 269 | | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 270 | | } | 271 | 178 | } | 272 | 105 | size_t row_num = 0; | 273 | | | 274 | 105 | auto format_options = DataTypeSerDe::get_default_format_options(); | 275 | 105 | auto time_zone = cctz::utc_time_zone(); | 276 | 105 | format_options.timezone = &time_zone; | 277 | | | 278 | 178 | for (const auto& bucket : buckets) { | 279 | 178 | if constexpr (std::is_same_v<T, std::string>) { | 280 | 178 | lower_val.SetString(bucket.lower.data(), | 281 | 178 | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 282 | 178 | upper_val.SetString(bucket.upper.data(), | 283 | 178 | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 284 | | } else { | 285 | | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 286 | | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 287 | | ++row_num; | 288 | | lower_val.SetString(lower_str.data(), | 289 | | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 290 | | upper_val.SetString(upper_str.data(), | 291 | | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 292 | | } | 293 | 178 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 294 | 178 | bucket_json.AddMember("lower", lower_val, allocator); | 295 | 178 | bucket_json.AddMember("upper", upper_val, allocator); | 296 | 178 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 297 | 178 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 298 | 178 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 299 | | | 300 | 178 | bucket_arr.PushBack(bucket_json, allocator); | 301 | 178 | } | 302 | | | 303 | 105 | doc.AddMember("buckets", bucket_arr, allocator); | 304 | 105 | 
rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 305 | 105 | doc.Accept(writer); | 306 | | | 307 | 105 | return !buckets.empty() && buffer.GetSize() > 0; | 308 | 105 | } |
_ZN5doris17histogram_to_jsonINS_11DateV2ValueINS_15DateV2ValueTypeEEEEEbRN9rapidjson19GenericStringBufferINS4_4UTF8IcEENS4_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISE_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 244 | 88 | const DataTypePtr& data_type) { | 245 | 88 | rapidjson::Document doc; | 246 | 88 | doc.SetObject(); | 247 | 88 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 248 | | | 249 | 88 | int num_buckets = cast_set<int>(buckets.size()); | 250 | 88 | doc.AddMember("num_buckets", num_buckets, allocator); | 251 | | | 252 | 88 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 253 | 88 | bucket_arr.Reserve(num_buckets, allocator); | 254 | | | 255 | 88 | std::stringstream ss1; | 256 | 88 | std::stringstream ss2; | 257 | | | 258 | 88 | rapidjson::Value lower_val; | 259 | 88 | rapidjson::Value upper_val; | 260 | | | 261 | | // Convert bucket's lower and upper to 2 columns | 262 | 88 | MutableColumnPtr lower_column = data_type->create_column(); | 263 | 88 | MutableColumnPtr upper_column = data_type->create_column(); | 264 | 256 | for (const auto& bucket : buckets) { | 265 | | // String type is different, it has to pass in length | 266 | | // if it is string type , directly use string value | 267 | 256 | if constexpr (!std::is_same_v<T, std::string>) { | 268 | 256 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 269 | 256 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 270 | 256 | } | 271 | 256 | } | 272 | 88 | size_t row_num = 0; | 273 | | | 274 | 88 | auto format_options = DataTypeSerDe::get_default_format_options(); | 275 | 88 | auto time_zone = cctz::utc_time_zone(); | 276 | 88 | format_options.timezone = &time_zone; | 277 | | | 278 | 256 | for (const auto& bucket : buckets) { | 279 | | if constexpr (std::is_same_v<T, std::string>) { | 280 | | lower_val.SetString(bucket.lower.data(), | 281 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), 
allocator); | 282 | | upper_val.SetString(bucket.upper.data(), | 283 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 284 | 256 | } else { | 285 | 256 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 286 | 256 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 287 | 256 | ++row_num; | 288 | 256 | lower_val.SetString(lower_str.data(), | 289 | 256 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 290 | 256 | upper_val.SetString(upper_str.data(), | 291 | 256 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 292 | 256 | } | 293 | 256 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 294 | 256 | bucket_json.AddMember("lower", lower_val, allocator); | 295 | 256 | bucket_json.AddMember("upper", upper_val, allocator); | 296 | 256 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 297 | 256 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 298 | 256 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 299 | | | 300 | 256 | bucket_arr.PushBack(bucket_json, allocator); | 301 | 256 | } | 302 | | | 303 | 88 | doc.AddMember("buckets", bucket_arr, allocator); | 304 | 88 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 305 | 88 | doc.Accept(writer); | 306 | | | 307 | 88 | return !buckets.empty() && buffer.GetSize() > 0; | 308 | 88 | } |
_ZN5doris17histogram_to_jsonINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEEEEbRN9rapidjson19GenericStringBufferINS4_4UTF8IcEENS4_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISE_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 244 | 88 | const DataTypePtr& data_type) { | 245 | 88 | rapidjson::Document doc; | 246 | 88 | doc.SetObject(); | 247 | 88 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 248 | | | 249 | 88 | int num_buckets = cast_set<int>(buckets.size()); | 250 | 88 | doc.AddMember("num_buckets", num_buckets, allocator); | 251 | | | 252 | 88 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 253 | 88 | bucket_arr.Reserve(num_buckets, allocator); | 254 | | | 255 | 88 | std::stringstream ss1; | 256 | 88 | std::stringstream ss2; | 257 | | | 258 | 88 | rapidjson::Value lower_val; | 259 | 88 | rapidjson::Value upper_val; | 260 | | | 261 | | // Convert bucket's lower and upper to 2 columns | 262 | 88 | MutableColumnPtr lower_column = data_type->create_column(); | 263 | 88 | MutableColumnPtr upper_column = data_type->create_column(); | 264 | 255 | for (const auto& bucket : buckets) { | 265 | | // String type is different, it has to pass in length | 266 | | // if it is string type , directly use string value | 267 | 255 | if constexpr (!std::is_same_v<T, std::string>) { | 268 | 255 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 269 | 255 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 270 | 255 | } | 271 | 255 | } | 272 | 88 | size_t row_num = 0; | 273 | | | 274 | 88 | auto format_options = DataTypeSerDe::get_default_format_options(); | 275 | 88 | auto time_zone = cctz::utc_time_zone(); | 276 | 88 | format_options.timezone = &time_zone; | 277 | | | 278 | 255 | for (const auto& bucket : buckets) { | 279 | | if constexpr (std::is_same_v<T, std::string>) { | 280 | | lower_val.SetString(bucket.lower.data(), | 281 | | 
static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 282 | | upper_val.SetString(bucket.upper.data(), | 283 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 284 | 255 | } else { | 285 | 255 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 286 | 255 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 287 | 255 | ++row_num; | 288 | 255 | lower_val.SetString(lower_str.data(), | 289 | 255 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 290 | 255 | upper_val.SetString(upper_str.data(), | 291 | 255 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 292 | 255 | } | 293 | 255 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 294 | 255 | bucket_json.AddMember("lower", lower_val, allocator); | 295 | 255 | bucket_json.AddMember("upper", upper_val, allocator); | 296 | 255 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 297 | 255 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 298 | 255 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 299 | | | 300 | 255 | bucket_arr.PushBack(bucket_json, allocator); | 301 | 255 | } | 302 | | | 303 | 88 | doc.AddMember("buckets", bucket_arr, allocator); | 304 | 88 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 305 | 88 | doc.Accept(writer); | 306 | | | 307 | 88 | return !buckets.empty() && buffer.GetSize() > 0; | 308 | 88 | } |
|
309 | | } // namespace doris |