be/src/exec/common/histogram_helpers.hpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <rapidjson/document.h> |
21 | | #include <rapidjson/prettywriter.h> |
22 | | #include <rapidjson/stringbuffer.h> |
23 | | |
24 | | #include <boost/dynamic_bitset.hpp> |
25 | | |
26 | | #include "common/cast_set.h" |
27 | | #include "core/data_type/data_type_decimal.h" |
28 | | #include "util/io_helper.h" |
29 | | |
30 | | namespace doris { |
31 | | template <typename T> |
32 | | struct Bucket { |
33 | | public: |
34 | | Bucket() = default; |
35 | | Bucket(T lower, T upper, size_t ndv, size_t count, size_t pre_sum) |
36 | 1.85k | : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {}_ZN5doris6BucketINS_7DecimalIiEEEC2ES2_S2_mmm Line | Count | Source | 36 | 133 | : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {} |
Unexecuted instantiation: _ZN5doris6BucketINS_7DecimalIlEEEC2ES2_S2_mmm Unexecuted instantiation: _ZN5doris6BucketINS_12Decimal128V3EEC2ES1_S1_mmm Unexecuted instantiation: _ZN5doris6BucketINS_7DecimalIN4wide7integerILm256EiEEEEEC2ES5_S5_mmm _ZN5doris6BucketINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEC2ES6_S6_mmm Line | Count | Source | 36 | 178 | : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {} |
_ZN5doris6BucketINS_11DateV2ValueINS_15DateV2ValueTypeEEEEC2ES3_S3_mmm Line | Count | Source | 36 | 256 | : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {} |
_ZN5doris6BucketINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEEEC2ES3_S3_mmm Line | Count | Source | 36 | 255 | : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {} |
_ZN5doris6BucketIhEC2Ehhmmm Line | Count | Source | 36 | 2 | : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {} |
_ZN5doris6BucketIaEC2Eaammm Line | Count | Source | 36 | 129 | : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {} |
_ZN5doris6BucketIsEC2Essmmm Line | Count | Source | 36 | 132 | : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {} |
_ZN5doris6BucketIiEC2Eiimmm Line | Count | Source | 36 | 163 | : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {} |
_ZN5doris6BucketIlEC2Ellmmm Line | Count | Source | 36 | 212 | : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {} |
_ZN5doris6BucketInEC2Ennmmm Line | Count | Source | 36 | 141 | : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {} |
_ZN5doris6BucketIfEC2Effmmm Line | Count | Source | 36 | 128 | : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {} |
_ZN5doris6BucketIdEC2Eddmmm Line | Count | Source | 36 | 128 | : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {} |
|
37 | | |
38 | | T lower; |
39 | | T upper; |
40 | | size_t ndv; |
41 | | size_t count; |
42 | | size_t pre_sum; |
43 | | }; |
44 | | |
45 | | /** |
46 | | * Checks if it is possible to assign the provided value_map to the given |
47 | | * number of buckets such that no bucket has a size larger than max_bucket_size. |
48 | | * |
49 | | * @param value_map A mapping of values to their counts. |
50 | | * @param max_bucket_size The maximum size that any bucket is allowed to have. |
51 | | * @param num_buckets The number of buckets that we want to assign values to. |
52 | | * |
53 | | * @return true if the values can be assigned to the buckets, false otherwise. |
54 | | */ |
55 | | template <typename T> |
56 | | bool can_assign_into_buckets(const std::map<T, size_t>& value_map, const size_t max_bucket_size, |
57 | 505 | const size_t num_buckets) { |
58 | 505 | if (value_map.empty()) { |
59 | 1 | return false; |
60 | 504 | }; |
61 | | |
62 | 504 | size_t used_buckets = 1; |
63 | 504 | size_t current_bucket_size = 0; |
64 | | |
65 | 30.1k | for (const auto& [value, count] : value_map) { |
66 | 30.1k | current_bucket_size += count; |
67 | | |
68 | | // If adding the current value to the current bucket would exceed max_bucket_size, |
69 | | // then we start a new bucket. |
70 | 30.1k | if (current_bucket_size > max_bucket_size) { |
71 | 1.14k | ++used_buckets; |
72 | 1.14k | current_bucket_size = count; |
73 | 1.14k | } |
74 | | |
75 | | // If we have used more buckets than num_buckets, we cannot assign the values to buckets. |
76 | 30.1k | if (used_buckets > num_buckets) { |
77 | 222 | return false; |
78 | 222 | } |
79 | 30.1k | } |
80 | | |
81 | 282 | return true; |
82 | 504 | } Unexecuted instantiation: _ZN5doris23can_assign_into_bucketsIhEEbRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEmm _ZN5doris23can_assign_into_bucketsIaEEbRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEmm Line | Count | Source | 57 | 27 | const size_t num_buckets) { | 58 | 27 | if (value_map.empty()) { | 59 | 0 | return false; | 60 | 27 | }; | 61 | | | 62 | 27 | size_t used_buckets = 1; | 63 | 27 | size_t current_bucket_size = 0; | 64 | | | 65 | 1.24k | for (const auto& [value, count] : value_map) { | 66 | 1.24k | current_bucket_size += count; | 67 | | | 68 | | // If adding the current value to the current bucket would exceed max_bucket_size, | 69 | | // then we start a new bucket. | 70 | 1.24k | if (current_bucket_size > max_bucket_size) { | 71 | 70 | ++used_buckets; | 72 | 70 | current_bucket_size = count; | 73 | 70 | } | 74 | | | 75 | | // If we have used more buckets than num_buckets, we cannot assign the values to buckets. | 76 | 1.24k | if (used_buckets > num_buckets) { | 77 | 11 | return false; | 78 | 11 | } | 79 | 1.24k | } | 80 | | | 81 | 16 | return true; | 82 | 27 | } |
_ZN5doris23can_assign_into_bucketsIsEEbRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEmm Line | Count | Source | 57 | 40 | const size_t num_buckets) { | 58 | 40 | if (value_map.empty()) { | 59 | 0 | return false; | 60 | 40 | }; | 61 | | | 62 | 40 | size_t used_buckets = 1; | 63 | 40 | size_t current_bucket_size = 0; | 64 | | | 65 | 1.34k | for (const auto& [value, count] : value_map) { | 66 | 1.34k | current_bucket_size += count; | 67 | | | 68 | | // If adding the current value to the current bucket would exceed max_bucket_size, | 69 | | // then we start a new bucket. | 70 | 1.34k | if (current_bucket_size > max_bucket_size) { | 71 | 88 | ++used_buckets; | 72 | 88 | current_bucket_size = count; | 73 | 88 | } | 74 | | | 75 | | // If we have used more buckets than num_buckets, we cannot assign the values to buckets. | 76 | 1.34k | if (used_buckets > num_buckets) { | 77 | 18 | return false; | 78 | 18 | } | 79 | 1.34k | } | 80 | | | 81 | 22 | return true; | 82 | 40 | } |
_ZN5doris23can_assign_into_bucketsIiEEbRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEmm Line | Count | Source | 57 | 69 | const size_t num_buckets) { | 58 | 69 | if (value_map.empty()) { | 59 | 1 | return false; | 60 | 68 | }; | 61 | | | 62 | 68 | size_t used_buckets = 1; | 63 | 68 | size_t current_bucket_size = 0; | 64 | | | 65 | 1.40k | for (const auto& [value, count] : value_map) { | 66 | 1.40k | current_bucket_size += count; | 67 | | | 68 | | // If adding the current value to the current bucket would exceed max_bucket_size, | 69 | | // then we start a new bucket. | 70 | 1.40k | if (current_bucket_size > max_bucket_size) { | 71 | 169 | ++used_buckets; | 72 | 169 | current_bucket_size = count; | 73 | 169 | } | 74 | | | 75 | | // If we have used more buckets than num_buckets, we cannot assign the values to buckets. | 76 | 1.40k | if (used_buckets > num_buckets) { | 77 | 30 | return false; | 78 | 30 | } | 79 | 1.40k | } | 80 | | | 81 | 38 | return true; | 82 | 68 | } |
_ZN5doris23can_assign_into_bucketsIlEEbRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEmm Line | Count | Source | 57 | 34 | const size_t num_buckets) { | 58 | 34 | if (value_map.empty()) { | 59 | 0 | return false; | 60 | 34 | }; | 61 | | | 62 | 34 | size_t used_buckets = 1; | 63 | 34 | size_t current_bucket_size = 0; | 64 | | | 65 | 1.29k | for (const auto& [value, count] : value_map) { | 66 | 1.29k | current_bucket_size += count; | 67 | | | 68 | | // If adding the current value to the current bucket would exceed max_bucket_size, | 69 | | // then we start a new bucket. | 70 | 1.29k | if (current_bucket_size > max_bucket_size) { | 71 | 90 | ++used_buckets; | 72 | 90 | current_bucket_size = count; | 73 | 90 | } | 74 | | | 75 | | // If we have used more buckets than num_buckets, we cannot assign the values to buckets. | 76 | 1.29k | if (used_buckets > num_buckets) { | 77 | 17 | return false; | 78 | 17 | } | 79 | 1.29k | } | 80 | | | 81 | 17 | return true; | 82 | 34 | } |
_ZN5doris23can_assign_into_bucketsInEEbRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEmm Line | Count | Source | 57 | 32 | const size_t num_buckets) { | 58 | 32 | if (value_map.empty()) { | 59 | 0 | return false; | 60 | 32 | }; | 61 | | | 62 | 32 | size_t used_buckets = 1; | 63 | 32 | size_t current_bucket_size = 0; | 64 | | | 65 | 1.28k | for (const auto& [value, count] : value_map) { | 66 | 1.28k | current_bucket_size += count; | 67 | | | 68 | | // If adding the current value to the current bucket would exceed max_bucket_size, | 69 | | // then we start a new bucket. | 70 | 1.28k | if (current_bucket_size > max_bucket_size) { | 71 | 96 | ++used_buckets; | 72 | 96 | current_bucket_size = count; | 73 | 96 | } | 74 | | | 75 | | // If we have used more buckets than num_buckets, we cannot assign the values to buckets. | 76 | 1.28k | if (used_buckets > num_buckets) { | 77 | 15 | return false; | 78 | 15 | } | 79 | 1.28k | } | 80 | | | 81 | 17 | return true; | 82 | 32 | } |
_ZN5doris23can_assign_into_bucketsIfEEbRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEmm Line | Count | Source | 57 | 31 | const size_t num_buckets) { | 58 | 31 | if (value_map.empty()) { | 59 | 0 | return false; | 60 | 31 | }; | 61 | | | 62 | 31 | size_t used_buckets = 1; | 63 | 31 | size_t current_bucket_size = 0; | 64 | | | 65 | 1.25k | for (const auto& [value, count] : value_map) { | 66 | 1.25k | current_bucket_size += count; | 67 | | | 68 | | // If adding the current value to the current bucket would exceed max_bucket_size, | 69 | | // then we start a new bucket. | 70 | 1.25k | if (current_bucket_size > max_bucket_size) { | 71 | 78 | ++used_buckets; | 72 | 78 | current_bucket_size = count; | 73 | 78 | } | 74 | | | 75 | | // If we have used more buckets than num_buckets, we cannot assign the values to buckets. | 76 | 1.25k | if (used_buckets > num_buckets) { | 77 | 15 | return false; | 78 | 15 | } | 79 | 1.25k | } | 80 | | | 81 | 16 | return true; | 82 | 31 | } |
_ZN5doris23can_assign_into_bucketsIdEEbRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEmm Line | Count | Source | 57 | 31 | const size_t num_buckets) { | 58 | 31 | if (value_map.empty()) { | 59 | 0 | return false; | 60 | 31 | }; | 61 | | | 62 | 31 | size_t used_buckets = 1; | 63 | 31 | size_t current_bucket_size = 0; | 64 | | | 65 | 1.25k | for (const auto& [value, count] : value_map) { | 66 | 1.25k | current_bucket_size += count; | 67 | | | 68 | | // If adding the current value to the current bucket would exceed max_bucket_size, | 69 | | // then we start a new bucket. | 70 | 1.25k | if (current_bucket_size > max_bucket_size) { | 71 | 78 | ++used_buckets; | 72 | 78 | current_bucket_size = count; | 73 | 78 | } | 74 | | | 75 | | // If we have used more buckets than num_buckets, we cannot assign the values to buckets. | 76 | 1.25k | if (used_buckets > num_buckets) { | 77 | 15 | return false; | 78 | 15 | } | 79 | 1.25k | } | 80 | | | 81 | 16 | return true; | 82 | 31 | } |
_ZN5doris23can_assign_into_bucketsINS_7DecimalIiEEEEbRKSt3mapIT_mSt4lessIS4_ESaISt4pairIKS4_mEEEmm Line | Count | Source | 57 | 39 | const size_t num_buckets) { | 58 | 39 | if (value_map.empty()) { | 59 | 0 | return false; | 60 | 39 | }; | 61 | | | 62 | 39 | size_t used_buckets = 1; | 63 | 39 | size_t current_bucket_size = 0; | 64 | | | 65 | 161 | for (const auto& [value, count] : value_map) { | 66 | 161 | current_bucket_size += count; | 67 | | | 68 | | // If adding the current value to the current bucket would exceed max_bucket_size, | 69 | | // then we start a new bucket. | 70 | 161 | if (current_bucket_size > max_bucket_size) { | 71 | 54 | ++used_buckets; | 72 | 54 | current_bucket_size = count; | 73 | 54 | } | 74 | | | 75 | | // If we have used more buckets than num_buckets, we cannot assign the values to buckets. | 76 | 161 | if (used_buckets > num_buckets) { | 77 | 17 | return false; | 78 | 17 | } | 79 | 161 | } | 80 | | | 81 | 22 | return true; | 82 | 39 | } |
Unexecuted instantiation: _ZN5doris23can_assign_into_bucketsINS_7DecimalIlEEEEbRKSt3mapIT_mSt4lessIS4_ESaISt4pairIKS4_mEEEmm Unexecuted instantiation: _ZN5doris23can_assign_into_bucketsINS_12Decimal128V3EEEbRKSt3mapIT_mSt4lessIS3_ESaISt4pairIKS3_mEEEmm Unexecuted instantiation: _ZN5doris23can_assign_into_bucketsINS_7DecimalIN4wide7integerILm256EiEEEEEEbRKSt3mapIT_mSt4lessIS7_ESaISt4pairIKS7_mEEEmm _ZN5doris23can_assign_into_bucketsINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEbRKSt3mapIT_mSt4lessIS8_ESaISt4pairIKS8_mEEEmm Line | Count | Source | 57 | 100 | const size_t num_buckets) { | 58 | 100 | if (value_map.empty()) { | 59 | 0 | return false; | 60 | 100 | }; | 61 | | | 62 | 100 | size_t used_buckets = 1; | 63 | 100 | size_t current_bucket_size = 0; | 64 | | | 65 | 18.3k | for (const auto& [value, count] : value_map) { | 66 | 18.3k | current_bucket_size += count; | 67 | | | 68 | | // If adding the current value to the current bucket would exceed max_bucket_size, | 69 | | // then we start a new bucket. | 70 | 18.3k | if (current_bucket_size > max_bucket_size) { | 71 | 197 | ++used_buckets; | 72 | 197 | current_bucket_size = count; | 73 | 197 | } | 74 | | | 75 | | // If we have used more buckets than num_buckets, we cannot assign the values to buckets. | 76 | 18.3k | if (used_buckets > num_buckets) { | 77 | 38 | return false; | 78 | 38 | } | 79 | 18.3k | } | 80 | | | 81 | 62 | return true; | 82 | 100 | } |
_ZN5doris23can_assign_into_bucketsINS_11DateV2ValueINS_15DateV2ValueTypeEEEEEbRKSt3mapIT_mSt4lessIS5_ESaISt4pairIKS5_mEEEmm Line | Count | Source | 57 | 51 | const size_t num_buckets) { | 58 | 51 | if (value_map.empty()) { | 59 | 0 | return false; | 60 | 51 | }; | 61 | | | 62 | 51 | size_t used_buckets = 1; | 63 | 51 | size_t current_bucket_size = 0; | 64 | | | 65 | 1.31k | for (const auto& [value, count] : value_map) { | 66 | 1.31k | current_bucket_size += count; | 67 | | | 68 | | // If adding the current value to the current bucket would exceed max_bucket_size, | 69 | | // then we start a new bucket. | 70 | 1.31k | if (current_bucket_size > max_bucket_size) { | 71 | 114 | ++used_buckets; | 72 | 114 | current_bucket_size = count; | 73 | 114 | } | 74 | | | 75 | | // If we have used more buckets than num_buckets, we cannot assign the values to buckets. | 76 | 1.31k | if (used_buckets > num_buckets) { | 77 | 23 | return false; | 78 | 23 | } | 79 | 1.31k | } | 80 | | | 81 | 28 | return true; | 82 | 51 | } |
_ZN5doris23can_assign_into_bucketsINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEEEEbRKSt3mapIT_mSt4lessIS5_ESaISt4pairIKS5_mEEEmm Line | Count | Source | 57 | 51 | const size_t num_buckets) { | 58 | 51 | if (value_map.empty()) { | 59 | 0 | return false; | 60 | 51 | }; | 61 | | | 62 | 51 | size_t used_buckets = 1; | 63 | 51 | size_t current_bucket_size = 0; | 64 | | | 65 | 1.31k | for (const auto& [value, count] : value_map) { | 66 | 1.31k | current_bucket_size += count; | 67 | | | 68 | | // If adding the current value to the current bucket would exceed max_bucket_size, | 69 | | // then we start a new bucket. | 70 | 1.31k | if (current_bucket_size > max_bucket_size) { | 71 | 115 | ++used_buckets; | 72 | 115 | current_bucket_size = count; | 73 | 115 | } | 74 | | | 75 | | // If we have used more buckets than num_buckets, we cannot assign the values to buckets. | 76 | 1.31k | if (used_buckets > num_buckets) { | 77 | 23 | return false; | 78 | 23 | } | 79 | 1.31k | } | 80 | | | 81 | 28 | return true; | 82 | 51 | } |
|
83 | | |
84 | | /** |
85 | | * Calculates the maximum number of values that can fit into each bucket given a set of values |
86 | | * and the desired number of buckets. |
87 | | * |
88 | | * @tparam T the type of the values in the value map |
89 | | * @param value_map the map of values and their counts |
90 | | * @param num_buckets the desired number of buckets |
91 | | * @return the maximum number of values that can fit into each bucket |
92 | | */ |
93 | | template <typename T> |
94 | 647 | size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) { |
95 | | // Ensure that the value map is not empty |
96 | 647 | assert(!value_map.empty()); |
97 | | |
98 | | // Calculate the total number of values in the map using std::accumulate() |
99 | 647 | size_t total_values = 0; |
100 | 5.76k | for (const auto& [value, count] : value_map) { |
101 | 5.76k | total_values += count; |
102 | 5.76k | } |
103 | | |
104 | | // If there is only one bucket, then all values will be assigned to that bucket |
105 | 647 | if (num_buckets == 1) { |
106 | 4 | return total_values; |
107 | 4 | } |
108 | | |
109 | | // To calculate the maximum value count in each bucket, we first calculate a conservative upper |
110 | | // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed |
111 | | // the actual maximum value count, but it does not underestimate it. The subsequent binary search |
112 | | // algorithm will approach the actual maximum value count. |
113 | 643 | size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1; |
114 | | |
115 | | // Initialize the lower bound to 0 |
116 | 643 | size_t lower_bucket_values = 0; |
117 | | |
118 | | // Perform a binary search to find the maximum number of values that can fit into each bucket |
119 | 643 | int search_step = 0; |
120 | 643 | const int max_search_steps = |
121 | 643 | 10; // Limit the number of search steps to avoid excessive iteration |
122 | | |
123 | 1.13k | while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) { |
124 | | // Calculate the midpoint of the upper and lower bounds |
125 | 491 | const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2; |
126 | | |
127 | | // Check if the given number of values can be assigned to the desired number of buckets |
128 | 491 | if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) { |
129 | | // If it can, then set the upper bound to the midpoint |
130 | 275 | upper_bucket_values = bucket_values; |
131 | 275 | } else { |
132 | | // If it can't, then set the lower bound to the midpoint |
133 | 216 | lower_bucket_values = bucket_values; |
134 | 216 | } |
135 | | // Increment the search step counter |
136 | 491 | ++search_step; |
137 | 491 | } |
138 | | |
139 | 643 | return upper_bucket_values; |
140 | 647 | } _ZN5doris27calculate_bucket_max_valuesIhEEmRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEm Line | Count | Source | 94 | 1 | size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) { | 95 | | // Ensure that the value map is not empty | 96 | 1 | assert(!value_map.empty()); | 97 | | | 98 | | // Calculate the total number of values in the map using std::accumulate() | 99 | 1 | size_t total_values = 0; | 100 | 2 | for (const auto& [value, count] : value_map) { | 101 | 2 | total_values += count; | 102 | 2 | } | 103 | | | 104 | | // If there is only one bucket, then all values will be assigned to that bucket | 105 | 1 | if (num_buckets == 1) { | 106 | 0 | return total_values; | 107 | 0 | } | 108 | | | 109 | | // To calculate the maximum value count in each bucket, we first calculate a conservative upper | 110 | | // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed | 111 | | // the actual maximum value count, but it does not underestimate it. The subsequent binary search | 112 | | // algorithm will approach the actual maximum value count. | 113 | 1 | size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1; | 114 | | | 115 | | // Initialize the lower bound to 0 | 116 | 1 | size_t lower_bucket_values = 0; | 117 | | | 118 | | // Perform a binary search to find the maximum number of values that can fit into each bucket | 119 | 1 | int search_step = 0; | 120 | 1 | const int max_search_steps = | 121 | 1 | 10; // Limit the number of search steps to avoid excessive iteration | 122 | | | 123 | 1 | while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) { | 124 | | // Calculate the midpoint of the upper and lower bounds | 125 | 0 | const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2; | 126 | | | 127 | | // Check if the given number of values can be assigned to the desired number of buckets | 128 | 0 | if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) { | 129 | | // If it can, then set the upper bound to the midpoint | 130 | 0 | upper_bucket_values = bucket_values; | 131 | 0 | } else { | 132 | | // If it can't, then set the lower bound to the midpoint | 133 | 0 | lower_bucket_values = bucket_values; | 134 | 0 | } | 135 | | // Increment the search step counter | 136 | 0 | ++search_step; | 137 | 0 | } | 138 | | | 139 | 1 | return upper_bucket_values; | 140 | 1 | } |
_ZN5doris27calculate_bucket_max_valuesIaEEmRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEm Line | Count | Source | 94 | 42 | size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) { | 95 | | // Ensure that the value map is not empty | 96 | 42 | assert(!value_map.empty()); | 97 | | | 98 | | // Calculate the total number of values in the map using std::accumulate() | 99 | 42 | size_t total_values = 0; | 100 | 340 | for (const auto& [value, count] : value_map) { | 101 | 340 | total_values += count; | 102 | 340 | } | 103 | | | 104 | | // If there is only one bucket, then all values will be assigned to that bucket | 105 | 42 | if (num_buckets == 1) { | 106 | 1 | return total_values; | 107 | 1 | } | 108 | | | 109 | | // To calculate the maximum value count in each bucket, we first calculate a conservative upper | 110 | | // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed | 111 | | // the actual maximum value count, but it does not underestimate it. The subsequent binary search | 112 | | // algorithm will approach the actual maximum value count. | 113 | 41 | size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1; | 114 | | | 115 | | // Initialize the lower bound to 0 | 116 | 41 | size_t lower_bucket_values = 0; | 117 | | | 118 | | // Perform a binary search to find the maximum number of values that can fit into each bucket | 119 | 41 | int search_step = 0; | 120 | 41 | const int max_search_steps = | 121 | 41 | 10; // Limit the number of search steps to avoid excessive iteration | 122 | | | 123 | 68 | while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) { | 124 | | // Calculate the midpoint of the upper and lower bounds | 125 | 27 | const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2; | 126 | | | 127 | | // Check if the given number of values can be assigned to the desired number of buckets | 128 | 27 | if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) { | 129 | | // If it can, then set the upper bound to the midpoint | 130 | 16 | upper_bucket_values = bucket_values; | 131 | 16 | } else { | 132 | | // If it can't, then set the lower bound to the midpoint | 133 | 11 | lower_bucket_values = bucket_values; | 134 | 11 | } | 135 | | // Increment the search step counter | 136 | 27 | ++search_step; | 137 | 27 | } | 138 | | | 139 | 41 | return upper_bucket_values; | 140 | 42 | } |
_ZN5doris27calculate_bucket_max_valuesIsEEmRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEm Line | Count | Source | 94 | 43 | size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) { | 95 | | // Ensure that the value map is not empty | 96 | 43 | assert(!value_map.empty()); | 97 | | | 98 | | // Calculate the total number of values in the map using std::accumulate() | 99 | 43 | size_t total_values = 0; | 100 | 349 | for (const auto& [value, count] : value_map) { | 101 | 349 | total_values += count; | 102 | 349 | } | 103 | | | 104 | | // If there is only one bucket, then all values will be assigned to that bucket | 105 | 43 | if (num_buckets == 1) { | 106 | 0 | return total_values; | 107 | 0 | } | 108 | | | 109 | | // To calculate the maximum value count in each bucket, we first calculate a conservative upper | 110 | | // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed | 111 | | // the actual maximum value count, but it does not underestimate it. The subsequent binary search | 112 | | // algorithm will approach the actual maximum value count. | 113 | 43 | size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1; | 114 | | | 115 | | // Initialize the lower bound to 0 | 116 | 43 | size_t lower_bucket_values = 0; | 117 | | | 118 | | // Perform a binary search to find the maximum number of values that can fit into each bucket | 119 | 43 | int search_step = 0; | 120 | 43 | const int max_search_steps = | 121 | 43 | 10; // Limit the number of search steps to avoid excessive iteration | 122 | | | 123 | 83 | while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) { | 124 | | // Calculate the midpoint of the upper and lower bounds | 125 | 40 | const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2; | 126 | | | 127 | | // Check if the given number of values can be assigned to the desired number of buckets | 128 | 40 | if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) { | 129 | | // If it can, then set the upper bound to the midpoint | 130 | 22 | upper_bucket_values = bucket_values; | 131 | 22 | } else { | 132 | | // If it can't, then set the lower bound to the midpoint | 133 | 18 | lower_bucket_values = bucket_values; | 134 | 18 | } | 135 | | // Increment the search step counter | 136 | 40 | ++search_step; | 137 | 40 | } | 138 | | | 139 | 43 | return upper_bucket_values; | 140 | 43 | } |
_ZN5doris27calculate_bucket_max_valuesIiEEmRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEm Line | Count | Source | 94 | 71 | size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) { | 95 | | // Ensure that the value map is not empty | 96 | 71 | assert(!value_map.empty()); | 97 | | | 98 | | // Calculate the total number of values in the map using std::accumulate() | 99 | 71 | size_t total_values = 0; | 100 | 375 | for (const auto& [value, count] : value_map) { | 101 | 375 | total_values += count; | 102 | 375 | } | 103 | | | 104 | | // If there is only one bucket, then all values will be assigned to that bucket | 105 | 71 | if (num_buckets == 1) { | 106 | 3 | return total_values; | 107 | 3 | } | 108 | | | 109 | | // To calculate the maximum value count in each bucket, we first calculate a conservative upper | 110 | | // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed | 111 | | // the actual maximum value count, but it does not underestimate it. The subsequent binary search | 112 | | // algorithm will approach the actual maximum value count. | 113 | 68 | size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1; | 114 | | | 115 | | // Initialize the lower bound to 0 | 116 | 68 | size_t lower_bucket_values = 0; | 117 | | | 118 | | // Perform a binary search to find the maximum number of values that can fit into each bucket | 119 | 68 | int search_step = 0; | 120 | 68 | const int max_search_steps = | 121 | 68 | 10; // Limit the number of search steps to avoid excessive iteration | 122 | | | 123 | 123 | while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) { | 124 | | // Calculate the midpoint of the upper and lower bounds | 125 | 55 | const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2; | 126 | | | 127 | | // Check if the given number of values can be assigned to the desired number of buckets | 128 | 55 | if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) { | 129 | | // If it can, then set the upper bound to the midpoint | 130 | 31 | upper_bucket_values = bucket_values; | 131 | 31 | } else { | 132 | | // If it can't, then set the lower bound to the midpoint | 133 | 24 | lower_bucket_values = bucket_values; | 134 | 24 | } | 135 | | // Increment the search step counter | 136 | 55 | ++search_step; | 137 | 55 | } | 138 | | | 139 | 68 | return upper_bucket_values; | 140 | 71 | } |
_ZN5doris27calculate_bucket_max_valuesIlEEmRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEm Line | Count | Source | 94 | 59 | size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) { | 95 | | // Ensure that the value map is not empty | 96 | 59 | assert(!value_map.empty()); | 97 | | | 98 | | // Calculate the total number of values in the map using std::accumulate() | 99 | 59 | size_t total_values = 0; | 100 | 418 | for (const auto& [value, count] : value_map) { | 101 | 418 | total_values += count; | 102 | 418 | } | 103 | | | 104 | | // If there is only one bucket, then all values will be assigned to that bucket | 105 | 59 | if (num_buckets == 1) { | 106 | 0 | return total_values; | 107 | 0 | } | 108 | | | 109 | | // To calculate the maximum value count in each bucket, we first calculate a conservative upper | 110 | | // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed | 111 | | // the actual maximum value count, but it does not underestimate it. The subsequent binary search | 112 | | // algorithm will approach the actual maximum value count. | 113 | 59 | size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1; | 114 | | | 115 | | // Initialize the lower bound to 0 | 116 | 59 | size_t lower_bucket_values = 0; | 117 | | | 118 | | // Perform a binary search to find the maximum number of values that can fit into each bucket | 119 | 59 | int search_step = 0; | 120 | 59 | const int max_search_steps = | 121 | 59 | 10; // Limit the number of search steps to avoid excessive iteration | 122 | | | 123 | 93 | while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) { | 124 | | // Calculate the midpoint of the upper and lower bounds | 125 | 34 | const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2; | 126 | | | 127 | | // Check if the given number of values can be assigned to the desired number of buckets | 128 | 34 | if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) { | 129 | | // If it can, then set the upper bound to the midpoint | 130 | 17 | upper_bucket_values = bucket_values; | 131 | 17 | } else { | 132 | | // If it can't, then set the lower bound to the midpoint | 133 | 17 | lower_bucket_values = bucket_values; | 134 | 17 | } | 135 | | // Increment the search step counter | 136 | 34 | ++search_step; | 137 | 34 | } | 138 | | | 139 | 59 | return upper_bucket_values; | 140 | 59 | } |
_ZN5doris27calculate_bucket_max_valuesInEEmRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEm Line | Count | Source | 94 | 42 | size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) { | 95 | | // Ensure that the value map is not empty | 96 | 42 | assert(!value_map.empty()); | 97 | | | 98 | | // Calculate the total number of values in the map using std::accumulate() | 99 | 42 | size_t total_values = 0; | 100 | 343 | for (const auto& [value, count] : value_map) { | 101 | 343 | total_values += count; | 102 | 343 | } | 103 | | | 104 | | // If there is only one bucket, then all values will be assigned to that bucket | 105 | 42 | if (num_buckets == 1) { | 106 | 0 | return total_values; | 107 | 0 | } | 108 | | | 109 | | // To calculate the maximum value count in each bucket, we first calculate a conservative upper | 110 | | // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed | 111 | | // the actual maximum value count, but it does not underestimate it. The subsequent binary search | 112 | | // algorithm will approach the actual maximum value count. | 113 | 42 | size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1; | 114 | | | 115 | | // Initialize the lower bound to 0 | 116 | 42 | size_t lower_bucket_values = 0; | 117 | | | 118 | | // Perform a binary search to find the maximum number of values that can fit into each bucket | 119 | 42 | int search_step = 0; | 120 | 42 | const int max_search_steps = | 121 | 42 | 10; // Limit the number of search steps to avoid excessive iteration | 122 | | | 123 | 74 | while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) { | 124 | | // Calculate the midpoint of the upper and lower bounds | 125 | 32 | const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2; | 126 | | | 127 | | // Check if the given number of values can be assigned to the desired number of buckets | 128 | 32 | if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) { | 129 | | // If it can, then set the upper bound to the midpoint | 130 | 17 | upper_bucket_values = bucket_values; | 131 | 17 | } else { | 132 | | // If it can't, then set the lower bound to the midpoint | 133 | 15 | lower_bucket_values = bucket_values; | 134 | 15 | } | 135 | | // Increment the search step counter | 136 | 32 | ++search_step; | 137 | 32 | } | 138 | | | 139 | 42 | return upper_bucket_values; | 140 | 42 | } |
_ZN5doris27calculate_bucket_max_valuesIfEEmRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEm Line | Count | Source | 94 | 41 | size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) { | 95 | | // Ensure that the value map is not empty | 96 | 41 | assert(!value_map.empty()); | 97 | | | 98 | | // Calculate the total number of values in the map using std::accumulate() | 99 | 41 | size_t total_values = 0; | 100 | 328 | for (const auto& [value, count] : value_map) { | 101 | 328 | total_values += count; | 102 | 328 | } | 103 | | | 104 | | // If there is only one bucket, then all values will be assigned to that bucket | 105 | 41 | if (num_buckets == 1) { | 106 | 0 | return total_values; | 107 | 0 | } | 108 | | | 109 | | // To calculate the maximum value count in each bucket, we first calculate a conservative upper | 110 | | // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed | 111 | | // the actual maximum value count, but it does not underestimate it. The subsequent binary search | 112 | | // algorithm will approach the actual maximum value count. | 113 | 41 | size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1; | 114 | | | 115 | | // Initialize the lower bound to 0 | 116 | 41 | size_t lower_bucket_values = 0; | 117 | | | 118 | | // Perform a binary search to find the maximum number of values that can fit into each bucket | 119 | 41 | int search_step = 0; | 120 | 41 | const int max_search_steps = | 121 | 41 | 10; // Limit the number of search steps to avoid excessive iteration | 122 | | | 123 | 72 | while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) { | 124 | | // Calculate the midpoint of the upper and lower bounds | 125 | 31 | const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2; | 126 | | | 127 | | // Check if the given number of values can be assigned to the desired number of buckets | 128 | 31 | if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) { | 129 | | // If it can, then set the upper bound to the midpoint | 130 | 16 | upper_bucket_values = bucket_values; | 131 | 16 | } else { | 132 | | // If it can't, then set the lower bound to the midpoint | 133 | 15 | lower_bucket_values = bucket_values; | 134 | 15 | } | 135 | | // Increment the search step counter | 136 | 31 | ++search_step; | 137 | 31 | } | 138 | | | 139 | 41 | return upper_bucket_values; | 140 | 41 | } |
_ZN5doris27calculate_bucket_max_valuesIdEEmRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEm Line | Count | Source | 94 | 41 | size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) { | 95 | | // Ensure that the value map is not empty | 96 | 41 | assert(!value_map.empty()); | 97 | | | 98 | | // Calculate the total number of values in the map using std::accumulate() | 99 | 41 | size_t total_values = 0; | 100 | 328 | for (const auto& [value, count] : value_map) { | 101 | 328 | total_values += count; | 102 | 328 | } | 103 | | | 104 | | // If there is only one bucket, then all values will be assigned to that bucket | 105 | 41 | if (num_buckets == 1) { | 106 | 0 | return total_values; | 107 | 0 | } | 108 | | | 109 | | // To calculate the maximum value count in each bucket, we first calculate a conservative upper | 110 | | // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed | 111 | | // the actual maximum value count, but it does not underestimate it. The subsequent binary search | 112 | | // algorithm will approach the actual maximum value count. | 113 | 41 | size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1; | 114 | | | 115 | | // Initialize the lower bound to 0 | 116 | 41 | size_t lower_bucket_values = 0; | 117 | | | 118 | | // Perform a binary search to find the maximum number of values that can fit into each bucket | 119 | 41 | int search_step = 0; | 120 | 41 | const int max_search_steps = | 121 | 41 | 10; // Limit the number of search steps to avoid excessive iteration | 122 | | | 123 | 72 | while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) { | 124 | | // Calculate the midpoint of the upper and lower bounds | 125 | 31 | const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2; | 126 | | | 127 | | // Check if the given number of values can be assigned to the desired number of buckets | 128 | 31 | if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) { | 129 | | // If it can, then set the upper bound to the midpoint | 130 | 16 | upper_bucket_values = bucket_values; | 131 | 16 | } else { | 132 | | // If it can't, then set the lower bound to the midpoint | 133 | 15 | lower_bucket_values = bucket_values; | 134 | 15 | } | 135 | | // Increment the search step counter | 136 | 31 | ++search_step; | 137 | 31 | } | 138 | | | 139 | 41 | return upper_bucket_values; | 140 | 41 | } |
_ZN5doris27calculate_bucket_max_valuesINS_7DecimalIiEEEEmRKSt3mapIT_mSt4lessIS4_ESaISt4pairIKS4_mEEEm Line | Count | Source | 94 | 45 | size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) { | 95 | | // Ensure that the value map is not empty | 96 | 45 | assert(!value_map.empty()); | 97 | | | 98 | | // Calculate the total number of values in the map using std::accumulate() | 99 | 45 | size_t total_values = 0; | 100 | 161 | for (const auto& [value, count] : value_map) { | 101 | 161 | total_values += count; | 102 | 161 | } | 103 | | | 104 | | // If there is only one bucket, then all values will be assigned to that bucket | 105 | 45 | if (num_buckets == 1) { | 106 | 0 | return total_values; | 107 | 0 | } | 108 | | | 109 | | // To calculate the maximum value count in each bucket, we first calculate a conservative upper | 110 | | // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed | 111 | | // the actual maximum value count, but it does not underestimate it. The subsequent binary search | 112 | | // algorithm will approach the actual maximum value count. | 113 | 45 | size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1; | 114 | | | 115 | | // Initialize the lower bound to 0 | 116 | 45 | size_t lower_bucket_values = 0; | 117 | | | 118 | | // Perform a binary search to find the maximum number of values that can fit into each bucket | 119 | 45 | int search_step = 0; | 120 | 45 | const int max_search_steps = | 121 | 45 | 10; // Limit the number of search steps to avoid excessive iteration | 122 | | | 123 | 84 | while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) { | 124 | | // Calculate the midpoint of the upper and lower bounds | 125 | 39 | const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2; | 126 | | | 127 | | // Check if the given number of values can be assigned to the desired number of buckets | 128 | 39 | if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) { | 129 | | // If it can, then set the upper bound to the midpoint | 130 | 22 | upper_bucket_values = bucket_values; | 131 | 22 | } else { | 132 | | // If it can't, then set the lower bound to the midpoint | 133 | 17 | lower_bucket_values = bucket_values; | 134 | 17 | } | 135 | | // Increment the search step counter | 136 | 39 | ++search_step; | 137 | 39 | } | 138 | | | 139 | 45 | return upper_bucket_values; | 140 | 45 | } |
Unexecuted instantiation: _ZN5doris27calculate_bucket_max_valuesINS_7DecimalIlEEEEmRKSt3mapIT_mSt4lessIS4_ESaISt4pairIKS4_mEEEm Unexecuted instantiation: _ZN5doris27calculate_bucket_max_valuesINS_12Decimal128V3EEEmRKSt3mapIT_mSt4lessIS3_ESaISt4pairIKS3_mEEEm Unexecuted instantiation: _ZN5doris27calculate_bucket_max_valuesINS_7DecimalIN4wide7integerILm256EiEEEEEEmRKSt3mapIT_mSt4lessIS7_ESaISt4pairIKS7_mEEEm _ZN5doris27calculate_bucket_max_valuesINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEmRKSt3mapIT_mSt4lessIS8_ESaISt4pairIKS8_mEEEm Line | Count | Source | 94 | 100 | size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) { | 95 | | // Ensure that the value map is not empty | 96 | 100 | assert(!value_map.empty()); | 97 | | | 98 | | // Calculate the total number of values in the map using std::accumulate() | 99 | 100 | size_t total_values = 0; | 100 | 2.20k | for (const auto& [value, count] : value_map) { | 101 | 2.20k | total_values += count; | 102 | 2.20k | } | 103 | | | 104 | | // If there is only one bucket, then all values will be assigned to that bucket | 105 | 100 | if (num_buckets == 1) { | 106 | 0 | return total_values; | 107 | 0 | } | 108 | | | 109 | | // To calculate the maximum value count in each bucket, we first calculate a conservative upper | 110 | | // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed | 111 | | // the actual maximum value count, but it does not underestimate it. The subsequent binary search | 112 | | // algorithm will approach the actual maximum value count. | 113 | 100 | size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1; | 114 | | | 115 | | // Initialize the lower bound to 0 | 116 | 100 | size_t lower_bucket_values = 0; | 117 | | | 118 | | // Perform a binary search to find the maximum number of values that can fit into each bucket | 119 | 100 | int search_step = 0; | 120 | 100 | const int max_search_steps = | 121 | 100 | 10; // Limit the number of search steps to avoid excessive iteration | 122 | | | 123 | 200 | while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) { | 124 | | // Calculate the midpoint of the upper and lower bounds | 125 | 100 | const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2; | 126 | | | 127 | | // Check if the given number of values can be assigned to the desired number of buckets | 128 | 100 | if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) { | 129 | | // If it can, then set the upper bound to the midpoint | 130 | 62 | upper_bucket_values = bucket_values; | 131 | 62 | } else { | 132 | | // If it can't, then set the lower bound to the midpoint | 133 | 38 | lower_bucket_values = bucket_values; | 134 | 38 | } | 135 | | // Increment the search step counter | 136 | 100 | ++search_step; | 137 | 100 | } | 138 | | | 139 | 100 | return upper_bucket_values; | 140 | 100 | } |
_ZN5doris27calculate_bucket_max_valuesINS_11DateV2ValueINS_15DateV2ValueTypeEEEEEmRKSt3mapIT_mSt4lessIS5_ESaISt4pairIKS5_mEEEm Line | Count | Source | 94 | 81 | size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) { | 95 | | // Ensure that the value map is not empty | 96 | 81 | assert(!value_map.empty()); | 97 | | | 98 | | // Calculate the total number of values in the map using std::accumulate() | 99 | 81 | size_t total_values = 0; | 100 | 458 | for (const auto& [value, count] : value_map) { | 101 | 458 | total_values += count; | 102 | 458 | } | 103 | | | 104 | | // If there is only one bucket, then all values will be assigned to that bucket | 105 | 81 | if (num_buckets == 1) { | 106 | 0 | return total_values; | 107 | 0 | } | 108 | | | 109 | | // To calculate the maximum value count in each bucket, we first calculate a conservative upper | 110 | | // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed | 111 | | // the actual maximum value count, but it does not underestimate it. The subsequent binary search | 112 | | // algorithm will approach the actual maximum value count. | 113 | 81 | size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1; | 114 | | | 115 | | // Initialize the lower bound to 0 | 116 | 81 | size_t lower_bucket_values = 0; | 117 | | | 118 | | // Perform a binary search to find the maximum number of values that can fit into each bucket | 119 | 81 | int search_step = 0; | 120 | 81 | const int max_search_steps = | 121 | 81 | 10; // Limit the number of search steps to avoid excessive iteration | 122 | | | 123 | 132 | while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) { | 124 | | // Calculate the midpoint of the upper and lower bounds | 125 | 51 | const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2; | 126 | | | 127 | | // Check if the given number of values can be assigned to the desired number of buckets | 128 | 51 | if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) { | 129 | | // If it can, then set the upper bound to the midpoint | 130 | 28 | upper_bucket_values = bucket_values; | 131 | 28 | } else { | 132 | | // If it can't, then set the lower bound to the midpoint | 133 | 23 | lower_bucket_values = bucket_values; | 134 | 23 | } | 135 | | // Increment the search step counter | 136 | 51 | ++search_step; | 137 | 51 | } | 138 | | | 139 | 81 | return upper_bucket_values; | 140 | 81 | } |
_ZN5doris27calculate_bucket_max_valuesINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEEEEmRKSt3mapIT_mSt4lessIS5_ESaISt4pairIKS5_mEEEm Line | Count | Source | 94 | 81 | size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) { | 95 | | // Ensure that the value map is not empty | 96 | 81 | assert(!value_map.empty()); | 97 | | | 98 | | // Calculate the total number of values in the map using std::accumulate() | 99 | 81 | size_t total_values = 0; | 100 | 457 | for (const auto& [value, count] : value_map) { | 101 | 457 | total_values += count; | 102 | 457 | } | 103 | | | 104 | | // If there is only one bucket, then all values will be assigned to that bucket | 105 | 81 | if (num_buckets == 1) { | 106 | 0 | return total_values; | 107 | 0 | } | 108 | | | 109 | | // To calculate the maximum value count in each bucket, we first calculate a conservative upper | 110 | | // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed | 111 | | // the actual maximum value count, but it does not underestimate it. The subsequent binary search | 112 | | // algorithm will approach the actual maximum value count. | 113 | 81 | size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1; | 114 | | | 115 | | // Initialize the lower bound to 0 | 116 | 81 | size_t lower_bucket_values = 0; | 117 | | | 118 | | // Perform a binary search to find the maximum number of values that can fit into each bucket | 119 | 81 | int search_step = 0; | 120 | 81 | const int max_search_steps = | 121 | 81 | 10; // Limit the number of search steps to avoid excessive iteration | 122 | | | 123 | 132 | while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) { | 124 | | // Calculate the midpoint of the upper and lower bounds | 125 | 51 | const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2; | 126 | | | 127 | | // Check if the given number of values can be assigned to the desired number of buckets | 128 | 51 | if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) { | 129 | | // If it can, then set the upper bound to the midpoint | 130 | 28 | upper_bucket_values = bucket_values; | 131 | 28 | } else { | 132 | | // If it can't, then set the lower bound to the midpoint | 133 | 23 | lower_bucket_values = bucket_values; | 134 | 23 | } | 135 | | // Increment the search step counter | 136 | 51 | ++search_step; | 137 | 51 | } | 138 | | | 139 | 81 | return upper_bucket_values; | 140 | 81 | } |
|
141 | | |
142 | | /** |
143 | | * Greedy equi-height histogram construction algorithm, inspired by the MySQL |
144 | | * equi_height implementation(https://dev.mysql.com/doc/dev/mysql-server/latest/equi__height_8h.html). |
145 | | * |
146 | | * Given an ordered collection of [value, count] pairs and a maximum bucket |
147 | | * size, construct a histogram by inserting values into a bucket while keeping |
148 | | * track of its size. If the insertion of a value into a non-empty bucket |
149 | | * causes the bucket to exceed the maximum size, create a new empty bucket and |
150 | | * continue. |
151 | | * |
152 | | * The algorithm guarantees a selectivity estimation error of at most ~2 * |
153 | | * #values / #buckets, often less. Values with a higher relative frequency are |
154 | | * guaranteed to be placed in singleton buckets. |
155 | | * |
156 | | * The minimum composite bucket size is used to minimize the worst case |
157 | | * selectivity estimation error. In general, the algorithm will adapt to the |
158 | | * data distribution to minimize the size of composite buckets. The heavy values |
159 | | * can be placed in singleton buckets and the remaining values will be evenly |
160 | | * spread across the remaining buckets, leading to a lower composite bucket size. |
161 | | * |
162 | | * Note: The term "value" refers to an entry in a column and the actual value |
163 | | * of an entry. The ordered_map is an ordered collection of [distinct value, |
164 | | * value count] pairs. For example, a Value_map<String> could contain the pairs ["a", 1], ["b", 2] |
165 | | * to represent one "a" value and two "b" values. |
166 | | * |
167 | | * @param buckets A vector of empty buckets that will be populated with data. |
168 | | * @param ordered_map An ordered map of distinct values and their counts. |
169 | | * @param max_num_buckets The maximum number of buckets that can be used. |
170 | | * |
171 | | * @return True if the buckets were successfully built, false otherwise. |
172 | | */ |
173 | | template <typename T> |
174 | | bool build_histogram(std::vector<Bucket<T>>& buckets, const std::map<T, size_t>& ordered_map, |
175 | 707 | const size_t max_num_buckets) { |
176 | | // If the input map is empty, there is nothing to build. |
177 | 707 | if (ordered_map.empty()) { |
178 | 66 | return false; |
179 | 66 | } |
180 | | |
181 | | // Calculate the maximum number of values that can be assigned to each bucket. |
182 | 641 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); |
183 | | |
184 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional |
185 | | // allocations when inserting buckets. |
186 | 641 | buckets.clear(); |
187 | 641 | buckets.reserve(max_num_buckets); |
188 | | |
189 | | // Initialize bucket variables. |
190 | 641 | size_t distinct_values_count = 0; |
191 | 641 | size_t values_count = 0; |
192 | 641 | size_t cumulative_values = 0; |
193 | | |
194 | | // Record how many values still need to be assigned. |
195 | 641 | auto remaining_distinct_values = ordered_map.size(); |
196 | | |
197 | 641 | auto it = ordered_map.begin(); |
198 | | |
199 | | // Lower value of the current bucket. |
200 | 641 | const T* lower_value = &it->first; |
201 | | |
202 | | // Iterate over the ordered map of distinct values and their counts. |
203 | 6.39k | for (; it != ordered_map.end(); ++it) { |
204 | 5.74k | const auto count = it->second; |
205 | 5.74k | const auto current_value = it->first; |
206 | | |
207 | | // Update the bucket counts and track the number of distinct values assigned. |
208 | 5.74k | distinct_values_count++; |
209 | 5.74k | remaining_distinct_values--; |
210 | 5.74k | values_count += count; |
211 | 5.74k | cumulative_values += count; |
212 | | |
213 | | // Check whether the current value should be added to the current bucket. |
214 | 5.74k | auto next = std::next(it); |
215 | 5.74k | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; |
216 | | |
217 | 5.74k | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && |
218 | 5.74k | values_count + next->second <= bucket_max_values) { |
219 | | // If the current value is the last in the input map and there are more remaining |
220 | | // distinct values than empty buckets and adding the value does not cause the bucket |
221 | | // to exceed its max size, skip adding the value to the current bucket. |
222 | 3.89k | continue; |
223 | 3.89k | } |
224 | | |
225 | | // Finalize the current bucket and add it to our collection of buckets. |
226 | 1.85k | auto pre_sum = cumulative_values - values_count; |
227 | | |
228 | 1.85k | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, |
229 | 1.85k | pre_sum); |
230 | 1.85k | buckets.push_back(new_bucket); |
231 | | |
232 | | // Reset variables for the next bucket. |
233 | 1.85k | if (next != ordered_map.end()) { |
234 | 1.21k | lower_value = &next->first; |
235 | 1.21k | } |
236 | 1.85k | values_count = 0; |
237 | 1.85k | distinct_values_count = 0; |
238 | 1.85k | } |
239 | | |
240 | 641 | return true; |
241 | 707 | } _ZN5doris15build_histogramIhEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm Line | Count | Source | 175 | 2 | const size_t max_num_buckets) { | 176 | | // If the input map is empty, there is nothing to build. | 177 | 2 | if (ordered_map.empty()) { | 178 | 1 | return false; | 179 | 1 | } | 180 | | | 181 | | // Calculate the maximum number of values that can be assigned to each bucket. | 182 | 1 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 183 | | | 184 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 185 | | // allocations when inserting buckets. | 186 | 1 | buckets.clear(); | 187 | 1 | buckets.reserve(max_num_buckets); | 188 | | | 189 | | // Initialize bucket variables. | 190 | 1 | size_t distinct_values_count = 0; | 191 | 1 | size_t values_count = 0; | 192 | 1 | size_t cumulative_values = 0; | 193 | | | 194 | | // Record how many values still need to be assigned. | 195 | 1 | auto remaining_distinct_values = ordered_map.size(); | 196 | | | 197 | 1 | auto it = ordered_map.begin(); | 198 | | | 199 | | // Lower value of the current bucket. | 200 | 1 | const T* lower_value = &it->first; | 201 | | | 202 | | // Iterate over the ordered map of distinct values and their counts. | 203 | 3 | for (; it != ordered_map.end(); ++it) { | 204 | 2 | const auto count = it->second; | 205 | 2 | const auto current_value = it->first; | 206 | | | 207 | | // Update the bucket counts and track the number of distinct values assigned. | 208 | 2 | distinct_values_count++; | 209 | 2 | remaining_distinct_values--; | 210 | 2 | values_count += count; | 211 | 2 | cumulative_values += count; | 212 | | | 213 | | // Check whether the current value should be added to the current bucket. | 214 | 2 | auto next = std::next(it); | 215 | 2 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 216 | | | 217 | 2 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 218 | 2 | values_count + next->second <= bucket_max_values) { | 219 | | // If the current value is the last in the input map and there are more remaining | 220 | | // distinct values than empty buckets and adding the value does not cause the bucket | 221 | | // to exceed its max size, skip adding the value to the current bucket. | 222 | 0 | continue; | 223 | 0 | } | 224 | | | 225 | | // Finalize the current bucket and add it to our collection of buckets. | 226 | 2 | auto pre_sum = cumulative_values - values_count; | 227 | | | 228 | 2 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 229 | 2 | pre_sum); | 230 | 2 | buckets.push_back(new_bucket); | 231 | | | 232 | | // Reset variables for the next bucket. | 233 | 2 | if (next != ordered_map.end()) { | 234 | 1 | lower_value = &next->first; | 235 | 1 | } | 236 | 2 | values_count = 0; | 237 | 2 | distinct_values_count = 0; | 238 | 2 | } | 239 | | | 240 | 1 | return true; | 241 | 2 | } |
_ZN5doris15build_histogramIaEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm Line | Count | Source | 175 | 48 | const size_t max_num_buckets) { | 176 | | // If the input map is empty, there is nothing to build. | 177 | 48 | if (ordered_map.empty()) { | 178 | 6 | return false; | 179 | 6 | } | 180 | | | 181 | | // Calculate the maximum number of values that can be assigned to each bucket. | 182 | 42 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 183 | | | 184 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 185 | | // allocations when inserting buckets. | 186 | 42 | buckets.clear(); | 187 | 42 | buckets.reserve(max_num_buckets); | 188 | | | 189 | | // Initialize bucket variables. | 190 | 42 | size_t distinct_values_count = 0; | 191 | 42 | size_t values_count = 0; | 192 | 42 | size_t cumulative_values = 0; | 193 | | | 194 | | // Record how many values still need to be assigned. | 195 | 42 | auto remaining_distinct_values = ordered_map.size(); | 196 | | | 197 | 42 | auto it = ordered_map.begin(); | 198 | | | 199 | | // Lower value of the current bucket. | 200 | 42 | const T* lower_value = &it->first; | 201 | | | 202 | | // Iterate over the ordered map of distinct values and their counts. | 203 | 382 | for (; it != ordered_map.end(); ++it) { | 204 | 340 | const auto count = it->second; | 205 | 340 | const auto current_value = it->first; | 206 | | | 207 | | // Update the bucket counts and track the number of distinct values assigned. | 208 | 340 | distinct_values_count++; | 209 | 340 | remaining_distinct_values--; | 210 | 340 | values_count += count; | 211 | 340 | cumulative_values += count; | 212 | | | 213 | | // Check whether the current value should be added to the current bucket. | 214 | 340 | auto next = std::next(it); | 215 | 340 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 216 | | | 217 | 340 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 218 | 340 | values_count + next->second <= bucket_max_values) { | 219 | | // If the current value is the last in the input map and there are more remaining | 220 | | // distinct values than empty buckets and adding the value does not cause the bucket | 221 | | // to exceed its max size, skip adding the value to the current bucket. | 222 | 211 | continue; | 223 | 211 | } | 224 | | | 225 | | // Finalize the current bucket and add it to our collection of buckets. | 226 | 129 | auto pre_sum = cumulative_values - values_count; | 227 | | | 228 | 129 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 229 | 129 | pre_sum); | 230 | 129 | buckets.push_back(new_bucket); | 231 | | | 232 | | // Reset variables for the next bucket. | 233 | 129 | if (next != ordered_map.end()) { | 234 | 87 | lower_value = &next->first; | 235 | 87 | } | 236 | 129 | values_count = 0; | 237 | 129 | distinct_values_count = 0; | 238 | 129 | } | 239 | | | 240 | 42 | return true; | 241 | 48 | } |
_ZN5doris15build_histogramIsEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm Line | Count | Source | 175 | 48 | const size_t max_num_buckets) { | 176 | | // If the input map is empty, there is nothing to build. | 177 | 48 | if (ordered_map.empty()) { | 178 | 5 | return false; | 179 | 5 | } | 180 | | | 181 | | // Calculate the maximum number of values that can be assigned to each bucket. | 182 | 43 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 183 | | | 184 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 185 | | // allocations when inserting buckets. | 186 | 43 | buckets.clear(); | 187 | 43 | buckets.reserve(max_num_buckets); | 188 | | | 189 | | // Initialize bucket variables. | 190 | 43 | size_t distinct_values_count = 0; | 191 | 43 | size_t values_count = 0; | 192 | 43 | size_t cumulative_values = 0; | 193 | | | 194 | | // Record how many values still need to be assigned. | 195 | 43 | auto remaining_distinct_values = ordered_map.size(); | 196 | | | 197 | 43 | auto it = ordered_map.begin(); | 198 | | | 199 | | // Lower value of the current bucket. | 200 | 43 | const T* lower_value = &it->first; | 201 | | | 202 | | // Iterate over the ordered map of distinct values and their counts. | 203 | 392 | for (; it != ordered_map.end(); ++it) { | 204 | 349 | const auto count = it->second; | 205 | 349 | const auto current_value = it->first; | 206 | | | 207 | | // Update the bucket counts and track the number of distinct values assigned. | 208 | 349 | distinct_values_count++; | 209 | 349 | remaining_distinct_values--; | 210 | 349 | values_count += count; | 211 | 349 | cumulative_values += count; | 212 | | | 213 | | // Check whether the current value should be added to the current bucket. | 214 | 349 | auto next = std::next(it); | 215 | 349 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 216 | | | 217 | 349 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 218 | 349 | values_count + next->second <= bucket_max_values) { | 219 | | // If the current value is the last in the input map and there are more remaining | 220 | | // distinct values than empty buckets and adding the value does not cause the bucket | 221 | | // to exceed its max size, skip adding the value to the current bucket. | 222 | 217 | continue; | 223 | 217 | } | 224 | | | 225 | | // Finalize the current bucket and add it to our collection of buckets. | 226 | 132 | auto pre_sum = cumulative_values - values_count; | 227 | | | 228 | 132 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 229 | 132 | pre_sum); | 230 | 132 | buckets.push_back(new_bucket); | 231 | | | 232 | | // Reset variables for the next bucket. | 233 | 132 | if (next != ordered_map.end()) { | 234 | 89 | lower_value = &next->first; | 235 | 89 | } | 236 | 132 | values_count = 0; | 237 | 132 | distinct_values_count = 0; | 238 | 132 | } | 239 | | | 240 | 43 | return true; | 241 | 48 | } |
_ZN5doris15build_histogramIiEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm Line | Count | Source | 175 | 74 | const size_t max_num_buckets) { | 176 | | // If the input map is empty, there is nothing to build. | 177 | 74 | if (ordered_map.empty()) { | 178 | 9 | return false; | 179 | 9 | } | 180 | | | 181 | | // Calculate the maximum number of values that can be assigned to each bucket. | 182 | 65 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 183 | | | 184 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 185 | | // allocations when inserting buckets. | 186 | 65 | buckets.clear(); | 187 | 65 | buckets.reserve(max_num_buckets); | 188 | | | 189 | | // Initialize bucket variables. | 190 | 65 | size_t distinct_values_count = 0; | 191 | 65 | size_t values_count = 0; | 192 | 65 | size_t cumulative_values = 0; | 193 | | | 194 | | // Record how many values still need to be assigned. | 195 | 65 | auto remaining_distinct_values = ordered_map.size(); | 196 | | | 197 | 65 | auto it = ordered_map.begin(); | 198 | | | 199 | | // Lower value of the current bucket. | 200 | 65 | const T* lower_value = &it->first; | 201 | | | 202 | | // Iterate over the ordered map of distinct values and their counts. | 203 | 428 | for (; it != ordered_map.end(); ++it) { | 204 | 363 | const auto count = it->second; | 205 | 363 | const auto current_value = it->first; | 206 | | | 207 | | // Update the bucket counts and track the number of distinct values assigned. | 208 | 363 | distinct_values_count++; | 209 | 363 | remaining_distinct_values--; | 210 | 363 | values_count += count; | 211 | 363 | cumulative_values += count; | 212 | | | 213 | | // Check whether the current value should be added to the current bucket. | 214 | 363 | auto next = std::next(it); | 215 | 363 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 216 | | | 217 | 363 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 218 | 363 | values_count + next->second <= bucket_max_values) { | 219 | | // If the current value is the last in the input map and there are more remaining | 220 | | // distinct values than empty buckets and adding the value does not cause the bucket | 221 | | // to exceed its max size, skip adding the value to the current bucket. | 222 | 202 | continue; | 223 | 202 | } | 224 | | | 225 | | // Finalize the current bucket and add it to our collection of buckets. | 226 | 161 | auto pre_sum = cumulative_values - values_count; | 227 | | | 228 | 161 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 229 | 161 | pre_sum); | 230 | 161 | buckets.push_back(new_bucket); | 231 | | | 232 | | // Reset variables for the next bucket. | 233 | 161 | if (next != ordered_map.end()) { | 234 | 96 | lower_value = &next->first; | 235 | 96 | } | 236 | 161 | values_count = 0; | 237 | 161 | distinct_values_count = 0; | 238 | 161 | } | 239 | | | 240 | 65 | return true; | 241 | 74 | } |
_ZN5doris15build_histogramIlEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm Line | Count | Source | 175 | 65 | const size_t max_num_buckets) { | 176 | | // If the input map is empty, there is nothing to build. | 177 | 65 | if (ordered_map.empty()) { | 178 | 6 | return false; | 179 | 6 | } | 180 | | | 181 | | // Calculate the maximum number of values that can be assigned to each bucket. | 182 | 59 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 183 | | | 184 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 185 | | // allocations when inserting buckets. | 186 | 59 | buckets.clear(); | 187 | 59 | buckets.reserve(max_num_buckets); | 188 | | | 189 | | // Initialize bucket variables. | 190 | 59 | size_t distinct_values_count = 0; | 191 | 59 | size_t values_count = 0; | 192 | 59 | size_t cumulative_values = 0; | 193 | | | 194 | | // Record how many values still need to be assigned. | 195 | 59 | auto remaining_distinct_values = ordered_map.size(); | 196 | | | 197 | 59 | auto it = ordered_map.begin(); | 198 | | | 199 | | // Lower value of the current bucket. | 200 | 59 | const T* lower_value = &it->first; | 201 | | | 202 | | // Iterate over the ordered map of distinct values and their counts. | 203 | 477 | for (; it != ordered_map.end(); ++it) { | 204 | 418 | const auto count = it->second; | 205 | 418 | const auto current_value = it->first; | 206 | | | 207 | | // Update the bucket counts and track the number of distinct values assigned. | 208 | 418 | distinct_values_count++; | 209 | 418 | remaining_distinct_values--; | 210 | 418 | values_count += count; | 211 | 418 | cumulative_values += count; | 212 | | | 213 | | // Check whether the current value should be added to the current bucket. | 214 | 418 | auto next = std::next(it); | 215 | 418 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 216 | | | 217 | 418 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 218 | 418 | values_count + next->second <= bucket_max_values) { | 219 | | // If the current value is the last in the input map and there are more remaining | 220 | | // distinct values than empty buckets and adding the value does not cause the bucket | 221 | | // to exceed its max size, skip adding the value to the current bucket. | 222 | 206 | continue; | 223 | 206 | } | 224 | | | 225 | | // Finalize the current bucket and add it to our collection of buckets. | 226 | 212 | auto pre_sum = cumulative_values - values_count; | 227 | | | 228 | 212 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 229 | 212 | pre_sum); | 230 | 212 | buckets.push_back(new_bucket); | 231 | | | 232 | | // Reset variables for the next bucket. | 233 | 212 | if (next != ordered_map.end()) { | 234 | 153 | lower_value = &next->first; | 235 | 153 | } | 236 | 212 | values_count = 0; | 237 | 212 | distinct_values_count = 0; | 238 | 212 | } | 239 | | | 240 | 59 | return true; | 241 | 65 | } |
_ZN5doris15build_histogramInEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm Line | Count | Source | 175 | 48 | const size_t max_num_buckets) { | 176 | | // If the input map is empty, there is nothing to build. | 177 | 48 | if (ordered_map.empty()) { | 178 | 6 | return false; | 179 | 6 | } | 180 | | | 181 | | // Calculate the maximum number of values that can be assigned to each bucket. | 182 | 42 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 183 | | | 184 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 185 | | // allocations when inserting buckets. | 186 | 42 | buckets.clear(); | 187 | 42 | buckets.reserve(max_num_buckets); | 188 | | | 189 | | // Initialize bucket variables. | 190 | 42 | size_t distinct_values_count = 0; | 191 | 42 | size_t values_count = 0; | 192 | 42 | size_t cumulative_values = 0; | 193 | | | 194 | | // Record how many values still need to be assigned. | 195 | 42 | auto remaining_distinct_values = ordered_map.size(); | 196 | | | 197 | 42 | auto it = ordered_map.begin(); | 198 | | | 199 | | // Lower value of the current bucket. | 200 | 42 | const T* lower_value = &it->first; | 201 | | | 202 | | // Iterate over the ordered map of distinct values and their counts. | 203 | 385 | for (; it != ordered_map.end(); ++it) { | 204 | 343 | const auto count = it->second; | 205 | 343 | const auto current_value = it->first; | 206 | | | 207 | | // Update the bucket counts and track the number of distinct values assigned. | 208 | 343 | distinct_values_count++; | 209 | 343 | remaining_distinct_values--; | 210 | 343 | values_count += count; | 211 | 343 | cumulative_values += count; | 212 | | | 213 | | // Check whether the current value should be added to the current bucket. | 214 | 343 | auto next = std::next(it); | 215 | 343 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 216 | | | 217 | 343 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 218 | 343 | values_count + next->second <= bucket_max_values) { | 219 | | // If the current value is the last in the input map and there are more remaining | 220 | | // distinct values than empty buckets and adding the value does not cause the bucket | 221 | | // to exceed its max size, skip adding the value to the current bucket. | 222 | 202 | continue; | 223 | 202 | } | 224 | | | 225 | | // Finalize the current bucket and add it to our collection of buckets. | 226 | 141 | auto pre_sum = cumulative_values - values_count; | 227 | | | 228 | 141 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 229 | 141 | pre_sum); | 230 | 141 | buckets.push_back(new_bucket); | 231 | | | 232 | | // Reset variables for the next bucket. | 233 | 141 | if (next != ordered_map.end()) { | 234 | 99 | lower_value = &next->first; | 235 | 99 | } | 236 | 141 | values_count = 0; | 237 | 141 | distinct_values_count = 0; | 238 | 141 | } | 239 | | | 240 | 42 | return true; | 241 | 48 | } |
_ZN5doris15build_histogramIfEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm Line | Count | Source | 175 | 46 | const size_t max_num_buckets) { | 176 | | // If the input map is empty, there is nothing to build. | 177 | 46 | if (ordered_map.empty()) { | 178 | 5 | return false; | 179 | 5 | } | 180 | | | 181 | | // Calculate the maximum number of values that can be assigned to each bucket. | 182 | 41 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 183 | | | 184 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 185 | | // allocations when inserting buckets. | 186 | 41 | buckets.clear(); | 187 | 41 | buckets.reserve(max_num_buckets); | 188 | | | 189 | | // Initialize bucket variables. | 190 | 41 | size_t distinct_values_count = 0; | 191 | 41 | size_t values_count = 0; | 192 | 41 | size_t cumulative_values = 0; | 193 | | | 194 | | // Record how many values still need to be assigned. | 195 | 41 | auto remaining_distinct_values = ordered_map.size(); | 196 | | | 197 | 41 | auto it = ordered_map.begin(); | 198 | | | 199 | | // Lower value of the current bucket. | 200 | 41 | const T* lower_value = &it->first; | 201 | | | 202 | | // Iterate over the ordered map of distinct values and their counts. | 203 | 369 | for (; it != ordered_map.end(); ++it) { | 204 | 328 | const auto count = it->second; | 205 | 328 | const auto current_value = it->first; | 206 | | | 207 | | // Update the bucket counts and track the number of distinct values assigned. | 208 | 328 | distinct_values_count++; | 209 | 328 | remaining_distinct_values--; | 210 | 328 | values_count += count; | 211 | 328 | cumulative_values += count; | 212 | | | 213 | | // Check whether the current value should be added to the current bucket. | 214 | 328 | auto next = std::next(it); | 215 | 328 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 216 | | | 217 | 328 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 218 | 328 | values_count + next->second <= bucket_max_values) { | 219 | | // If the current value is the last in the input map and there are more remaining | 220 | | // distinct values than empty buckets and adding the value does not cause the bucket | 221 | | // to exceed its max size, skip adding the value to the current bucket. | 222 | 200 | continue; | 223 | 200 | } | 224 | | | 225 | | // Finalize the current bucket and add it to our collection of buckets. | 226 | 128 | auto pre_sum = cumulative_values - values_count; | 227 | | | 228 | 128 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 229 | 128 | pre_sum); | 230 | 128 | buckets.push_back(new_bucket); | 231 | | | 232 | | // Reset variables for the next bucket. | 233 | 128 | if (next != ordered_map.end()) { | 234 | 87 | lower_value = &next->first; | 235 | 87 | } | 236 | 128 | values_count = 0; | 237 | 128 | distinct_values_count = 0; | 238 | 128 | } | 239 | | | 240 | 41 | return true; | 241 | 46 | } |
_ZN5doris15build_histogramIdEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm Line | Count | Source | 175 | 46 | const size_t max_num_buckets) { | 176 | | // If the input map is empty, there is nothing to build. | 177 | 46 | if (ordered_map.empty()) { | 178 | 5 | return false; | 179 | 5 | } | 180 | | | 181 | | // Calculate the maximum number of values that can be assigned to each bucket. | 182 | 41 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 183 | | | 184 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 185 | | // allocations when inserting buckets. | 186 | 41 | buckets.clear(); | 187 | 41 | buckets.reserve(max_num_buckets); | 188 | | | 189 | | // Initialize bucket variables. | 190 | 41 | size_t distinct_values_count = 0; | 191 | 41 | size_t values_count = 0; | 192 | 41 | size_t cumulative_values = 0; | 193 | | | 194 | | // Record how many values still need to be assigned. | 195 | 41 | auto remaining_distinct_values = ordered_map.size(); | 196 | | | 197 | 41 | auto it = ordered_map.begin(); | 198 | | | 199 | | // Lower value of the current bucket. | 200 | 41 | const T* lower_value = &it->first; | 201 | | | 202 | | // Iterate over the ordered map of distinct values and their counts. | 203 | 369 | for (; it != ordered_map.end(); ++it) { | 204 | 328 | const auto count = it->second; | 205 | 328 | const auto current_value = it->first; | 206 | | | 207 | | // Update the bucket counts and track the number of distinct values assigned. | 208 | 328 | distinct_values_count++; | 209 | 328 | remaining_distinct_values--; | 210 | 328 | values_count += count; | 211 | 328 | cumulative_values += count; | 212 | | | 213 | | // Check whether the current value should be added to the current bucket. | 214 | 328 | auto next = std::next(it); | 215 | 328 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 216 | | | 217 | 328 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 218 | 328 | values_count + next->second <= bucket_max_values) { | 219 | | // If the current value is the last in the input map and there are more remaining | 220 | | // distinct values than empty buckets and adding the value does not cause the bucket | 221 | | // to exceed its max size, skip adding the value to the current bucket. | 222 | 200 | continue; | 223 | 200 | } | 224 | | | 225 | | // Finalize the current bucket and add it to our collection of buckets. | 226 | 128 | auto pre_sum = cumulative_values - values_count; | 227 | | | 228 | 128 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 229 | 128 | pre_sum); | 230 | 128 | buckets.push_back(new_bucket); | 231 | | | 232 | | // Reset variables for the next bucket. | 233 | 128 | if (next != ordered_map.end()) { | 234 | 87 | lower_value = &next->first; | 235 | 87 | } | 236 | 128 | values_count = 0; | 237 | 128 | distinct_values_count = 0; | 238 | 128 | } | 239 | | | 240 | 41 | return true; | 241 | 46 | } |
_ZN5doris15build_histogramINS_7DecimalIiEEEEbRSt6vectorINS_6BucketIT_EESaIS6_EERKSt3mapIS5_mSt4lessIS5_ESaISt4pairIKS5_mEEEm Line | Count | Source | 175 | 49 | const size_t max_num_buckets) { | 176 | | // If the input map is empty, there is nothing to build. | 177 | 49 | if (ordered_map.empty()) { | 178 | 4 | return false; | 179 | 4 | } | 180 | | | 181 | | // Calculate the maximum number of values that can be assigned to each bucket. | 182 | 45 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 183 | | | 184 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 185 | | // allocations when inserting buckets. | 186 | 45 | buckets.clear(); | 187 | 45 | buckets.reserve(max_num_buckets); | 188 | | | 189 | | // Initialize bucket variables. | 190 | 45 | size_t distinct_values_count = 0; | 191 | 45 | size_t values_count = 0; | 192 | 45 | size_t cumulative_values = 0; | 193 | | | 194 | | // Record how many values still need to be assigned. | 195 | 45 | auto remaining_distinct_values = ordered_map.size(); | 196 | | | 197 | 45 | auto it = ordered_map.begin(); | 198 | | | 199 | | // Lower value of the current bucket. | 200 | 45 | const T* lower_value = &it->first; | 201 | | | 202 | | // Iterate over the ordered map of distinct values and their counts. | 203 | 206 | for (; it != ordered_map.end(); ++it) { | 204 | 161 | const auto count = it->second; | 205 | 161 | const auto current_value = it->first; | 206 | | | 207 | | // Update the bucket counts and track the number of distinct values assigned. | 208 | 161 | distinct_values_count++; | 209 | 161 | remaining_distinct_values--; | 210 | 161 | values_count += count; | 211 | 161 | cumulative_values += count; | 212 | | | 213 | | // Check whether the current value should be added to the current bucket. | 214 | 161 | auto next = std::next(it); | 215 | 161 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 216 | | | 217 | 161 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 218 | 161 | values_count + next->second <= bucket_max_values) { | 219 | | // If the current value is the last in the input map and there are more remaining | 220 | | // distinct values than empty buckets and adding the value does not cause the bucket | 221 | | // to exceed its max size, skip adding the value to the current bucket. | 222 | 28 | continue; | 223 | 28 | } | 224 | | | 225 | | // Finalize the current bucket and add it to our collection of buckets. | 226 | 133 | auto pre_sum = cumulative_values - values_count; | 227 | | | 228 | 133 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 229 | 133 | pre_sum); | 230 | 133 | buckets.push_back(new_bucket); | 231 | | | 232 | | // Reset variables for the next bucket. | 233 | 133 | if (next != ordered_map.end()) { | 234 | 88 | lower_value = &next->first; | 235 | 88 | } | 236 | 133 | values_count = 0; | 237 | 133 | distinct_values_count = 0; | 238 | 133 | } | 239 | | | 240 | 45 | return true; | 241 | 49 | } |
Unexecuted instantiation: _ZN5doris15build_histogramINS_7DecimalIlEEEEbRSt6vectorINS_6BucketIT_EESaIS6_EERKSt3mapIS5_mSt4lessIS5_ESaISt4pairIKS5_mEEEm Unexecuted instantiation: _ZN5doris15build_histogramINS_12Decimal128V3EEEbRSt6vectorINS_6BucketIT_EESaIS5_EERKSt3mapIS4_mSt4lessIS4_ESaISt4pairIKS4_mEEEm Unexecuted instantiation: _ZN5doris15build_histogramINS_7DecimalIN4wide7integerILm256EiEEEEEEbRSt6vectorINS_6BucketIT_EESaIS9_EERKSt3mapIS8_mSt4lessIS8_ESaISt4pairIKS8_mEEEm _ZN5doris15build_histogramINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEbRSt6vectorINS_6BucketIT_EESaISA_EERKSt3mapIS9_mSt4lessIS9_ESaISt4pairIKS9_mEEEm Line | Count | Source | 175 | 105 | const size_t max_num_buckets) { | 176 | | // If the input map is empty, there is nothing to build. | 177 | 105 | if (ordered_map.empty()) { | 178 | 5 | return false; | 179 | 5 | } | 180 | | | 181 | | // Calculate the maximum number of values that can be assigned to each bucket. | 182 | 100 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 183 | | | 184 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 185 | | // allocations when inserting buckets. | 186 | 100 | buckets.clear(); | 187 | 100 | buckets.reserve(max_num_buckets); | 188 | | | 189 | | // Initialize bucket variables. | 190 | 100 | size_t distinct_values_count = 0; | 191 | 100 | size_t values_count = 0; | 192 | 100 | size_t cumulative_values = 0; | 193 | | | 194 | | // Record how many values still need to be assigned. | 195 | 100 | auto remaining_distinct_values = ordered_map.size(); | 196 | | | 197 | 100 | auto it = ordered_map.begin(); | 198 | | | 199 | | // Lower value of the current bucket. | 200 | 100 | const T* lower_value = &it->first; | 201 | | | 202 | | // Iterate over the ordered map of distinct values and their counts. | 203 | 2.30k | for (; it != ordered_map.end(); ++it) { | 204 | 2.20k | const auto count = it->second; | 205 | 2.20k | const auto current_value = it->first; | 206 | | | 207 | | // Update the bucket counts and track the number of distinct values assigned. | 208 | 2.20k | distinct_values_count++; | 209 | 2.20k | remaining_distinct_values--; | 210 | 2.20k | values_count += count; | 211 | 2.20k | cumulative_values += count; | 212 | | | 213 | | // Check whether the current value should be added to the current bucket. | 214 | 2.20k | auto next = std::next(it); | 215 | 2.20k | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 216 | | | 217 | 2.20k | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 218 | 2.20k | values_count + next->second <= bucket_max_values) { | 219 | | // If the current value is the last in the input map and there are more remaining | 220 | | // distinct values than empty buckets and adding the value does not cause the bucket | 221 | | // to exceed its max size, skip adding the value to the current bucket. | 222 | 2.02k | continue; | 223 | 2.02k | } | 224 | | | 225 | | // Finalize the current bucket and add it to our collection of buckets. | 226 | 178 | auto pre_sum = cumulative_values - values_count; | 227 | | | 228 | 178 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 229 | 178 | pre_sum); | 230 | 178 | buckets.push_back(new_bucket); | 231 | | | 232 | | // Reset variables for the next bucket. | 233 | 178 | if (next != ordered_map.end()) { | 234 | 78 | lower_value = &next->first; | 235 | 78 | } | 236 | 178 | values_count = 0; | 237 | 178 | distinct_values_count = 0; | 238 | 178 | } | 239 | | | 240 | 100 | return true; | 241 | 105 | } |
_ZN5doris15build_histogramINS_11DateV2ValueINS_15DateV2ValueTypeEEEEEbRSt6vectorINS_6BucketIT_EESaIS7_EERKSt3mapIS6_mSt4lessIS6_ESaISt4pairIKS6_mEEEm Line | Count | Source | 175 | 88 | const size_t max_num_buckets) { | 176 | | // If the input map is empty, there is nothing to build. | 177 | 88 | if (ordered_map.empty()) { | 178 | 7 | return false; | 179 | 7 | } | 180 | | | 181 | | // Calculate the maximum number of values that can be assigned to each bucket. | 182 | 81 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 183 | | | 184 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 185 | | // allocations when inserting buckets. | 186 | 81 | buckets.clear(); | 187 | 81 | buckets.reserve(max_num_buckets); | 188 | | | 189 | | // Initialize bucket variables. | 190 | 81 | size_t distinct_values_count = 0; | 191 | 81 | size_t values_count = 0; | 192 | 81 | size_t cumulative_values = 0; | 193 | | | 194 | | // Record how many values still need to be assigned. | 195 | 81 | auto remaining_distinct_values = ordered_map.size(); | 196 | | | 197 | 81 | auto it = ordered_map.begin(); | 198 | | | 199 | | // Lower value of the current bucket. | 200 | 81 | const T* lower_value = &it->first; | 201 | | | 202 | | // Iterate over the ordered map of distinct values and their counts. | 203 | 539 | for (; it != ordered_map.end(); ++it) { | 204 | 458 | const auto count = it->second; | 205 | 458 | const auto current_value = it->first; | 206 | | | 207 | | // Update the bucket counts and track the number of distinct values assigned. | 208 | 458 | distinct_values_count++; | 209 | 458 | remaining_distinct_values--; | 210 | 458 | values_count += count; | 211 | 458 | cumulative_values += count; | 212 | | | 213 | | // Check whether the current value should be added to the current bucket. | 214 | 458 | auto next = std::next(it); | 215 | 458 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 216 | | | 217 | 458 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 218 | 458 | values_count + next->second <= bucket_max_values) { | 219 | | // If the current value is the last in the input map and there are more remaining | 220 | | // distinct values than empty buckets and adding the value does not cause the bucket | 221 | | // to exceed its max size, skip adding the value to the current bucket. | 222 | 202 | continue; | 223 | 202 | } | 224 | | | 225 | | // Finalize the current bucket and add it to our collection of buckets. | 226 | 256 | auto pre_sum = cumulative_values - values_count; | 227 | | | 228 | 256 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 229 | 256 | pre_sum); | 230 | 256 | buckets.push_back(new_bucket); | 231 | | | 232 | | // Reset variables for the next bucket. | 233 | 256 | if (next != ordered_map.end()) { | 234 | 175 | lower_value = &next->first; | 235 | 175 | } | 236 | 256 | values_count = 0; | 237 | 256 | distinct_values_count = 0; | 238 | 256 | } | 239 | | | 240 | 81 | return true; | 241 | 88 | } |
_ZN5doris15build_histogramINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEEEEbRSt6vectorINS_6BucketIT_EESaIS7_EERKSt3mapIS6_mSt4lessIS6_ESaISt4pairIKS6_mEEEm Line | Count | Source | 175 | 88 | const size_t max_num_buckets) { | 176 | | // If the input map is empty, there is nothing to build. | 177 | 88 | if (ordered_map.empty()) { | 178 | 7 | return false; | 179 | 7 | } | 180 | | | 181 | | // Calculate the maximum number of values that can be assigned to each bucket. | 182 | 81 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 183 | | | 184 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 185 | | // allocations when inserting buckets. | 186 | 81 | buckets.clear(); | 187 | 81 | buckets.reserve(max_num_buckets); | 188 | | | 189 | | // Initialize bucket variables. | 190 | 81 | size_t distinct_values_count = 0; | 191 | 81 | size_t values_count = 0; | 192 | 81 | size_t cumulative_values = 0; | 193 | | | 194 | | // Record how many values still need to be assigned. | 195 | 81 | auto remaining_distinct_values = ordered_map.size(); | 196 | | | 197 | 81 | auto it = ordered_map.begin(); | 198 | | | 199 | | // Lower value of the current bucket. | 200 | 81 | const T* lower_value = &it->first; | 201 | | | 202 | | // Iterate over the ordered map of distinct values and their counts. | 203 | 538 | for (; it != ordered_map.end(); ++it) { | 204 | 457 | const auto count = it->second; | 205 | 457 | const auto current_value = it->first; | 206 | | | 207 | | // Update the bucket counts and track the number of distinct values assigned. | 208 | 457 | distinct_values_count++; | 209 | 457 | remaining_distinct_values--; | 210 | 457 | values_count += count; | 211 | 457 | cumulative_values += count; | 212 | | | 213 | | // Check whether the current value should be added to the current bucket. | 214 | 457 | auto next = std::next(it); | 215 | 457 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 216 | | | 217 | 457 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 218 | 457 | values_count + next->second <= bucket_max_values) { | 219 | | // If the current value is the last in the input map and there are more remaining | 220 | | // distinct values than empty buckets and adding the value does not cause the bucket | 221 | | // to exceed its max size, skip adding the value to the current bucket. | 222 | 202 | continue; | 223 | 202 | } | 224 | | | 225 | | // Finalize the current bucket and add it to our collection of buckets. | 226 | 255 | auto pre_sum = cumulative_values - values_count; | 227 | | | 228 | 255 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 229 | 255 | pre_sum); | 230 | 255 | buckets.push_back(new_bucket); | 231 | | | 232 | | // Reset variables for the next bucket. | 233 | 255 | if (next != ordered_map.end()) { | 234 | 174 | lower_value = &next->first; | 235 | 174 | } | 236 | 255 | values_count = 0; | 237 | 255 | distinct_values_count = 0; | 238 | 255 | } | 239 | | | 240 | 81 | return true; | 241 | 88 | } |
|
242 | | |
243 | | template <typename T> |
244 | | bool histogram_to_json(rapidjson::StringBuffer& buffer, const std::vector<Bucket<T>>& buckets, |
245 | 702 | const DataTypePtr& data_type) { |
246 | 702 | rapidjson::Document doc; |
247 | 702 | doc.SetObject(); |
248 | 702 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); |
249 | | |
250 | 702 | int num_buckets = cast_set<int>(buckets.size()); |
251 | 702 | doc.AddMember("num_buckets", num_buckets, allocator); |
252 | | |
253 | 702 | rapidjson::Value bucket_arr(rapidjson::kArrayType); |
254 | 702 | bucket_arr.Reserve(num_buckets, allocator); |
255 | | |
256 | 702 | std::stringstream ss1; |
257 | 702 | std::stringstream ss2; |
258 | | |
259 | 702 | rapidjson::Value lower_val; |
260 | 702 | rapidjson::Value upper_val; |
261 | | |
262 | | // Convert bucket's lower and upper to 2 columns |
263 | 702 | MutableColumnPtr lower_column = data_type->create_column(); |
264 | 702 | MutableColumnPtr upper_column = data_type->create_column(); |
265 | 1.84k | for (const auto& bucket : buckets) { |
266 | | // String type is different, it has to pass in length |
267 | | // if it is string type , directly use string value |
268 | 1.84k | if constexpr (!std::is_same_v<T, std::string>) { |
269 | 1.66k | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); |
270 | 1.66k | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); |
271 | 1.66k | } |
272 | 1.84k | } |
273 | 702 | size_t row_num = 0; |
274 | | |
275 | 702 | auto format_options = DataTypeSerDe::get_default_format_options(); |
276 | 702 | auto time_zone = cctz::utc_time_zone(); |
277 | 702 | format_options.timezone = &time_zone; |
278 | | |
279 | 1.84k | for (const auto& bucket : buckets) { |
280 | 1.84k | if constexpr (std::is_same_v<T, std::string>) { |
281 | 178 | lower_val.SetString(bucket.lower.data(), |
282 | 178 | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); |
283 | 178 | upper_val.SetString(bucket.upper.data(), |
284 | 178 | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); |
285 | 1.66k | } else { |
286 | 1.66k | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); |
287 | 1.66k | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); |
288 | 1.66k | ++row_num; |
289 | 1.66k | lower_val.SetString(lower_str.data(), |
290 | 1.66k | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); |
291 | 1.66k | upper_val.SetString(upper_str.data(), |
292 | 1.66k | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); |
293 | 1.66k | } |
294 | 1.84k | rapidjson::Value bucket_json(rapidjson::kObjectType); |
295 | 1.84k | bucket_json.AddMember("lower", lower_val, allocator); |
296 | 1.84k | bucket_json.AddMember("upper", upper_val, allocator); |
297 | 1.84k | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); |
298 | 1.84k | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); |
299 | 1.84k | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); |
300 | | |
301 | 1.84k | bucket_arr.PushBack(bucket_json, allocator); |
302 | 1.84k | } |
303 | | |
304 | 702 | doc.AddMember("buckets", bucket_arr, allocator); |
305 | 702 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); |
306 | 702 | doc.Accept(writer); |
307 | | |
308 | 702 | return !buckets.empty() && buffer.GetSize() > 0; |
309 | 702 | } _ZN5doris17histogram_to_jsonIhEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 245 | 2 | const DataTypePtr& data_type) { | 246 | 2 | rapidjson::Document doc; | 247 | 2 | doc.SetObject(); | 248 | 2 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 249 | | | 250 | 2 | int num_buckets = cast_set<int>(buckets.size()); | 251 | 2 | doc.AddMember("num_buckets", num_buckets, allocator); | 252 | | | 253 | 2 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 254 | 2 | bucket_arr.Reserve(num_buckets, allocator); | 255 | | | 256 | 2 | std::stringstream ss1; | 257 | 2 | std::stringstream ss2; | 258 | | | 259 | 2 | rapidjson::Value lower_val; | 260 | 2 | rapidjson::Value upper_val; | 261 | | | 262 | | // Convert bucket's lower and upper to 2 columns | 263 | 2 | MutableColumnPtr lower_column = data_type->create_column(); | 264 | 2 | MutableColumnPtr upper_column = data_type->create_column(); | 265 | 2 | for (const auto& bucket : buckets) { | 266 | | // String type is different, it has to pass in length | 267 | | // if it is string type , directly use string value | 268 | 2 | if constexpr (!std::is_same_v<T, std::string>) { | 269 | 2 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 270 | 2 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 271 | 2 | } | 272 | 2 | } | 273 | 2 | size_t row_num = 0; | 274 | | | 275 | 2 | auto format_options = DataTypeSerDe::get_default_format_options(); | 276 | 2 | auto time_zone = cctz::utc_time_zone(); | 277 | 2 | format_options.timezone = &time_zone; | 278 | | | 279 | 2 | for (const auto& bucket : buckets) { | 280 | | if constexpr (std::is_same_v<T, std::string>) { | 281 | | lower_val.SetString(bucket.lower.data(), | 282 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 283 | | upper_val.SetString(bucket.upper.data(), | 284 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 285 | 2 | } else { | 286 | 2 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 287 | 2 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 288 | 2 | ++row_num; | 289 | 2 | lower_val.SetString(lower_str.data(), | 290 | 2 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 291 | 2 | upper_val.SetString(upper_str.data(), | 292 | 2 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 293 | 2 | } | 294 | 2 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 295 | 2 | bucket_json.AddMember("lower", lower_val, allocator); | 296 | 2 | bucket_json.AddMember("upper", upper_val, allocator); | 297 | 2 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 298 | 2 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 299 | 2 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 300 | | | 301 | 2 | bucket_arr.PushBack(bucket_json, allocator); | 302 | 2 | } | 303 | | | 304 | 2 | doc.AddMember("buckets", bucket_arr, allocator); | 305 | 2 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 306 | 2 | doc.Accept(writer); | 307 | | | 308 | 2 | return !buckets.empty() && buffer.GetSize() > 0; | 309 | 2 | } |
_ZN5doris17histogram_to_jsonIaEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 245 | 48 | const DataTypePtr& data_type) { | 246 | 48 | rapidjson::Document doc; | 247 | 48 | doc.SetObject(); | 248 | 48 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 249 | | | 250 | 48 | int num_buckets = cast_set<int>(buckets.size()); | 251 | 48 | doc.AddMember("num_buckets", num_buckets, allocator); | 252 | | | 253 | 48 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 254 | 48 | bucket_arr.Reserve(num_buckets, allocator); | 255 | | | 256 | 48 | std::stringstream ss1; | 257 | 48 | std::stringstream ss2; | 258 | | | 259 | 48 | rapidjson::Value lower_val; | 260 | 48 | rapidjson::Value upper_val; | 261 | | | 262 | | // Convert bucket's lower and upper to 2 columns | 263 | 48 | MutableColumnPtr lower_column = data_type->create_column(); | 264 | 48 | MutableColumnPtr upper_column = data_type->create_column(); | 265 | 129 | for (const auto& bucket : buckets) { | 266 | | // String type is different, it has to pass in length | 267 | | // if it is string type , directly use string value | 268 | 129 | if constexpr (!std::is_same_v<T, std::string>) { | 269 | 129 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 270 | 129 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 271 | 129 | } | 272 | 129 | } | 273 | 48 | size_t row_num = 0; | 274 | | | 275 | 48 | auto format_options = DataTypeSerDe::get_default_format_options(); | 276 | 48 | auto time_zone = cctz::utc_time_zone(); | 277 | 48 | format_options.timezone = &time_zone; | 278 | | | 279 | 129 | for (const auto& bucket : buckets) { | 280 | | if constexpr (std::is_same_v<T, std::string>) { | 281 | | lower_val.SetString(bucket.lower.data(), | 282 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 283 | | upper_val.SetString(bucket.upper.data(), | 284 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 285 | 129 | } else { | 286 | 129 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 287 | 129 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 288 | 129 | ++row_num; | 289 | 129 | lower_val.SetString(lower_str.data(), | 290 | 129 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 291 | 129 | upper_val.SetString(upper_str.data(), | 292 | 129 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 293 | 129 | } | 294 | 129 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 295 | 129 | bucket_json.AddMember("lower", lower_val, allocator); | 296 | 129 | bucket_json.AddMember("upper", upper_val, allocator); | 297 | 129 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 298 | 129 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 299 | 129 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 300 | | | 301 | 129 | bucket_arr.PushBack(bucket_json, allocator); | 302 | 129 | } | 303 | | | 304 | 48 | doc.AddMember("buckets", bucket_arr, allocator); | 305 | 48 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 306 | 48 | doc.Accept(writer); | 307 | | | 308 | 48 | return !buckets.empty() && buffer.GetSize() > 0; | 309 | 48 | } |
_ZN5doris17histogram_to_jsonIsEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 245 | 48 | const DataTypePtr& data_type) { | 246 | 48 | rapidjson::Document doc; | 247 | 48 | doc.SetObject(); | 248 | 48 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 249 | | | 250 | 48 | int num_buckets = cast_set<int>(buckets.size()); | 251 | 48 | doc.AddMember("num_buckets", num_buckets, allocator); | 252 | | | 253 | 48 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 254 | 48 | bucket_arr.Reserve(num_buckets, allocator); | 255 | | | 256 | 48 | std::stringstream ss1; | 257 | 48 | std::stringstream ss2; | 258 | | | 259 | 48 | rapidjson::Value lower_val; | 260 | 48 | rapidjson::Value upper_val; | 261 | | | 262 | | // Convert bucket's lower and upper to 2 columns | 263 | 48 | MutableColumnPtr lower_column = data_type->create_column(); | 264 | 48 | MutableColumnPtr upper_column = data_type->create_column(); | 265 | 132 | for (const auto& bucket : buckets) { | 266 | | // String type is different, it has to pass in length | 267 | | // if it is string type , directly use string value | 268 | 132 | if constexpr (!std::is_same_v<T, std::string>) { | 269 | 132 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 270 | 132 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 271 | 132 | } | 272 | 132 | } | 273 | 48 | size_t row_num = 0; | 274 | | | 275 | 48 | auto format_options = DataTypeSerDe::get_default_format_options(); | 276 | 48 | auto time_zone = cctz::utc_time_zone(); | 277 | 48 | format_options.timezone = &time_zone; | 278 | | | 279 | 132 | for (const auto& bucket : buckets) { | 280 | | if constexpr (std::is_same_v<T, std::string>) { | 281 | | lower_val.SetString(bucket.lower.data(), | 282 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 283 | | upper_val.SetString(bucket.upper.data(), | 284 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 285 | 132 | } else { | 286 | 132 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 287 | 132 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 288 | 132 | ++row_num; | 289 | 132 | lower_val.SetString(lower_str.data(), | 290 | 132 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 291 | 132 | upper_val.SetString(upper_str.data(), | 292 | 132 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 293 | 132 | } | 294 | 132 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 295 | 132 | bucket_json.AddMember("lower", lower_val, allocator); | 296 | 132 | bucket_json.AddMember("upper", upper_val, allocator); | 297 | 132 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 298 | 132 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 299 | 132 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 300 | | | 301 | 132 | bucket_arr.PushBack(bucket_json, allocator); | 302 | 132 | } | 303 | | | 304 | 48 | doc.AddMember("buckets", bucket_arr, allocator); | 305 | 48 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 306 | 48 | doc.Accept(writer); | 307 | | | 308 | 48 | return !buckets.empty() && buffer.GetSize() > 0; | 309 | 48 | } |
_ZN5doris17histogram_to_jsonIiEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 245 | 69 | const DataTypePtr& data_type) { | 246 | 69 | rapidjson::Document doc; | 247 | 69 | doc.SetObject(); | 248 | 69 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 249 | | | 250 | 69 | int num_buckets = cast_set<int>(buckets.size()); | 251 | 69 | doc.AddMember("num_buckets", num_buckets, allocator); | 252 | | | 253 | 69 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 254 | 69 | bucket_arr.Reserve(num_buckets, allocator); | 255 | | | 256 | 69 | std::stringstream ss1; | 257 | 69 | std::stringstream ss2; | 258 | | | 259 | 69 | rapidjson::Value lower_val; | 260 | 69 | rapidjson::Value upper_val; | 261 | | | 262 | | // Convert bucket's lower and upper to 2 columns | 263 | 69 | MutableColumnPtr lower_column = data_type->create_column(); | 264 | 69 | MutableColumnPtr upper_column = data_type->create_column(); | 265 | 149 | for (const auto& bucket : buckets) { | 266 | | // String type is different, it has to pass in length | 267 | | // if it is string type , directly use string value | 268 | 149 | if constexpr (!std::is_same_v<T, std::string>) { | 269 | 149 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 270 | 149 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 271 | 149 | } | 272 | 149 | } | 273 | 69 | size_t row_num = 0; | 274 | | | 275 | 69 | auto format_options = DataTypeSerDe::get_default_format_options(); | 276 | 69 | auto time_zone = cctz::utc_time_zone(); | 277 | 69 | format_options.timezone = &time_zone; | 278 | | | 279 | 149 | for (const auto& bucket : buckets) { | 280 | | if constexpr (std::is_same_v<T, std::string>) { | 281 | | lower_val.SetString(bucket.lower.data(), | 282 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 283 | | upper_val.SetString(bucket.upper.data(), | 284 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 285 | 149 | } else { | 286 | 149 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 287 | 149 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 288 | 149 | ++row_num; | 289 | 149 | lower_val.SetString(lower_str.data(), | 290 | 149 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 291 | 149 | upper_val.SetString(upper_str.data(), | 292 | 149 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 293 | 149 | } | 294 | 149 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 295 | 149 | bucket_json.AddMember("lower", lower_val, allocator); | 296 | 149 | bucket_json.AddMember("upper", upper_val, allocator); | 297 | 149 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 298 | 149 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 299 | 149 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 300 | | | 301 | 149 | bucket_arr.PushBack(bucket_json, allocator); | 302 | 149 | } | 303 | | | 304 | 69 | doc.AddMember("buckets", bucket_arr, allocator); | 305 | 69 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 306 | 69 | doc.Accept(writer); | 307 | | | 308 | 69 | return !buckets.empty() && buffer.GetSize() > 0; | 309 | 69 | } |
_ZN5doris17histogram_to_jsonIlEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 245 | 65 | const DataTypePtr& data_type) { | 246 | 65 | rapidjson::Document doc; | 247 | 65 | doc.SetObject(); | 248 | 65 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 249 | | | 250 | 65 | int num_buckets = cast_set<int>(buckets.size()); | 251 | 65 | doc.AddMember("num_buckets", num_buckets, allocator); | 252 | | | 253 | 65 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 254 | 65 | bucket_arr.Reserve(num_buckets, allocator); | 255 | | | 256 | 65 | std::stringstream ss1; | 257 | 65 | std::stringstream ss2; | 258 | | | 259 | 65 | rapidjson::Value lower_val; | 260 | 65 | rapidjson::Value upper_val; | 261 | | | 262 | | // Convert bucket's lower and upper to 2 columns | 263 | 65 | MutableColumnPtr lower_column = data_type->create_column(); | 264 | 65 | MutableColumnPtr upper_column = data_type->create_column(); | 265 | 212 | for (const auto& bucket : buckets) { | 266 | | // String type is different, it has to pass in length | 267 | | // if it is string type , directly use string value | 268 | 212 | if constexpr (!std::is_same_v<T, std::string>) { | 269 | 212 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 270 | 212 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 271 | 212 | } | 272 | 212 | } | 273 | 65 | size_t row_num = 0; | 274 | | | 275 | 65 | auto format_options = DataTypeSerDe::get_default_format_options(); | 276 | 65 | auto time_zone = cctz::utc_time_zone(); | 277 | 65 | format_options.timezone = &time_zone; | 278 | | | 279 | 212 | for (const auto& bucket : buckets) { | 280 | | if constexpr (std::is_same_v<T, std::string>) { | 281 | | lower_val.SetString(bucket.lower.data(), | 282 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 283 | | upper_val.SetString(bucket.upper.data(), | 284 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 285 | 212 | } else { | 286 | 212 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 287 | 212 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 288 | 212 | ++row_num; | 289 | 212 | lower_val.SetString(lower_str.data(), | 290 | 212 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 291 | 212 | upper_val.SetString(upper_str.data(), | 292 | 212 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 293 | 212 | } | 294 | 212 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 295 | 212 | bucket_json.AddMember("lower", lower_val, allocator); | 296 | 212 | bucket_json.AddMember("upper", upper_val, allocator); | 297 | 212 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 298 | 212 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 299 | 212 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 300 | | | 301 | 212 | bucket_arr.PushBack(bucket_json, allocator); | 302 | 212 | } | 303 | | | 304 | 65 | doc.AddMember("buckets", bucket_arr, allocator); | 305 | 65 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 306 | 65 | doc.Accept(writer); | 307 | | | 308 | 65 | return !buckets.empty() && buffer.GetSize() > 0; | 309 | 65 | } |
_ZN5doris17histogram_to_jsonInEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 245 | 48 | const DataTypePtr& data_type) { | 246 | 48 | rapidjson::Document doc; | 247 | 48 | doc.SetObject(); | 248 | 48 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 249 | | | 250 | 48 | int num_buckets = cast_set<int>(buckets.size()); | 251 | 48 | doc.AddMember("num_buckets", num_buckets, allocator); | 252 | | | 253 | 48 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 254 | 48 | bucket_arr.Reserve(num_buckets, allocator); | 255 | | | 256 | 48 | std::stringstream ss1; | 257 | 48 | std::stringstream ss2; | 258 | | | 259 | 48 | rapidjson::Value lower_val; | 260 | 48 | rapidjson::Value upper_val; | 261 | | | 262 | | // Convert bucket's lower and upper to 2 columns | 263 | 48 | MutableColumnPtr lower_column = data_type->create_column(); | 264 | 48 | MutableColumnPtr upper_column = data_type->create_column(); | 265 | 141 | for (const auto& bucket : buckets) { | 266 | | // String type is different, it has to pass in length | 267 | | // if it is string type , directly use string value | 268 | 141 | if constexpr (!std::is_same_v<T, std::string>) { | 269 | 141 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 270 | 141 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 271 | 141 | } | 272 | 141 | } | 273 | 48 | size_t row_num = 0; | 274 | | | 275 | 48 | auto format_options = DataTypeSerDe::get_default_format_options(); | 276 | 48 | auto time_zone = cctz::utc_time_zone(); | 277 | 48 | format_options.timezone = &time_zone; | 278 | | | 279 | 141 | for (const auto& bucket : buckets) { | 280 | | if constexpr (std::is_same_v<T, std::string>) { | 281 | | lower_val.SetString(bucket.lower.data(), | 282 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 283 | | upper_val.SetString(bucket.upper.data(), | 284 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 285 | 141 | } else { | 286 | 141 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 287 | 141 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 288 | 141 | ++row_num; | 289 | 141 | lower_val.SetString(lower_str.data(), | 290 | 141 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 291 | 141 | upper_val.SetString(upper_str.data(), | 292 | 141 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 293 | 141 | } | 294 | 141 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 295 | 141 | bucket_json.AddMember("lower", lower_val, allocator); | 296 | 141 | bucket_json.AddMember("upper", upper_val, allocator); | 297 | 141 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 298 | 141 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 299 | 141 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 300 | | | 301 | 141 | bucket_arr.PushBack(bucket_json, allocator); | 302 | 141 | } | 303 | | | 304 | 48 | doc.AddMember("buckets", bucket_arr, allocator); | 305 | 48 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 306 | 48 | doc.Accept(writer); | 307 | | | 308 | 48 | return !buckets.empty() && buffer.GetSize() > 0; | 309 | 48 | } |
_ZN5doris17histogram_to_jsonIfEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 245 | 46 | const DataTypePtr& data_type) { | 246 | 46 | rapidjson::Document doc; | 247 | 46 | doc.SetObject(); | 248 | 46 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 249 | | | 250 | 46 | int num_buckets = cast_set<int>(buckets.size()); | 251 | 46 | doc.AddMember("num_buckets", num_buckets, allocator); | 252 | | | 253 | 46 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 254 | 46 | bucket_arr.Reserve(num_buckets, allocator); | 255 | | | 256 | 46 | std::stringstream ss1; | 257 | 46 | std::stringstream ss2; | 258 | | | 259 | 46 | rapidjson::Value lower_val; | 260 | 46 | rapidjson::Value upper_val; | 261 | | | 262 | | // Convert bucket's lower and upper to 2 columns | 263 | 46 | MutableColumnPtr lower_column = data_type->create_column(); | 264 | 46 | MutableColumnPtr upper_column = data_type->create_column(); | 265 | 128 | for (const auto& bucket : buckets) { | 266 | | // String type is different, it has to pass in length | 267 | | // if it is string type , directly use string value | 268 | 128 | if constexpr (!std::is_same_v<T, std::string>) { | 269 | 128 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 270 | 128 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 271 | 128 | } | 272 | 128 | } | 273 | 46 | size_t row_num = 0; | 274 | | | 275 | 46 | auto format_options = DataTypeSerDe::get_default_format_options(); | 276 | 46 | auto time_zone = cctz::utc_time_zone(); | 277 | 46 | format_options.timezone = &time_zone; | 278 | | | 279 | 128 | for (const auto& bucket : buckets) { | 280 | | if constexpr (std::is_same_v<T, std::string>) { | 281 | | lower_val.SetString(bucket.lower.data(), | 282 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 283 | | upper_val.SetString(bucket.upper.data(), | 284 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 285 | 128 | } else { | 286 | 128 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 287 | 128 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 288 | 128 | ++row_num; | 289 | 128 | lower_val.SetString(lower_str.data(), | 290 | 128 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 291 | 128 | upper_val.SetString(upper_str.data(), | 292 | 128 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 293 | 128 | } | 294 | 128 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 295 | 128 | bucket_json.AddMember("lower", lower_val, allocator); | 296 | 128 | bucket_json.AddMember("upper", upper_val, allocator); | 297 | 128 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 298 | 128 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 299 | 128 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 300 | | | 301 | 128 | bucket_arr.PushBack(bucket_json, allocator); | 302 | 128 | } | 303 | | | 304 | 46 | doc.AddMember("buckets", bucket_arr, allocator); | 305 | 46 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 306 | 46 | doc.Accept(writer); | 307 | | | 308 | 46 | return !buckets.empty() && buffer.GetSize() > 0; | 309 | 46 | } |
_ZN5doris17histogram_to_jsonIdEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 245 | 46 | const DataTypePtr& data_type) { | 246 | 46 | rapidjson::Document doc; | 247 | 46 | doc.SetObject(); | 248 | 46 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 249 | | | 250 | 46 | int num_buckets = cast_set<int>(buckets.size()); | 251 | 46 | doc.AddMember("num_buckets", num_buckets, allocator); | 252 | | | 253 | 46 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 254 | 46 | bucket_arr.Reserve(num_buckets, allocator); | 255 | | | 256 | 46 | std::stringstream ss1; | 257 | 46 | std::stringstream ss2; | 258 | | | 259 | 46 | rapidjson::Value lower_val; | 260 | 46 | rapidjson::Value upper_val; | 261 | | | 262 | | // Convert bucket's lower and upper to 2 columns | 263 | 46 | MutableColumnPtr lower_column = data_type->create_column(); | 264 | 46 | MutableColumnPtr upper_column = data_type->create_column(); | 265 | 128 | for (const auto& bucket : buckets) { | 266 | | // String type is different, it has to pass in length | 267 | | // if it is string type , directly use string value | 268 | 128 | if constexpr (!std::is_same_v<T, std::string>) { | 269 | 128 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 270 | 128 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 271 | 128 | } | 272 | 128 | } | 273 | 46 | size_t row_num = 0; | 274 | | | 275 | 46 | auto format_options = DataTypeSerDe::get_default_format_options(); | 276 | 46 | auto time_zone = cctz::utc_time_zone(); | 277 | 46 | format_options.timezone = &time_zone; | 278 | | | 279 | 128 | for (const auto& bucket : buckets) { | 280 | | if constexpr (std::is_same_v<T, std::string>) { | 281 | | lower_val.SetString(bucket.lower.data(), | 282 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 283 | | upper_val.SetString(bucket.upper.data(), | 284 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 285 | 128 | } else { | 286 | 128 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 287 | 128 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 288 | 128 | ++row_num; | 289 | 128 | lower_val.SetString(lower_str.data(), | 290 | 128 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 291 | 128 | upper_val.SetString(upper_str.data(), | 292 | 128 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 293 | 128 | } | 294 | 128 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 295 | 128 | bucket_json.AddMember("lower", lower_val, allocator); | 296 | 128 | bucket_json.AddMember("upper", upper_val, allocator); | 297 | 128 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 298 | 128 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 299 | 128 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 300 | | | 301 | 128 | bucket_arr.PushBack(bucket_json, allocator); | 302 | 128 | } | 303 | | | 304 | 46 | doc.AddMember("buckets", bucket_arr, allocator); | 305 | 46 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 306 | 46 | doc.Accept(writer); | 307 | | | 308 | 46 | return !buckets.empty() && buffer.GetSize() > 0; | 309 | 46 | } |
_ZN5doris17histogram_to_jsonINS_7DecimalIiEEEEbRN9rapidjson19GenericStringBufferINS3_4UTF8IcEENS3_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISD_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 245 | 49 | const DataTypePtr& data_type) { | 246 | 49 | rapidjson::Document doc; | 247 | 49 | doc.SetObject(); | 248 | 49 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 249 | | | 250 | 49 | int num_buckets = cast_set<int>(buckets.size()); | 251 | 49 | doc.AddMember("num_buckets", num_buckets, allocator); | 252 | | | 253 | 49 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 254 | 49 | bucket_arr.Reserve(num_buckets, allocator); | 255 | | | 256 | 49 | std::stringstream ss1; | 257 | 49 | std::stringstream ss2; | 258 | | | 259 | 49 | rapidjson::Value lower_val; | 260 | 49 | rapidjson::Value upper_val; | 261 | | | 262 | | // Convert bucket's lower and upper to 2 columns | 263 | 49 | MutableColumnPtr lower_column = data_type->create_column(); | 264 | 49 | MutableColumnPtr upper_column = data_type->create_column(); | 265 | 133 | for (const auto& bucket : buckets) { | 266 | | // String type is different, it has to pass in length | 267 | | // if it is string type , directly use string value | 268 | 133 | if constexpr (!std::is_same_v<T, std::string>) { | 269 | 133 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 270 | 133 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 271 | 133 | } | 272 | 133 | } | 273 | 49 | size_t row_num = 0; | 274 | | | 275 | 49 | auto format_options = DataTypeSerDe::get_default_format_options(); | 276 | 49 | auto time_zone = cctz::utc_time_zone(); | 277 | 49 | format_options.timezone = &time_zone; | 278 | | | 279 | 133 | for (const auto& bucket : buckets) { | 280 | | if constexpr (std::is_same_v<T, std::string>) { | 281 | | lower_val.SetString(bucket.lower.data(), | 282 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 283 | | upper_val.SetString(bucket.upper.data(), | 284 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 285 | 133 | } else { | 286 | 133 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 287 | 133 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 288 | 133 | ++row_num; | 289 | 133 | lower_val.SetString(lower_str.data(), | 290 | 133 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 291 | 133 | upper_val.SetString(upper_str.data(), | 292 | 133 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 293 | 133 | } | 294 | 133 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 295 | 133 | bucket_json.AddMember("lower", lower_val, allocator); | 296 | 133 | bucket_json.AddMember("upper", upper_val, allocator); | 297 | 133 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 298 | 133 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 299 | 133 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 300 | | | 301 | 133 | bucket_arr.PushBack(bucket_json, allocator); | 302 | 133 | } | 303 | | | 304 | 49 | doc.AddMember("buckets", bucket_arr, allocator); | 305 | 49 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 306 | 49 | doc.Accept(writer); | 307 | | | 308 | 49 | return !buckets.empty() && buffer.GetSize() > 0; | 309 | 49 | } |
Unexecuted instantiation: _ZN5doris17histogram_to_jsonINS_7DecimalIlEEEEbRN9rapidjson19GenericStringBufferINS3_4UTF8IcEENS3_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISD_EERKSt10shared_ptrIKNS_9IDataTypeEE Unexecuted instantiation: _ZN5doris17histogram_to_jsonINS_12Decimal128V3EEEbRN9rapidjson19GenericStringBufferINS2_4UTF8IcEENS2_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISC_EERKSt10shared_ptrIKNS_9IDataTypeEE Unexecuted instantiation: _ZN5doris17histogram_to_jsonINS_7DecimalIN4wide7integerILm256EiEEEEEEbRN9rapidjson19GenericStringBufferINS6_4UTF8IcEENS6_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISG_EERKSt10shared_ptrIKNS_9IDataTypeEE _ZN5doris17histogram_to_jsonINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEbRN9rapidjson19GenericStringBufferINS7_4UTF8IcEENS7_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISH_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 245 | 105 | const DataTypePtr& data_type) { | 246 | 105 | rapidjson::Document doc; | 247 | 105 | doc.SetObject(); | 248 | 105 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 249 | | | 250 | 105 | int num_buckets = cast_set<int>(buckets.size()); | 251 | 105 | doc.AddMember("num_buckets", num_buckets, allocator); | 252 | | | 253 | 105 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 254 | 105 | bucket_arr.Reserve(num_buckets, allocator); | 255 | | | 256 | 105 | std::stringstream ss1; | 257 | 105 | std::stringstream ss2; | 258 | | | 259 | 105 | rapidjson::Value lower_val; | 260 | 105 | rapidjson::Value upper_val; | 261 | | | 262 | | // Convert bucket's lower and upper to 2 columns | 263 | 105 | MutableColumnPtr lower_column = data_type->create_column(); | 264 | 105 | MutableColumnPtr upper_column = data_type->create_column(); | 265 | 178 | for (const auto& bucket : buckets) { | 266 | | // String type is different, it has to pass in length | 267 | | // if it is string type , directly use string value | 268 | | if constexpr (!std::is_same_v<T, std::string>) { | 269 | | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 270 | | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 271 | | } | 272 | 178 | } | 273 | 105 | size_t row_num = 0; | 274 | | | 275 | 105 | auto format_options = DataTypeSerDe::get_default_format_options(); | 276 | 105 | auto time_zone = cctz::utc_time_zone(); | 277 | 105 | format_options.timezone = &time_zone; | 278 | | | 279 | 178 | for (const auto& bucket : buckets) { | 280 | 178 | if constexpr (std::is_same_v<T, std::string>) { | 281 | 178 | lower_val.SetString(bucket.lower.data(), | 282 | 178 | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 283 | 178 | upper_val.SetString(bucket.upper.data(), | 284 | 178 | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 285 | | } else { | 286 | | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 287 | | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 288 | | ++row_num; | 289 | | lower_val.SetString(lower_str.data(), | 290 | | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 291 | | upper_val.SetString(upper_str.data(), | 292 | | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 293 | | } | 294 | 178 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 295 | 178 | bucket_json.AddMember("lower", lower_val, allocator); | 296 | 178 | bucket_json.AddMember("upper", upper_val, allocator); | 297 | 178 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 298 | 178 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 299 | 178 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 300 | | | 301 | 178 | bucket_arr.PushBack(bucket_json, allocator); | 302 | 178 | } | 303 | | | 304 | 105 | doc.AddMember("buckets", bucket_arr, allocator); | 305 | 105 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 306 | 105 | doc.Accept(writer); | 307 | | | 308 | 105 | return !buckets.empty() && buffer.GetSize() > 0; | 309 | 105 | } |
_ZN5doris17histogram_to_jsonINS_11DateV2ValueINS_15DateV2ValueTypeEEEEEbRN9rapidjson19GenericStringBufferINS4_4UTF8IcEENS4_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISE_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 245 | 88 | const DataTypePtr& data_type) { | 246 | 88 | rapidjson::Document doc; | 247 | 88 | doc.SetObject(); | 248 | 88 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 249 | | | 250 | 88 | int num_buckets = cast_set<int>(buckets.size()); | 251 | 88 | doc.AddMember("num_buckets", num_buckets, allocator); | 252 | | | 253 | 88 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 254 | 88 | bucket_arr.Reserve(num_buckets, allocator); | 255 | | | 256 | 88 | std::stringstream ss1; | 257 | 88 | std::stringstream ss2; | 258 | | | 259 | 88 | rapidjson::Value lower_val; | 260 | 88 | rapidjson::Value upper_val; | 261 | | | 262 | | // Convert bucket's lower and upper to 2 columns | 263 | 88 | MutableColumnPtr lower_column = data_type->create_column(); | 264 | 88 | MutableColumnPtr upper_column = data_type->create_column(); | 265 | 256 | for (const auto& bucket : buckets) { | 266 | | // String type is different, it has to pass in length | 267 | | // if it is string type , directly use string value | 268 | 256 | if constexpr (!std::is_same_v<T, std::string>) { | 269 | 256 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 270 | 256 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 271 | 256 | } | 272 | 256 | } | 273 | 88 | size_t row_num = 0; | 274 | | | 275 | 88 | auto format_options = DataTypeSerDe::get_default_format_options(); | 276 | 88 | auto time_zone = cctz::utc_time_zone(); | 277 | 88 | format_options.timezone = &time_zone; | 278 | | | 279 | 256 | for (const auto& bucket : buckets) { | 280 | | if constexpr (std::is_same_v<T, std::string>) { | 281 | | lower_val.SetString(bucket.lower.data(), | 282 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 283 | | upper_val.SetString(bucket.upper.data(), | 284 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 285 | 256 | } else { | 286 | 256 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 287 | 256 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 288 | 256 | ++row_num; | 289 | 256 | lower_val.SetString(lower_str.data(), | 290 | 256 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 291 | 256 | upper_val.SetString(upper_str.data(), | 292 | 256 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 293 | 256 | } | 294 | 256 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 295 | 256 | bucket_json.AddMember("lower", lower_val, allocator); | 296 | 256 | bucket_json.AddMember("upper", upper_val, allocator); | 297 | 256 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 298 | 256 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 299 | 256 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 300 | | | 301 | 256 | bucket_arr.PushBack(bucket_json, allocator); | 302 | 256 | } | 303 | | | 304 | 88 | doc.AddMember("buckets", bucket_arr, allocator); | 305 | 88 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 306 | 88 | doc.Accept(writer); | 307 | | | 308 | 88 | return !buckets.empty() && buffer.GetSize() > 0; | 309 | 88 | } |
_ZN5doris17histogram_to_jsonINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEEEEbRN9rapidjson19GenericStringBufferINS4_4UTF8IcEENS4_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISE_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 245 | 88 | const DataTypePtr& data_type) { | 246 | 88 | rapidjson::Document doc; | 247 | 88 | doc.SetObject(); | 248 | 88 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 249 | | | 250 | 88 | int num_buckets = cast_set<int>(buckets.size()); | 251 | 88 | doc.AddMember("num_buckets", num_buckets, allocator); | 252 | | | 253 | 88 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 254 | 88 | bucket_arr.Reserve(num_buckets, allocator); | 255 | | | 256 | 88 | std::stringstream ss1; | 257 | 88 | std::stringstream ss2; | 258 | | | 259 | 88 | rapidjson::Value lower_val; | 260 | 88 | rapidjson::Value upper_val; | 261 | | | 262 | | // Convert bucket's lower and upper to 2 columns | 263 | 88 | MutableColumnPtr lower_column = data_type->create_column(); | 264 | 88 | MutableColumnPtr upper_column = data_type->create_column(); | 265 | 255 | for (const auto& bucket : buckets) { | 266 | | // String type is different, it has to pass in length | 267 | | // if it is string type , directly use string value | 268 | 255 | if constexpr (!std::is_same_v<T, std::string>) { | 269 | 255 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 270 | 255 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 271 | 255 | } | 272 | 255 | } | 273 | 88 | size_t row_num = 0; | 274 | | | 275 | 88 | auto format_options = DataTypeSerDe::get_default_format_options(); | 276 | 88 | auto time_zone = cctz::utc_time_zone(); | 277 | 88 | format_options.timezone = &time_zone; | 278 | | | 279 | 255 | for (const auto& bucket : buckets) { | 280 | | if constexpr (std::is_same_v<T, std::string>) { | 281 | | lower_val.SetString(bucket.lower.data(), | 282 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 283 | | upper_val.SetString(bucket.upper.data(), | 284 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 285 | 255 | } else { | 286 | 255 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 287 | 255 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 288 | 255 | ++row_num; | 289 | 255 | lower_val.SetString(lower_str.data(), | 290 | 255 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 291 | 255 | upper_val.SetString(upper_str.data(), | 292 | 255 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 293 | 255 | } | 294 | 255 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 295 | 255 | bucket_json.AddMember("lower", lower_val, allocator); | 296 | 255 | bucket_json.AddMember("upper", upper_val, allocator); | 297 | 255 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 298 | 255 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 299 | 255 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 300 | | | 301 | 255 | bucket_arr.PushBack(bucket_json, allocator); | 302 | 255 | } | 303 | | | 304 | 88 | doc.AddMember("buckets", bucket_arr, allocator); | 305 | 88 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 306 | 88 | doc.Accept(writer); | 307 | | | 308 | 88 | return !buckets.empty() && buffer.GetSize() > 0; | 309 | 88 | } |
|
310 | | } // namespace doris |