be/src/exec/common/histogram_helpers.hpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
#include <rapidjson/document.h>
#include <rapidjson/prettywriter.h>
#include <rapidjson/stringbuffer.h>

#include <cassert>
#include <cstddef>
#include <map>
#include <numeric>
#include <utility>

#include <boost/dynamic_bitset.hpp>

#include "common/cast_set.h"
#include "core/data_type/data_type_decimal.h"
#include "util/io_helper.h"
29 | | |
30 | | namespace doris { |
31 | | #include "common/compile_check_begin.h" |
template <typename T>
struct Bucket {
public:
    Bucket() = default;

    /// Constructs a histogram bucket covering the value range [lower, upper].
    ///
    /// @param lower   smallest value contained in this bucket (inclusive)
    /// @param upper   largest value contained in this bucket (inclusive)
    /// @param ndv     number of distinct values in the bucket
    /// @param count   total number of values in the bucket
    /// @param pre_sum cumulative count of values in all preceding buckets
    ///
    /// The bound parameters are taken by value and moved into the members so
    /// that expensive element types (e.g. std::string, wide decimals) are not
    /// copied a second time.
    Bucket(T lower, T upper, size_t ndv, size_t count, size_t pre_sum)
            : lower(std::move(lower)),
              upper(std::move(upper)),
              ndv(ndv),
              count(count),
              pre_sum(pre_sum) {}

    T lower;        // inclusive lower bound of the bucket
    T upper;        // inclusive upper bound of the bucket
    size_t ndv;     // number of distinct values in the bucket
    size_t count;   // total number of values in the bucket
    size_t pre_sum; // total count of values in all earlier buckets
};
45 | | |
46 | | /** |
47 | | * Checks if it is possible to assign the provided value_map to the given |
48 | | * number of buckets such that no bucket has a size larger than max_bucket_size. |
49 | | * |
50 | | * @param value_map A mapping of values to their counts. |
51 | | * @param max_bucket_size The maximum size that any bucket is allowed to have. |
52 | | * @param num_buckets The number of buckets that we want to assign values to. |
53 | | * |
54 | | * @return true if the values can be assigned to the buckets, false otherwise. |
55 | | */ |
56 | | template <typename T> |
57 | | bool can_assign_into_buckets(const std::map<T, size_t>& value_map, const size_t max_bucket_size, |
58 | 505 | const size_t num_buckets) { |
59 | 505 | if (value_map.empty()) { |
60 | 1 | return false; |
61 | 504 | }; |
62 | | |
63 | 504 | size_t used_buckets = 1; |
64 | 504 | size_t current_bucket_size = 0; |
65 | | |
66 | 30.1k | for (const auto& [value, count] : value_map) { |
67 | 30.1k | current_bucket_size += count; |
68 | | |
69 | | // If adding the current value to the current bucket would exceed max_bucket_size, |
70 | | // then we start a new bucket. |
71 | 30.1k | if (current_bucket_size > max_bucket_size) { |
72 | 1.14k | ++used_buckets; |
73 | 1.14k | current_bucket_size = count; |
74 | 1.14k | } |
75 | | |
76 | | // If we have used more buckets than num_buckets, we cannot assign the values to buckets. |
77 | 30.1k | if (used_buckets > num_buckets) { |
78 | 222 | return false; |
79 | 222 | } |
80 | 30.1k | } |
81 | | |
82 | 282 | return true; |
83 | 504 | } Unexecuted instantiation: _ZN5doris23can_assign_into_bucketsIhEEbRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEmm _ZN5doris23can_assign_into_bucketsIaEEbRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEmm Line | Count | Source | 58 | 27 | const size_t num_buckets) { | 59 | 27 | if (value_map.empty()) { | 60 | 0 | return false; | 61 | 27 | }; | 62 | | | 63 | 27 | size_t used_buckets = 1; | 64 | 27 | size_t current_bucket_size = 0; | 65 | | | 66 | 1.24k | for (const auto& [value, count] : value_map) { | 67 | 1.24k | current_bucket_size += count; | 68 | | | 69 | | // If adding the current value to the current bucket would exceed max_bucket_size, | 70 | | // then we start a new bucket. | 71 | 1.24k | if (current_bucket_size > max_bucket_size) { | 72 | 70 | ++used_buckets; | 73 | 70 | current_bucket_size = count; | 74 | 70 | } | 75 | | | 76 | | // If we have used more buckets than num_buckets, we cannot assign the values to buckets. | 77 | 1.24k | if (used_buckets > num_buckets) { | 78 | 11 | return false; | 79 | 11 | } | 80 | 1.24k | } | 81 | | | 82 | 16 | return true; | 83 | 27 | } |
_ZN5doris23can_assign_into_bucketsIsEEbRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEmm Line | Count | Source | 58 | 40 | const size_t num_buckets) { | 59 | 40 | if (value_map.empty()) { | 60 | 0 | return false; | 61 | 40 | }; | 62 | | | 63 | 40 | size_t used_buckets = 1; | 64 | 40 | size_t current_bucket_size = 0; | 65 | | | 66 | 1.34k | for (const auto& [value, count] : value_map) { | 67 | 1.34k | current_bucket_size += count; | 68 | | | 69 | | // If adding the current value to the current bucket would exceed max_bucket_size, | 70 | | // then we start a new bucket. | 71 | 1.34k | if (current_bucket_size > max_bucket_size) { | 72 | 88 | ++used_buckets; | 73 | 88 | current_bucket_size = count; | 74 | 88 | } | 75 | | | 76 | | // If we have used more buckets than num_buckets, we cannot assign the values to buckets. | 77 | 1.34k | if (used_buckets > num_buckets) { | 78 | 18 | return false; | 79 | 18 | } | 80 | 1.34k | } | 81 | | | 82 | 22 | return true; | 83 | 40 | } |
_ZN5doris23can_assign_into_bucketsIiEEbRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEmm Line | Count | Source | 58 | 69 | const size_t num_buckets) { | 59 | 69 | if (value_map.empty()) { | 60 | 1 | return false; | 61 | 68 | }; | 62 | | | 63 | 68 | size_t used_buckets = 1; | 64 | 68 | size_t current_bucket_size = 0; | 65 | | | 66 | 1.40k | for (const auto& [value, count] : value_map) { | 67 | 1.40k | current_bucket_size += count; | 68 | | | 69 | | // If adding the current value to the current bucket would exceed max_bucket_size, | 70 | | // then we start a new bucket. | 71 | 1.40k | if (current_bucket_size > max_bucket_size) { | 72 | 169 | ++used_buckets; | 73 | 169 | current_bucket_size = count; | 74 | 169 | } | 75 | | | 76 | | // If we have used more buckets than num_buckets, we cannot assign the values to buckets. | 77 | 1.40k | if (used_buckets > num_buckets) { | 78 | 30 | return false; | 79 | 30 | } | 80 | 1.40k | } | 81 | | | 82 | 38 | return true; | 83 | 68 | } |
_ZN5doris23can_assign_into_bucketsIlEEbRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEmm Line | Count | Source | 58 | 34 | const size_t num_buckets) { | 59 | 34 | if (value_map.empty()) { | 60 | 0 | return false; | 61 | 34 | }; | 62 | | | 63 | 34 | size_t used_buckets = 1; | 64 | 34 | size_t current_bucket_size = 0; | 65 | | | 66 | 1.29k | for (const auto& [value, count] : value_map) { | 67 | 1.29k | current_bucket_size += count; | 68 | | | 69 | | // If adding the current value to the current bucket would exceed max_bucket_size, | 70 | | // then we start a new bucket. | 71 | 1.29k | if (current_bucket_size > max_bucket_size) { | 72 | 90 | ++used_buckets; | 73 | 90 | current_bucket_size = count; | 74 | 90 | } | 75 | | | 76 | | // If we have used more buckets than num_buckets, we cannot assign the values to buckets. | 77 | 1.29k | if (used_buckets > num_buckets) { | 78 | 17 | return false; | 79 | 17 | } | 80 | 1.29k | } | 81 | | | 82 | 17 | return true; | 83 | 34 | } |
_ZN5doris23can_assign_into_bucketsInEEbRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEmm Line | Count | Source | 58 | 32 | const size_t num_buckets) { | 59 | 32 | if (value_map.empty()) { | 60 | 0 | return false; | 61 | 32 | }; | 62 | | | 63 | 32 | size_t used_buckets = 1; | 64 | 32 | size_t current_bucket_size = 0; | 65 | | | 66 | 1.28k | for (const auto& [value, count] : value_map) { | 67 | 1.28k | current_bucket_size += count; | 68 | | | 69 | | // If adding the current value to the current bucket would exceed max_bucket_size, | 70 | | // then we start a new bucket. | 71 | 1.28k | if (current_bucket_size > max_bucket_size) { | 72 | 96 | ++used_buckets; | 73 | 96 | current_bucket_size = count; | 74 | 96 | } | 75 | | | 76 | | // If we have used more buckets than num_buckets, we cannot assign the values to buckets. | 77 | 1.28k | if (used_buckets > num_buckets) { | 78 | 15 | return false; | 79 | 15 | } | 80 | 1.28k | } | 81 | | | 82 | 17 | return true; | 83 | 32 | } |
_ZN5doris23can_assign_into_bucketsIfEEbRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEmm Line | Count | Source | 58 | 31 | const size_t num_buckets) { | 59 | 31 | if (value_map.empty()) { | 60 | 0 | return false; | 61 | 31 | }; | 62 | | | 63 | 31 | size_t used_buckets = 1; | 64 | 31 | size_t current_bucket_size = 0; | 65 | | | 66 | 1.25k | for (const auto& [value, count] : value_map) { | 67 | 1.25k | current_bucket_size += count; | 68 | | | 69 | | // If adding the current value to the current bucket would exceed max_bucket_size, | 70 | | // then we start a new bucket. | 71 | 1.25k | if (current_bucket_size > max_bucket_size) { | 72 | 78 | ++used_buckets; | 73 | 78 | current_bucket_size = count; | 74 | 78 | } | 75 | | | 76 | | // If we have used more buckets than num_buckets, we cannot assign the values to buckets. | 77 | 1.25k | if (used_buckets > num_buckets) { | 78 | 15 | return false; | 79 | 15 | } | 80 | 1.25k | } | 81 | | | 82 | 16 | return true; | 83 | 31 | } |
_ZN5doris23can_assign_into_bucketsIdEEbRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEmm Line | Count | Source | 58 | 31 | const size_t num_buckets) { | 59 | 31 | if (value_map.empty()) { | 60 | 0 | return false; | 61 | 31 | }; | 62 | | | 63 | 31 | size_t used_buckets = 1; | 64 | 31 | size_t current_bucket_size = 0; | 65 | | | 66 | 1.25k | for (const auto& [value, count] : value_map) { | 67 | 1.25k | current_bucket_size += count; | 68 | | | 69 | | // If adding the current value to the current bucket would exceed max_bucket_size, | 70 | | // then we start a new bucket. | 71 | 1.25k | if (current_bucket_size > max_bucket_size) { | 72 | 78 | ++used_buckets; | 73 | 78 | current_bucket_size = count; | 74 | 78 | } | 75 | | | 76 | | // If we have used more buckets than num_buckets, we cannot assign the values to buckets. | 77 | 1.25k | if (used_buckets > num_buckets) { | 78 | 15 | return false; | 79 | 15 | } | 80 | 1.25k | } | 81 | | | 82 | 16 | return true; | 83 | 31 | } |
_ZN5doris23can_assign_into_bucketsINS_7DecimalIiEEEEbRKSt3mapIT_mSt4lessIS4_ESaISt4pairIKS4_mEEEmm Line | Count | Source | 58 | 39 | const size_t num_buckets) { | 59 | 39 | if (value_map.empty()) { | 60 | 0 | return false; | 61 | 39 | }; | 62 | | | 63 | 39 | size_t used_buckets = 1; | 64 | 39 | size_t current_bucket_size = 0; | 65 | | | 66 | 161 | for (const auto& [value, count] : value_map) { | 67 | 161 | current_bucket_size += count; | 68 | | | 69 | | // If adding the current value to the current bucket would exceed max_bucket_size, | 70 | | // then we start a new bucket. | 71 | 161 | if (current_bucket_size > max_bucket_size) { | 72 | 54 | ++used_buckets; | 73 | 54 | current_bucket_size = count; | 74 | 54 | } | 75 | | | 76 | | // If we have used more buckets than num_buckets, we cannot assign the values to buckets. | 77 | 161 | if (used_buckets > num_buckets) { | 78 | 17 | return false; | 79 | 17 | } | 80 | 161 | } | 81 | | | 82 | 22 | return true; | 83 | 39 | } |
Unexecuted instantiation: _ZN5doris23can_assign_into_bucketsINS_7DecimalIlEEEEbRKSt3mapIT_mSt4lessIS4_ESaISt4pairIKS4_mEEEmm Unexecuted instantiation: _ZN5doris23can_assign_into_bucketsINS_12Decimal128V3EEEbRKSt3mapIT_mSt4lessIS3_ESaISt4pairIKS3_mEEEmm Unexecuted instantiation: _ZN5doris23can_assign_into_bucketsINS_7DecimalIN4wide7integerILm256EiEEEEEEbRKSt3mapIT_mSt4lessIS7_ESaISt4pairIKS7_mEEEmm _ZN5doris23can_assign_into_bucketsINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEbRKSt3mapIT_mSt4lessIS8_ESaISt4pairIKS8_mEEEmm Line | Count | Source | 58 | 100 | const size_t num_buckets) { | 59 | 100 | if (value_map.empty()) { | 60 | 0 | return false; | 61 | 100 | }; | 62 | | | 63 | 100 | size_t used_buckets = 1; | 64 | 100 | size_t current_bucket_size = 0; | 65 | | | 66 | 18.3k | for (const auto& [value, count] : value_map) { | 67 | 18.3k | current_bucket_size += count; | 68 | | | 69 | | // If adding the current value to the current bucket would exceed max_bucket_size, | 70 | | // then we start a new bucket. | 71 | 18.3k | if (current_bucket_size > max_bucket_size) { | 72 | 197 | ++used_buckets; | 73 | 197 | current_bucket_size = count; | 74 | 197 | } | 75 | | | 76 | | // If we have used more buckets than num_buckets, we cannot assign the values to buckets. | 77 | 18.3k | if (used_buckets > num_buckets) { | 78 | 38 | return false; | 79 | 38 | } | 80 | 18.3k | } | 81 | | | 82 | 62 | return true; | 83 | 100 | } |
_ZN5doris23can_assign_into_bucketsINS_11DateV2ValueINS_15DateV2ValueTypeEEEEEbRKSt3mapIT_mSt4lessIS5_ESaISt4pairIKS5_mEEEmm Line | Count | Source | 58 | 51 | const size_t num_buckets) { | 59 | 51 | if (value_map.empty()) { | 60 | 0 | return false; | 61 | 51 | }; | 62 | | | 63 | 51 | size_t used_buckets = 1; | 64 | 51 | size_t current_bucket_size = 0; | 65 | | | 66 | 1.31k | for (const auto& [value, count] : value_map) { | 67 | 1.31k | current_bucket_size += count; | 68 | | | 69 | | // If adding the current value to the current bucket would exceed max_bucket_size, | 70 | | // then we start a new bucket. | 71 | 1.31k | if (current_bucket_size > max_bucket_size) { | 72 | 114 | ++used_buckets; | 73 | 114 | current_bucket_size = count; | 74 | 114 | } | 75 | | | 76 | | // If we have used more buckets than num_buckets, we cannot assign the values to buckets. | 77 | 1.31k | if (used_buckets > num_buckets) { | 78 | 23 | return false; | 79 | 23 | } | 80 | 1.31k | } | 81 | | | 82 | 28 | return true; | 83 | 51 | } |
_ZN5doris23can_assign_into_bucketsINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEEEEbRKSt3mapIT_mSt4lessIS5_ESaISt4pairIKS5_mEEEmm Line | Count | Source | 58 | 51 | const size_t num_buckets) { | 59 | 51 | if (value_map.empty()) { | 60 | 0 | return false; | 61 | 51 | }; | 62 | | | 63 | 51 | size_t used_buckets = 1; | 64 | 51 | size_t current_bucket_size = 0; | 65 | | | 66 | 1.31k | for (const auto& [value, count] : value_map) { | 67 | 1.31k | current_bucket_size += count; | 68 | | | 69 | | // If adding the current value to the current bucket would exceed max_bucket_size, | 70 | | // then we start a new bucket. | 71 | 1.31k | if (current_bucket_size > max_bucket_size) { | 72 | 115 | ++used_buckets; | 73 | 115 | current_bucket_size = count; | 74 | 115 | } | 75 | | | 76 | | // If we have used more buckets than num_buckets, we cannot assign the values to buckets. | 77 | 1.31k | if (used_buckets > num_buckets) { | 78 | 23 | return false; | 79 | 23 | } | 80 | 1.31k | } | 81 | | | 82 | 28 | return true; | 83 | 51 | } |
|
84 | | |
85 | | /** |
86 | | * Calculates the maximum number of values that can fit into each bucket given a set of values |
87 | | * and the desired number of buckets. |
88 | | * |
89 | | * @tparam T the type of the values in the value map |
90 | | * @param value_map the map of values and their counts |
91 | | * @param num_buckets the desired number of buckets |
92 | | * @return the maximum number of values that can fit into each bucket |
93 | | */ |
template <typename T>
size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) {
    // The caller must supply at least one value; an empty map has no
    // meaningful bucket capacity.
    assert(!value_map.empty());

    // Total number of values across all entries.
    const size_t total_values =
            std::accumulate(value_map.begin(), value_map.end(), size_t(0),
                            [](size_t sum, const auto& entry) { return sum + entry.second; });

    // With a single bucket, every value must be assigned to it.
    if (num_buckets == 1) {
        return total_values;
    }

    // Conservative upper bound on the per-bucket capacity:
    // 2 * total_values / (num_buckets - 1) + 1. It may exceed the true
    // maximum but never underestimates it; the binary search below tightens
    // it toward the actual value.
    size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1;

    // Lower bound of the search interval (exclusive: known-infeasible).
    size_t lower_bucket_values = 0;

    // Binary search for the smallest feasible bucket capacity. The number of
    // iterations is capped, so the result may remain a feasible overestimate
    // rather than the exact minimum.
    int search_step = 0;
    const int max_search_steps =
            10; // Limit the number of search steps to avoid excessive iteration

    while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) {
        // Midpoint of the current search interval.
        const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2;

        if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) {
            // Feasible: the answer is at most bucket_values.
            upper_bucket_values = bucket_values;
        } else {
            // Infeasible: the answer is strictly greater than bucket_values.
            lower_bucket_values = bucket_values;
        }
        ++search_step;
    }

    return upper_bucket_values;
}
_ZN5doris27calculate_bucket_max_valuesIfEEmRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEm Line | Count | Source | 95 | 41 | size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) { | 96 | | // Ensure that the value map is not empty | 97 | 41 | assert(!value_map.empty()); | 98 | | | 99 | | // Calculate the total number of values in the map using std::accumulate() | 100 | 41 | size_t total_values = 0; | 101 | 328 | for (const auto& [value, count] : value_map) { | 102 | 328 | total_values += count; | 103 | 328 | } | 104 | | | 105 | | // If there is only one bucket, then all values will be assigned to that bucket | 106 | 41 | if (num_buckets == 1) { | 107 | 0 | return total_values; | 108 | 0 | } | 109 | | | 110 | | // To calculate the maximum value count in each bucket, we first calculate a conservative upper | 111 | | // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed | 112 | | // the actual maximum value count, but it does not underestimate it. The subsequent binary search | 113 | | // algorithm will approach the actual maximum value count. 
| 114 | 41 | size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1; | 115 | | | 116 | | // Initialize the lower bound to 0 | 117 | 41 | size_t lower_bucket_values = 0; | 118 | | | 119 | | // Perform a binary search to find the maximum number of values that can fit into each bucket | 120 | 41 | int search_step = 0; | 121 | 41 | const int max_search_steps = | 122 | 41 | 10; // Limit the number of search steps to avoid excessive iteration | 123 | | | 124 | 72 | while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) { | 125 | | // Calculate the midpoint of the upper and lower bounds | 126 | 31 | const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2; | 127 | | | 128 | | // Check if the given number of values can be assigned to the desired number of buckets | 129 | 31 | if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) { | 130 | | // If it can, then set the upper bound to the midpoint | 131 | 16 | upper_bucket_values = bucket_values; | 132 | 16 | } else { | 133 | | // If it can't, then set the lower bound to the midpoint | 134 | 15 | lower_bucket_values = bucket_values; | 135 | 15 | } | 136 | | // Increment the search step counter | 137 | 31 | ++search_step; | 138 | 31 | } | 139 | | | 140 | 41 | return upper_bucket_values; | 141 | 41 | } |
_ZN5doris27calculate_bucket_max_valuesIdEEmRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEm Line | Count | Source | 95 | 41 | size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) { | 96 | | // Ensure that the value map is not empty | 97 | 41 | assert(!value_map.empty()); | 98 | | | 99 | | // Calculate the total number of values in the map using std::accumulate() | 100 | 41 | size_t total_values = 0; | 101 | 328 | for (const auto& [value, count] : value_map) { | 102 | 328 | total_values += count; | 103 | 328 | } | 104 | | | 105 | | // If there is only one bucket, then all values will be assigned to that bucket | 106 | 41 | if (num_buckets == 1) { | 107 | 0 | return total_values; | 108 | 0 | } | 109 | | | 110 | | // To calculate the maximum value count in each bucket, we first calculate a conservative upper | 111 | | // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed | 112 | | // the actual maximum value count, but it does not underestimate it. The subsequent binary search | 113 | | // algorithm will approach the actual maximum value count. 
| 114 | 41 | size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1; | 115 | | | 116 | | // Initialize the lower bound to 0 | 117 | 41 | size_t lower_bucket_values = 0; | 118 | | | 119 | | // Perform a binary search to find the maximum number of values that can fit into each bucket | 120 | 41 | int search_step = 0; | 121 | 41 | const int max_search_steps = | 122 | 41 | 10; // Limit the number of search steps to avoid excessive iteration | 123 | | | 124 | 72 | while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) { | 125 | | // Calculate the midpoint of the upper and lower bounds | 126 | 31 | const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2; | 127 | | | 128 | | // Check if the given number of values can be assigned to the desired number of buckets | 129 | 31 | if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) { | 130 | | // If it can, then set the upper bound to the midpoint | 131 | 16 | upper_bucket_values = bucket_values; | 132 | 16 | } else { | 133 | | // If it can't, then set the lower bound to the midpoint | 134 | 15 | lower_bucket_values = bucket_values; | 135 | 15 | } | 136 | | // Increment the search step counter | 137 | 31 | ++search_step; | 138 | 31 | } | 139 | | | 140 | 41 | return upper_bucket_values; | 141 | 41 | } |
_ZN5doris27calculate_bucket_max_valuesINS_7DecimalIiEEEEmRKSt3mapIT_mSt4lessIS4_ESaISt4pairIKS4_mEEEm Line | Count | Source | 95 | 45 | size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) { | 96 | | // Ensure that the value map is not empty | 97 | 45 | assert(!value_map.empty()); | 98 | | | 99 | | // Calculate the total number of values in the map using std::accumulate() | 100 | 45 | size_t total_values = 0; | 101 | 161 | for (const auto& [value, count] : value_map) { | 102 | 161 | total_values += count; | 103 | 161 | } | 104 | | | 105 | | // If there is only one bucket, then all values will be assigned to that bucket | 106 | 45 | if (num_buckets == 1) { | 107 | 0 | return total_values; | 108 | 0 | } | 109 | | | 110 | | // To calculate the maximum value count in each bucket, we first calculate a conservative upper | 111 | | // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed | 112 | | // the actual maximum value count, but it does not underestimate it. The subsequent binary search | 113 | | // algorithm will approach the actual maximum value count. 
| 114 | 45 | size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1; | 115 | | | 116 | | // Initialize the lower bound to 0 | 117 | 45 | size_t lower_bucket_values = 0; | 118 | | | 119 | | // Perform a binary search to find the maximum number of values that can fit into each bucket | 120 | 45 | int search_step = 0; | 121 | 45 | const int max_search_steps = | 122 | 45 | 10; // Limit the number of search steps to avoid excessive iteration | 123 | | | 124 | 84 | while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) { | 125 | | // Calculate the midpoint of the upper and lower bounds | 126 | 39 | const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2; | 127 | | | 128 | | // Check if the given number of values can be assigned to the desired number of buckets | 129 | 39 | if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) { | 130 | | // If it can, then set the upper bound to the midpoint | 131 | 22 | upper_bucket_values = bucket_values; | 132 | 22 | } else { | 133 | | // If it can't, then set the lower bound to the midpoint | 134 | 17 | lower_bucket_values = bucket_values; | 135 | 17 | } | 136 | | // Increment the search step counter | 137 | 39 | ++search_step; | 138 | 39 | } | 139 | | | 140 | 45 | return upper_bucket_values; | 141 | 45 | } |
Unexecuted instantiation: _ZN5doris27calculate_bucket_max_valuesINS_7DecimalIlEEEEmRKSt3mapIT_mSt4lessIS4_ESaISt4pairIKS4_mEEEm Unexecuted instantiation: _ZN5doris27calculate_bucket_max_valuesINS_12Decimal128V3EEEmRKSt3mapIT_mSt4lessIS3_ESaISt4pairIKS3_mEEEm Unexecuted instantiation: _ZN5doris27calculate_bucket_max_valuesINS_7DecimalIN4wide7integerILm256EiEEEEEEmRKSt3mapIT_mSt4lessIS7_ESaISt4pairIKS7_mEEEm _ZN5doris27calculate_bucket_max_valuesINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEmRKSt3mapIT_mSt4lessIS8_ESaISt4pairIKS8_mEEEm Line | Count | Source | 95 | 100 | size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) { | 96 | | // Ensure that the value map is not empty | 97 | 100 | assert(!value_map.empty()); | 98 | | | 99 | | // Calculate the total number of values in the map using std::accumulate() | 100 | 100 | size_t total_values = 0; | 101 | 2.20k | for (const auto& [value, count] : value_map) { | 102 | 2.20k | total_values += count; | 103 | 2.20k | } | 104 | | | 105 | | // If there is only one bucket, then all values will be assigned to that bucket | 106 | 100 | if (num_buckets == 1) { | 107 | 0 | return total_values; | 108 | 0 | } | 109 | | | 110 | | // To calculate the maximum value count in each bucket, we first calculate a conservative upper | 111 | | // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed | 112 | | // the actual maximum value count, but it does not underestimate it. The subsequent binary search | 113 | | // algorithm will approach the actual maximum value count. 
| 114 | 100 | size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1; | 115 | | | 116 | | // Initialize the lower bound to 0 | 117 | 100 | size_t lower_bucket_values = 0; | 118 | | | 119 | | // Perform a binary search to find the maximum number of values that can fit into each bucket | 120 | 100 | int search_step = 0; | 121 | 100 | const int max_search_steps = | 122 | 100 | 10; // Limit the number of search steps to avoid excessive iteration | 123 | | | 124 | 200 | while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) { | 125 | | // Calculate the midpoint of the upper and lower bounds | 126 | 100 | const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2; | 127 | | | 128 | | // Check if the given number of values can be assigned to the desired number of buckets | 129 | 100 | if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) { | 130 | | // If it can, then set the upper bound to the midpoint | 131 | 62 | upper_bucket_values = bucket_values; | 132 | 62 | } else { | 133 | | // If it can't, then set the lower bound to the midpoint | 134 | 38 | lower_bucket_values = bucket_values; | 135 | 38 | } | 136 | | // Increment the search step counter | 137 | 100 | ++search_step; | 138 | 100 | } | 139 | | | 140 | 100 | return upper_bucket_values; | 141 | 100 | } |
_ZN5doris27calculate_bucket_max_valuesINS_11DateV2ValueINS_15DateV2ValueTypeEEEEEmRKSt3mapIT_mSt4lessIS5_ESaISt4pairIKS5_mEEEm Line | Count | Source | 95 | 81 | size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) { | 96 | | // Ensure that the value map is not empty | 97 | 81 | assert(!value_map.empty()); | 98 | | | 99 | | // Calculate the total number of values in the map using std::accumulate() | 100 | 81 | size_t total_values = 0; | 101 | 458 | for (const auto& [value, count] : value_map) { | 102 | 458 | total_values += count; | 103 | 458 | } | 104 | | | 105 | | // If there is only one bucket, then all values will be assigned to that bucket | 106 | 81 | if (num_buckets == 1) { | 107 | 0 | return total_values; | 108 | 0 | } | 109 | | | 110 | | // To calculate the maximum value count in each bucket, we first calculate a conservative upper | 111 | | // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed | 112 | | // the actual maximum value count, but it does not underestimate it. The subsequent binary search | 113 | | // algorithm will approach the actual maximum value count. 
| 114 | 81 | size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1; | 115 | | | 116 | | // Initialize the lower bound to 0 | 117 | 81 | size_t lower_bucket_values = 0; | 118 | | | 119 | | // Perform a binary search to find the maximum number of values that can fit into each bucket | 120 | 81 | int search_step = 0; | 121 | 81 | const int max_search_steps = | 122 | 81 | 10; // Limit the number of search steps to avoid excessive iteration | 123 | | | 124 | 132 | while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) { | 125 | | // Calculate the midpoint of the upper and lower bounds | 126 | 51 | const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2; | 127 | | | 128 | | // Check if the given number of values can be assigned to the desired number of buckets | 129 | 51 | if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) { | 130 | | // If it can, then set the upper bound to the midpoint | 131 | 28 | upper_bucket_values = bucket_values; | 132 | 28 | } else { | 133 | | // If it can't, then set the lower bound to the midpoint | 134 | 23 | lower_bucket_values = bucket_values; | 135 | 23 | } | 136 | | // Increment the search step counter | 137 | 51 | ++search_step; | 138 | 51 | } | 139 | | | 140 | 81 | return upper_bucket_values; | 141 | 81 | } |
_ZN5doris27calculate_bucket_max_valuesINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEEEEmRKSt3mapIT_mSt4lessIS5_ESaISt4pairIKS5_mEEEm Line | Count | Source | 95 | 81 | size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) { | 96 | | // Ensure that the value map is not empty | 97 | 81 | assert(!value_map.empty()); | 98 | | | 99 | | // Calculate the total number of values in the map using std::accumulate() | 100 | 81 | size_t total_values = 0; | 101 | 457 | for (const auto& [value, count] : value_map) { | 102 | 457 | total_values += count; | 103 | 457 | } | 104 | | | 105 | | // If there is only one bucket, then all values will be assigned to that bucket | 106 | 81 | if (num_buckets == 1) { | 107 | 0 | return total_values; | 108 | 0 | } | 109 | | | 110 | | // To calculate the maximum value count in each bucket, we first calculate a conservative upper | 111 | | // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed | 112 | | // the actual maximum value count, but it does not underestimate it. The subsequent binary search | 113 | | // algorithm will approach the actual maximum value count. 
| 114 | 81 | size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1; | 115 | | | 116 | | // Initialize the lower bound to 0 | 117 | 81 | size_t lower_bucket_values = 0; | 118 | | | 119 | | // Perform a binary search to find the maximum number of values that can fit into each bucket | 120 | 81 | int search_step = 0; | 121 | 81 | const int max_search_steps = | 122 | 81 | 10; // Limit the number of search steps to avoid excessive iteration | 123 | | | 124 | 132 | while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) { | 125 | | // Calculate the midpoint of the upper and lower bounds | 126 | 51 | const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2; | 127 | | | 128 | | // Check if the given number of values can be assigned to the desired number of buckets | 129 | 51 | if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) { | 130 | | // If it can, then set the upper bound to the midpoint | 131 | 28 | upper_bucket_values = bucket_values; | 132 | 28 | } else { | 133 | | // If it can't, then set the lower bound to the midpoint | 134 | 23 | lower_bucket_values = bucket_values; | 135 | 23 | } | 136 | | // Increment the search step counter | 137 | 51 | ++search_step; | 138 | 51 | } | 139 | | | 140 | 81 | return upper_bucket_values; | 141 | 81 | } |
|
142 | | |
143 | | /** |
144 | | * Greedy equi-height histogram construction algorithm, inspired by the MySQL |
145 | | * equi_height implementation(https://dev.mysql.com/doc/dev/mysql-server/latest/equi__height_8h.html). |
146 | | * |
147 | | * Given an ordered collection of [value, count] pairs and a maximum bucket |
148 | | * size, construct a histogram by inserting values into a bucket while keeping |
149 | | * track of its size. If the insertion of a value into a non-empty bucket |
150 | | * causes the bucket to exceed the maximum size, create a new empty bucket and |
151 | | * continue. |
152 | | * |
153 | | * The algorithm guarantees a selectivity estimation error of at most ~2 * |
154 | | * #values / #buckets, often less. Values with a higher relative frequency are |
155 | | * guaranteed to be placed in singleton buckets. |
156 | | * |
157 | | * The minimum composite bucket size is used to minimize the worst case |
158 | | * selectivity estimation error. In general, the algorithm will adapt to the |
159 | | * data distribution to minimize the size of composite buckets. The heavy values |
160 | | * can be placed in singleton buckets and the remaining values will be evenly |
161 | | * spread across the remaining buckets, leading to a lower composite bucket size. |
162 | | * |
163 | | * Note: The term "value" refers to an entry in a column and the actual value |
164 | | * of an entry. The ordered_map is an ordered collection of [distinct value, |
165 | | * value count] pairs. For example, a Value_map<String> could contain the pairs ["a", 1], ["b", 2] |
166 | | * to represent one "a" value and two "b" values. |
167 | | * |
168 | | * @param buckets A vector of empty buckets that will be populated with data. |
169 | | * @param ordered_map An ordered map of distinct values and their counts. |
170 | | * @param max_num_buckets The maximum number of buckets that can be used. |
171 | | * |
172 | | * @return True if the buckets were successfully built, false otherwise. |
173 | | */ |
174 | | template <typename T> |
175 | | bool build_histogram(std::vector<Bucket<T>>& buckets, const std::map<T, size_t>& ordered_map, |
176 | 707 | const size_t max_num_buckets) { |
177 | | // If the input map is empty, there is nothing to build. |
178 | 707 | if (ordered_map.empty()) { |
179 | 66 | return false; |
180 | 66 | } |
181 | | |
182 | | // Calculate the maximum number of values that can be assigned to each bucket. |
183 | 641 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); |
184 | | |
185 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional |
186 | | // allocations when inserting buckets. |
187 | 641 | buckets.clear(); |
188 | 641 | buckets.reserve(max_num_buckets); |
189 | | |
190 | | // Initialize bucket variables. |
191 | 641 | size_t distinct_values_count = 0; |
192 | 641 | size_t values_count = 0; |
193 | 641 | size_t cumulative_values = 0; |
194 | | |
195 | | // Record how many values still need to be assigned. |
196 | 641 | auto remaining_distinct_values = ordered_map.size(); |
197 | | |
198 | 641 | auto it = ordered_map.begin(); |
199 | | |
200 | | // Lower value of the current bucket. |
201 | 641 | const T* lower_value = &it->first; |
202 | | |
203 | | // Iterate over the ordered map of distinct values and their counts. |
204 | 6.39k | for (; it != ordered_map.end(); ++it) { |
205 | 5.74k | const auto count = it->second; |
206 | 5.74k | const auto current_value = it->first; |
207 | | |
208 | | // Update the bucket counts and track the number of distinct values assigned. |
209 | 5.74k | distinct_values_count++; |
210 | 5.74k | remaining_distinct_values--; |
211 | 5.74k | values_count += count; |
212 | 5.74k | cumulative_values += count; |
213 | | |
214 | | // Check whether the current value should be added to the current bucket. |
215 | 5.74k | auto next = std::next(it); |
216 | 5.74k | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; |
217 | | |
218 | 5.74k | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && |
219 | 5.74k | values_count + next->second <= bucket_max_values) { |
220 | | // If the current value is the last in the input map and there are more remaining |
221 | | // distinct values than empty buckets and adding the value does not cause the bucket |
222 | | // to exceed its max size, skip adding the value to the current bucket. |
223 | 3.89k | continue; |
224 | 3.89k | } |
225 | | |
226 | | // Finalize the current bucket and add it to our collection of buckets. |
227 | 1.85k | auto pre_sum = cumulative_values - values_count; |
228 | | |
229 | 1.85k | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, |
230 | 1.85k | pre_sum); |
231 | 1.85k | buckets.push_back(new_bucket); |
232 | | |
233 | | // Reset variables for the next bucket. |
234 | 1.85k | if (next != ordered_map.end()) { |
235 | 1.21k | lower_value = &next->first; |
236 | 1.21k | } |
237 | 1.85k | values_count = 0; |
238 | 1.85k | distinct_values_count = 0; |
239 | 1.85k | } |
240 | | |
241 | 641 | return true; |
242 | 707 | } _ZN5doris15build_histogramIhEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm Line | Count | Source | 176 | 2 | const size_t max_num_buckets) { | 177 | | // If the input map is empty, there is nothing to build. | 178 | 2 | if (ordered_map.empty()) { | 179 | 1 | return false; | 180 | 1 | } | 181 | | | 182 | | // Calculate the maximum number of values that can be assigned to each bucket. | 183 | 1 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 184 | | | 185 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 186 | | // allocations when inserting buckets. | 187 | 1 | buckets.clear(); | 188 | 1 | buckets.reserve(max_num_buckets); | 189 | | | 190 | | // Initialize bucket variables. | 191 | 1 | size_t distinct_values_count = 0; | 192 | 1 | size_t values_count = 0; | 193 | 1 | size_t cumulative_values = 0; | 194 | | | 195 | | // Record how many values still need to be assigned. | 196 | 1 | auto remaining_distinct_values = ordered_map.size(); | 197 | | | 198 | 1 | auto it = ordered_map.begin(); | 199 | | | 200 | | // Lower value of the current bucket. | 201 | 1 | const T* lower_value = &it->first; | 202 | | | 203 | | // Iterate over the ordered map of distinct values and their counts. | 204 | 3 | for (; it != ordered_map.end(); ++it) { | 205 | 2 | const auto count = it->second; | 206 | 2 | const auto current_value = it->first; | 207 | | | 208 | | // Update the bucket counts and track the number of distinct values assigned. | 209 | 2 | distinct_values_count++; | 210 | 2 | remaining_distinct_values--; | 211 | 2 | values_count += count; | 212 | 2 | cumulative_values += count; | 213 | | | 214 | | // Check whether the current value should be added to the current bucket. 
| 215 | 2 | auto next = std::next(it); | 216 | 2 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 217 | | | 218 | 2 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 219 | 2 | values_count + next->second <= bucket_max_values) { | 220 | | // If the current value is the last in the input map and there are more remaining | 221 | | // distinct values than empty buckets and adding the value does not cause the bucket | 222 | | // to exceed its max size, skip adding the value to the current bucket. | 223 | 0 | continue; | 224 | 0 | } | 225 | | | 226 | | // Finalize the current bucket and add it to our collection of buckets. | 227 | 2 | auto pre_sum = cumulative_values - values_count; | 228 | | | 229 | 2 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 230 | 2 | pre_sum); | 231 | 2 | buckets.push_back(new_bucket); | 232 | | | 233 | | // Reset variables for the next bucket. | 234 | 2 | if (next != ordered_map.end()) { | 235 | 1 | lower_value = &next->first; | 236 | 1 | } | 237 | 2 | values_count = 0; | 238 | 2 | distinct_values_count = 0; | 239 | 2 | } | 240 | | | 241 | 1 | return true; | 242 | 2 | } |
_ZN5doris15build_histogramIaEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm Line | Count | Source | 176 | 48 | const size_t max_num_buckets) { | 177 | | // If the input map is empty, there is nothing to build. | 178 | 48 | if (ordered_map.empty()) { | 179 | 6 | return false; | 180 | 6 | } | 181 | | | 182 | | // Calculate the maximum number of values that can be assigned to each bucket. | 183 | 42 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 184 | | | 185 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 186 | | // allocations when inserting buckets. | 187 | 42 | buckets.clear(); | 188 | 42 | buckets.reserve(max_num_buckets); | 189 | | | 190 | | // Initialize bucket variables. | 191 | 42 | size_t distinct_values_count = 0; | 192 | 42 | size_t values_count = 0; | 193 | 42 | size_t cumulative_values = 0; | 194 | | | 195 | | // Record how many values still need to be assigned. | 196 | 42 | auto remaining_distinct_values = ordered_map.size(); | 197 | | | 198 | 42 | auto it = ordered_map.begin(); | 199 | | | 200 | | // Lower value of the current bucket. | 201 | 42 | const T* lower_value = &it->first; | 202 | | | 203 | | // Iterate over the ordered map of distinct values and their counts. | 204 | 382 | for (; it != ordered_map.end(); ++it) { | 205 | 340 | const auto count = it->second; | 206 | 340 | const auto current_value = it->first; | 207 | | | 208 | | // Update the bucket counts and track the number of distinct values assigned. | 209 | 340 | distinct_values_count++; | 210 | 340 | remaining_distinct_values--; | 211 | 340 | values_count += count; | 212 | 340 | cumulative_values += count; | 213 | | | 214 | | // Check whether the current value should be added to the current bucket. 
| 215 | 340 | auto next = std::next(it); | 216 | 340 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 217 | | | 218 | 340 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 219 | 340 | values_count + next->second <= bucket_max_values) { | 220 | | // If the current value is the last in the input map and there are more remaining | 221 | | // distinct values than empty buckets and adding the value does not cause the bucket | 222 | | // to exceed its max size, skip adding the value to the current bucket. | 223 | 211 | continue; | 224 | 211 | } | 225 | | | 226 | | // Finalize the current bucket and add it to our collection of buckets. | 227 | 129 | auto pre_sum = cumulative_values - values_count; | 228 | | | 229 | 129 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 230 | 129 | pre_sum); | 231 | 129 | buckets.push_back(new_bucket); | 232 | | | 233 | | // Reset variables for the next bucket. | 234 | 129 | if (next != ordered_map.end()) { | 235 | 87 | lower_value = &next->first; | 236 | 87 | } | 237 | 129 | values_count = 0; | 238 | 129 | distinct_values_count = 0; | 239 | 129 | } | 240 | | | 241 | 42 | return true; | 242 | 48 | } |
_ZN5doris15build_histogramIsEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm Line | Count | Source | 176 | 48 | const size_t max_num_buckets) { | 177 | | // If the input map is empty, there is nothing to build. | 178 | 48 | if (ordered_map.empty()) { | 179 | 5 | return false; | 180 | 5 | } | 181 | | | 182 | | // Calculate the maximum number of values that can be assigned to each bucket. | 183 | 43 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 184 | | | 185 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 186 | | // allocations when inserting buckets. | 187 | 43 | buckets.clear(); | 188 | 43 | buckets.reserve(max_num_buckets); | 189 | | | 190 | | // Initialize bucket variables. | 191 | 43 | size_t distinct_values_count = 0; | 192 | 43 | size_t values_count = 0; | 193 | 43 | size_t cumulative_values = 0; | 194 | | | 195 | | // Record how many values still need to be assigned. | 196 | 43 | auto remaining_distinct_values = ordered_map.size(); | 197 | | | 198 | 43 | auto it = ordered_map.begin(); | 199 | | | 200 | | // Lower value of the current bucket. | 201 | 43 | const T* lower_value = &it->first; | 202 | | | 203 | | // Iterate over the ordered map of distinct values and their counts. | 204 | 392 | for (; it != ordered_map.end(); ++it) { | 205 | 349 | const auto count = it->second; | 206 | 349 | const auto current_value = it->first; | 207 | | | 208 | | // Update the bucket counts and track the number of distinct values assigned. | 209 | 349 | distinct_values_count++; | 210 | 349 | remaining_distinct_values--; | 211 | 349 | values_count += count; | 212 | 349 | cumulative_values += count; | 213 | | | 214 | | // Check whether the current value should be added to the current bucket. 
| 215 | 349 | auto next = std::next(it); | 216 | 349 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 217 | | | 218 | 349 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 219 | 349 | values_count + next->second <= bucket_max_values) { | 220 | | // If the current value is the last in the input map and there are more remaining | 221 | | // distinct values than empty buckets and adding the value does not cause the bucket | 222 | | // to exceed its max size, skip adding the value to the current bucket. | 223 | 217 | continue; | 224 | 217 | } | 225 | | | 226 | | // Finalize the current bucket and add it to our collection of buckets. | 227 | 132 | auto pre_sum = cumulative_values - values_count; | 228 | | | 229 | 132 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 230 | 132 | pre_sum); | 231 | 132 | buckets.push_back(new_bucket); | 232 | | | 233 | | // Reset variables for the next bucket. | 234 | 132 | if (next != ordered_map.end()) { | 235 | 89 | lower_value = &next->first; | 236 | 89 | } | 237 | 132 | values_count = 0; | 238 | 132 | distinct_values_count = 0; | 239 | 132 | } | 240 | | | 241 | 43 | return true; | 242 | 48 | } |
_ZN5doris15build_histogramIiEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm Line | Count | Source | 176 | 74 | const size_t max_num_buckets) { | 177 | | // If the input map is empty, there is nothing to build. | 178 | 74 | if (ordered_map.empty()) { | 179 | 9 | return false; | 180 | 9 | } | 181 | | | 182 | | // Calculate the maximum number of values that can be assigned to each bucket. | 183 | 65 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 184 | | | 185 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 186 | | // allocations when inserting buckets. | 187 | 65 | buckets.clear(); | 188 | 65 | buckets.reserve(max_num_buckets); | 189 | | | 190 | | // Initialize bucket variables. | 191 | 65 | size_t distinct_values_count = 0; | 192 | 65 | size_t values_count = 0; | 193 | 65 | size_t cumulative_values = 0; | 194 | | | 195 | | // Record how many values still need to be assigned. | 196 | 65 | auto remaining_distinct_values = ordered_map.size(); | 197 | | | 198 | 65 | auto it = ordered_map.begin(); | 199 | | | 200 | | // Lower value of the current bucket. | 201 | 65 | const T* lower_value = &it->first; | 202 | | | 203 | | // Iterate over the ordered map of distinct values and their counts. | 204 | 428 | for (; it != ordered_map.end(); ++it) { | 205 | 363 | const auto count = it->second; | 206 | 363 | const auto current_value = it->first; | 207 | | | 208 | | // Update the bucket counts and track the number of distinct values assigned. | 209 | 363 | distinct_values_count++; | 210 | 363 | remaining_distinct_values--; | 211 | 363 | values_count += count; | 212 | 363 | cumulative_values += count; | 213 | | | 214 | | // Check whether the current value should be added to the current bucket. 
| 215 | 363 | auto next = std::next(it); | 216 | 363 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 217 | | | 218 | 363 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 219 | 363 | values_count + next->second <= bucket_max_values) { | 220 | | // If the current value is the last in the input map and there are more remaining | 221 | | // distinct values than empty buckets and adding the value does not cause the bucket | 222 | | // to exceed its max size, skip adding the value to the current bucket. | 223 | 202 | continue; | 224 | 202 | } | 225 | | | 226 | | // Finalize the current bucket and add it to our collection of buckets. | 227 | 161 | auto pre_sum = cumulative_values - values_count; | 228 | | | 229 | 161 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 230 | 161 | pre_sum); | 231 | 161 | buckets.push_back(new_bucket); | 232 | | | 233 | | // Reset variables for the next bucket. | 234 | 161 | if (next != ordered_map.end()) { | 235 | 96 | lower_value = &next->first; | 236 | 96 | } | 237 | 161 | values_count = 0; | 238 | 161 | distinct_values_count = 0; | 239 | 161 | } | 240 | | | 241 | 65 | return true; | 242 | 74 | } |
_ZN5doris15build_histogramIlEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm Line | Count | Source | 176 | 65 | const size_t max_num_buckets) { | 177 | | // If the input map is empty, there is nothing to build. | 178 | 65 | if (ordered_map.empty()) { | 179 | 6 | return false; | 180 | 6 | } | 181 | | | 182 | | // Calculate the maximum number of values that can be assigned to each bucket. | 183 | 59 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 184 | | | 185 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 186 | | // allocations when inserting buckets. | 187 | 59 | buckets.clear(); | 188 | 59 | buckets.reserve(max_num_buckets); | 189 | | | 190 | | // Initialize bucket variables. | 191 | 59 | size_t distinct_values_count = 0; | 192 | 59 | size_t values_count = 0; | 193 | 59 | size_t cumulative_values = 0; | 194 | | | 195 | | // Record how many values still need to be assigned. | 196 | 59 | auto remaining_distinct_values = ordered_map.size(); | 197 | | | 198 | 59 | auto it = ordered_map.begin(); | 199 | | | 200 | | // Lower value of the current bucket. | 201 | 59 | const T* lower_value = &it->first; | 202 | | | 203 | | // Iterate over the ordered map of distinct values and their counts. | 204 | 477 | for (; it != ordered_map.end(); ++it) { | 205 | 418 | const auto count = it->second; | 206 | 418 | const auto current_value = it->first; | 207 | | | 208 | | // Update the bucket counts and track the number of distinct values assigned. | 209 | 418 | distinct_values_count++; | 210 | 418 | remaining_distinct_values--; | 211 | 418 | values_count += count; | 212 | 418 | cumulative_values += count; | 213 | | | 214 | | // Check whether the current value should be added to the current bucket. 
| 215 | 418 | auto next = std::next(it); | 216 | 418 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 217 | | | 218 | 418 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 219 | 418 | values_count + next->second <= bucket_max_values) { | 220 | | // If the current value is the last in the input map and there are more remaining | 221 | | // distinct values than empty buckets and adding the value does not cause the bucket | 222 | | // to exceed its max size, skip adding the value to the current bucket. | 223 | 206 | continue; | 224 | 206 | } | 225 | | | 226 | | // Finalize the current bucket and add it to our collection of buckets. | 227 | 212 | auto pre_sum = cumulative_values - values_count; | 228 | | | 229 | 212 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 230 | 212 | pre_sum); | 231 | 212 | buckets.push_back(new_bucket); | 232 | | | 233 | | // Reset variables for the next bucket. | 234 | 212 | if (next != ordered_map.end()) { | 235 | 153 | lower_value = &next->first; | 236 | 153 | } | 237 | 212 | values_count = 0; | 238 | 212 | distinct_values_count = 0; | 239 | 212 | } | 240 | | | 241 | 59 | return true; | 242 | 65 | } |
_ZN5doris15build_histogramInEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm Line | Count | Source | 176 | 48 | const size_t max_num_buckets) { | 177 | | // If the input map is empty, there is nothing to build. | 178 | 48 | if (ordered_map.empty()) { | 179 | 6 | return false; | 180 | 6 | } | 181 | | | 182 | | // Calculate the maximum number of values that can be assigned to each bucket. | 183 | 42 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 184 | | | 185 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 186 | | // allocations when inserting buckets. | 187 | 42 | buckets.clear(); | 188 | 42 | buckets.reserve(max_num_buckets); | 189 | | | 190 | | // Initialize bucket variables. | 191 | 42 | size_t distinct_values_count = 0; | 192 | 42 | size_t values_count = 0; | 193 | 42 | size_t cumulative_values = 0; | 194 | | | 195 | | // Record how many values still need to be assigned. | 196 | 42 | auto remaining_distinct_values = ordered_map.size(); | 197 | | | 198 | 42 | auto it = ordered_map.begin(); | 199 | | | 200 | | // Lower value of the current bucket. | 201 | 42 | const T* lower_value = &it->first; | 202 | | | 203 | | // Iterate over the ordered map of distinct values and their counts. | 204 | 385 | for (; it != ordered_map.end(); ++it) { | 205 | 343 | const auto count = it->second; | 206 | 343 | const auto current_value = it->first; | 207 | | | 208 | | // Update the bucket counts and track the number of distinct values assigned. | 209 | 343 | distinct_values_count++; | 210 | 343 | remaining_distinct_values--; | 211 | 343 | values_count += count; | 212 | 343 | cumulative_values += count; | 213 | | | 214 | | // Check whether the current value should be added to the current bucket. 
| 215 | 343 | auto next = std::next(it); | 216 | 343 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 217 | | | 218 | 343 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 219 | 343 | values_count + next->second <= bucket_max_values) { | 220 | | // If the current value is the last in the input map and there are more remaining | 221 | | // distinct values than empty buckets and adding the value does not cause the bucket | 222 | | // to exceed its max size, skip adding the value to the current bucket. | 223 | 202 | continue; | 224 | 202 | } | 225 | | | 226 | | // Finalize the current bucket and add it to our collection of buckets. | 227 | 141 | auto pre_sum = cumulative_values - values_count; | 228 | | | 229 | 141 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 230 | 141 | pre_sum); | 231 | 141 | buckets.push_back(new_bucket); | 232 | | | 233 | | // Reset variables for the next bucket. | 234 | 141 | if (next != ordered_map.end()) { | 235 | 99 | lower_value = &next->first; | 236 | 99 | } | 237 | 141 | values_count = 0; | 238 | 141 | distinct_values_count = 0; | 239 | 141 | } | 240 | | | 241 | 42 | return true; | 242 | 48 | } |
_ZN5doris15build_histogramIfEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm Line | Count | Source | 176 | 46 | const size_t max_num_buckets) { | 177 | | // If the input map is empty, there is nothing to build. | 178 | 46 | if (ordered_map.empty()) { | 179 | 5 | return false; | 180 | 5 | } | 181 | | | 182 | | // Calculate the maximum number of values that can be assigned to each bucket. | 183 | 41 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 184 | | | 185 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 186 | | // allocations when inserting buckets. | 187 | 41 | buckets.clear(); | 188 | 41 | buckets.reserve(max_num_buckets); | 189 | | | 190 | | // Initialize bucket variables. | 191 | 41 | size_t distinct_values_count = 0; | 192 | 41 | size_t values_count = 0; | 193 | 41 | size_t cumulative_values = 0; | 194 | | | 195 | | // Record how many values still need to be assigned. | 196 | 41 | auto remaining_distinct_values = ordered_map.size(); | 197 | | | 198 | 41 | auto it = ordered_map.begin(); | 199 | | | 200 | | // Lower value of the current bucket. | 201 | 41 | const T* lower_value = &it->first; | 202 | | | 203 | | // Iterate over the ordered map of distinct values and their counts. | 204 | 369 | for (; it != ordered_map.end(); ++it) { | 205 | 328 | const auto count = it->second; | 206 | 328 | const auto current_value = it->first; | 207 | | | 208 | | // Update the bucket counts and track the number of distinct values assigned. | 209 | 328 | distinct_values_count++; | 210 | 328 | remaining_distinct_values--; | 211 | 328 | values_count += count; | 212 | 328 | cumulative_values += count; | 213 | | | 214 | | // Check whether the current value should be added to the current bucket. 
| 215 | 328 | auto next = std::next(it); | 216 | 328 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 217 | | | 218 | 328 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 219 | 328 | values_count + next->second <= bucket_max_values) { | 220 | | // If the current value is the last in the input map and there are more remaining | 221 | | // distinct values than empty buckets and adding the value does not cause the bucket | 222 | | // to exceed its max size, skip adding the value to the current bucket. | 223 | 200 | continue; | 224 | 200 | } | 225 | | | 226 | | // Finalize the current bucket and add it to our collection of buckets. | 227 | 128 | auto pre_sum = cumulative_values - values_count; | 228 | | | 229 | 128 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 230 | 128 | pre_sum); | 231 | 128 | buckets.push_back(new_bucket); | 232 | | | 233 | | // Reset variables for the next bucket. | 234 | 128 | if (next != ordered_map.end()) { | 235 | 87 | lower_value = &next->first; | 236 | 87 | } | 237 | 128 | values_count = 0; | 238 | 128 | distinct_values_count = 0; | 239 | 128 | } | 240 | | | 241 | 41 | return true; | 242 | 46 | } |
_ZN5doris15build_histogramIdEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm Line | Count | Source | 176 | 46 | const size_t max_num_buckets) { | 177 | | // If the input map is empty, there is nothing to build. | 178 | 46 | if (ordered_map.empty()) { | 179 | 5 | return false; | 180 | 5 | } | 181 | | | 182 | | // Calculate the maximum number of values that can be assigned to each bucket. | 183 | 41 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 184 | | | 185 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 186 | | // allocations when inserting buckets. | 187 | 41 | buckets.clear(); | 188 | 41 | buckets.reserve(max_num_buckets); | 189 | | | 190 | | // Initialize bucket variables. | 191 | 41 | size_t distinct_values_count = 0; | 192 | 41 | size_t values_count = 0; | 193 | 41 | size_t cumulative_values = 0; | 194 | | | 195 | | // Record how many values still need to be assigned. | 196 | 41 | auto remaining_distinct_values = ordered_map.size(); | 197 | | | 198 | 41 | auto it = ordered_map.begin(); | 199 | | | 200 | | // Lower value of the current bucket. | 201 | 41 | const T* lower_value = &it->first; | 202 | | | 203 | | // Iterate over the ordered map of distinct values and their counts. | 204 | 369 | for (; it != ordered_map.end(); ++it) { | 205 | 328 | const auto count = it->second; | 206 | 328 | const auto current_value = it->first; | 207 | | | 208 | | // Update the bucket counts and track the number of distinct values assigned. | 209 | 328 | distinct_values_count++; | 210 | 328 | remaining_distinct_values--; | 211 | 328 | values_count += count; | 212 | 328 | cumulative_values += count; | 213 | | | 214 | | // Check whether the current value should be added to the current bucket. 
| 215 | 328 | auto next = std::next(it); | 216 | 328 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 217 | | | 218 | 328 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 219 | 328 | values_count + next->second <= bucket_max_values) { | 220 | | // If the current value is the last in the input map and there are more remaining | 221 | | // distinct values than empty buckets and adding the value does not cause the bucket | 222 | | // to exceed its max size, skip adding the value to the current bucket. | 223 | 200 | continue; | 224 | 200 | } | 225 | | | 226 | | // Finalize the current bucket and add it to our collection of buckets. | 227 | 128 | auto pre_sum = cumulative_values - values_count; | 228 | | | 229 | 128 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 230 | 128 | pre_sum); | 231 | 128 | buckets.push_back(new_bucket); | 232 | | | 233 | | // Reset variables for the next bucket. | 234 | 128 | if (next != ordered_map.end()) { | 235 | 87 | lower_value = &next->first; | 236 | 87 | } | 237 | 128 | values_count = 0; | 238 | 128 | distinct_values_count = 0; | 239 | 128 | } | 240 | | | 241 | 41 | return true; | 242 | 46 | } |
_ZN5doris15build_histogramINS_7DecimalIiEEEEbRSt6vectorINS_6BucketIT_EESaIS6_EERKSt3mapIS5_mSt4lessIS5_ESaISt4pairIKS5_mEEEm Line | Count | Source | 176 | 49 | const size_t max_num_buckets) { | 177 | | // If the input map is empty, there is nothing to build. | 178 | 49 | if (ordered_map.empty()) { | 179 | 4 | return false; | 180 | 4 | } | 181 | | | 182 | | // Calculate the maximum number of values that can be assigned to each bucket. | 183 | 45 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 184 | | | 185 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 186 | | // allocations when inserting buckets. | 187 | 45 | buckets.clear(); | 188 | 45 | buckets.reserve(max_num_buckets); | 189 | | | 190 | | // Initialize bucket variables. | 191 | 45 | size_t distinct_values_count = 0; | 192 | 45 | size_t values_count = 0; | 193 | 45 | size_t cumulative_values = 0; | 194 | | | 195 | | // Record how many values still need to be assigned. | 196 | 45 | auto remaining_distinct_values = ordered_map.size(); | 197 | | | 198 | 45 | auto it = ordered_map.begin(); | 199 | | | 200 | | // Lower value of the current bucket. | 201 | 45 | const T* lower_value = &it->first; | 202 | | | 203 | | // Iterate over the ordered map of distinct values and their counts. | 204 | 206 | for (; it != ordered_map.end(); ++it) { | 205 | 161 | const auto count = it->second; | 206 | 161 | const auto current_value = it->first; | 207 | | | 208 | | // Update the bucket counts and track the number of distinct values assigned. | 209 | 161 | distinct_values_count++; | 210 | 161 | remaining_distinct_values--; | 211 | 161 | values_count += count; | 212 | 161 | cumulative_values += count; | 213 | | | 214 | | // Check whether the current value should be added to the current bucket. 
| 215 | 161 | auto next = std::next(it); | 216 | 161 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 217 | | | 218 | 161 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 219 | 161 | values_count + next->second <= bucket_max_values) { | 220 | | // If the current value is the last in the input map and there are more remaining | 221 | | // distinct values than empty buckets and adding the value does not cause the bucket | 222 | | // to exceed its max size, skip adding the value to the current bucket. | 223 | 28 | continue; | 224 | 28 | } | 225 | | | 226 | | // Finalize the current bucket and add it to our collection of buckets. | 227 | 133 | auto pre_sum = cumulative_values - values_count; | 228 | | | 229 | 133 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 230 | 133 | pre_sum); | 231 | 133 | buckets.push_back(new_bucket); | 232 | | | 233 | | // Reset variables for the next bucket. | 234 | 133 | if (next != ordered_map.end()) { | 235 | 88 | lower_value = &next->first; | 236 | 88 | } | 237 | 133 | values_count = 0; | 238 | 133 | distinct_values_count = 0; | 239 | 133 | } | 240 | | | 241 | 45 | return true; | 242 | 49 | } |
Unexecuted instantiation: _ZN5doris15build_histogramINS_7DecimalIlEEEEbRSt6vectorINS_6BucketIT_EESaIS6_EERKSt3mapIS5_mSt4lessIS5_ESaISt4pairIKS5_mEEEm Unexecuted instantiation: _ZN5doris15build_histogramINS_12Decimal128V3EEEbRSt6vectorINS_6BucketIT_EESaIS5_EERKSt3mapIS4_mSt4lessIS4_ESaISt4pairIKS4_mEEEm Unexecuted instantiation: _ZN5doris15build_histogramINS_7DecimalIN4wide7integerILm256EiEEEEEEbRSt6vectorINS_6BucketIT_EESaIS9_EERKSt3mapIS8_mSt4lessIS8_ESaISt4pairIKS8_mEEEm _ZN5doris15build_histogramINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEbRSt6vectorINS_6BucketIT_EESaISA_EERKSt3mapIS9_mSt4lessIS9_ESaISt4pairIKS9_mEEEm Line | Count | Source | 176 | 105 | const size_t max_num_buckets) { | 177 | | // If the input map is empty, there is nothing to build. | 178 | 105 | if (ordered_map.empty()) { | 179 | 5 | return false; | 180 | 5 | } | 181 | | | 182 | | // Calculate the maximum number of values that can be assigned to each bucket. | 183 | 100 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 184 | | | 185 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 186 | | // allocations when inserting buckets. | 187 | 100 | buckets.clear(); | 188 | 100 | buckets.reserve(max_num_buckets); | 189 | | | 190 | | // Initialize bucket variables. | 191 | 100 | size_t distinct_values_count = 0; | 192 | 100 | size_t values_count = 0; | 193 | 100 | size_t cumulative_values = 0; | 194 | | | 195 | | // Record how many values still need to be assigned. | 196 | 100 | auto remaining_distinct_values = ordered_map.size(); | 197 | | | 198 | 100 | auto it = ordered_map.begin(); | 199 | | | 200 | | // Lower value of the current bucket. | 201 | 100 | const T* lower_value = &it->first; | 202 | | | 203 | | // Iterate over the ordered map of distinct values and their counts. 
| 204 | 2.30k | for (; it != ordered_map.end(); ++it) { | 205 | 2.20k | const auto count = it->second; | 206 | 2.20k | const auto current_value = it->first; | 207 | | | 208 | | // Update the bucket counts and track the number of distinct values assigned. | 209 | 2.20k | distinct_values_count++; | 210 | 2.20k | remaining_distinct_values--; | 211 | 2.20k | values_count += count; | 212 | 2.20k | cumulative_values += count; | 213 | | | 214 | | // Check whether the current value should be added to the current bucket. | 215 | 2.20k | auto next = std::next(it); | 216 | 2.20k | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 217 | | | 218 | 2.20k | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 219 | 2.20k | values_count + next->second <= bucket_max_values) { | 220 | | // If the current value is the last in the input map and there are more remaining | 221 | | // distinct values than empty buckets and adding the value does not cause the bucket | 222 | | // to exceed its max size, skip adding the value to the current bucket. | 223 | 2.02k | continue; | 224 | 2.02k | } | 225 | | | 226 | | // Finalize the current bucket and add it to our collection of buckets. | 227 | 178 | auto pre_sum = cumulative_values - values_count; | 228 | | | 229 | 178 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 230 | 178 | pre_sum); | 231 | 178 | buckets.push_back(new_bucket); | 232 | | | 233 | | // Reset variables for the next bucket. | 234 | 178 | if (next != ordered_map.end()) { | 235 | 78 | lower_value = &next->first; | 236 | 78 | } | 237 | 178 | values_count = 0; | 238 | 178 | distinct_values_count = 0; | 239 | 178 | } | 240 | | | 241 | 100 | return true; | 242 | 105 | } |
_ZN5doris15build_histogramINS_11DateV2ValueINS_15DateV2ValueTypeEEEEEbRSt6vectorINS_6BucketIT_EESaIS7_EERKSt3mapIS6_mSt4lessIS6_ESaISt4pairIKS6_mEEEm Line | Count | Source | 176 | 88 | const size_t max_num_buckets) { | 177 | | // If the input map is empty, there is nothing to build. | 178 | 88 | if (ordered_map.empty()) { | 179 | 7 | return false; | 180 | 7 | } | 181 | | | 182 | | // Calculate the maximum number of values that can be assigned to each bucket. | 183 | 81 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 184 | | | 185 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 186 | | // allocations when inserting buckets. | 187 | 81 | buckets.clear(); | 188 | 81 | buckets.reserve(max_num_buckets); | 189 | | | 190 | | // Initialize bucket variables. | 191 | 81 | size_t distinct_values_count = 0; | 192 | 81 | size_t values_count = 0; | 193 | 81 | size_t cumulative_values = 0; | 194 | | | 195 | | // Record how many values still need to be assigned. | 196 | 81 | auto remaining_distinct_values = ordered_map.size(); | 197 | | | 198 | 81 | auto it = ordered_map.begin(); | 199 | | | 200 | | // Lower value of the current bucket. | 201 | 81 | const T* lower_value = &it->first; | 202 | | | 203 | | // Iterate over the ordered map of distinct values and their counts. | 204 | 539 | for (; it != ordered_map.end(); ++it) { | 205 | 458 | const auto count = it->second; | 206 | 458 | const auto current_value = it->first; | 207 | | | 208 | | // Update the bucket counts and track the number of distinct values assigned. | 209 | 458 | distinct_values_count++; | 210 | 458 | remaining_distinct_values--; | 211 | 458 | values_count += count; | 212 | 458 | cumulative_values += count; | 213 | | | 214 | | // Check whether the current value should be added to the current bucket. 
| 215 | 458 | auto next = std::next(it); | 216 | 458 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 217 | | | 218 | 458 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 219 | 458 | values_count + next->second <= bucket_max_values) { | 220 | | // If the current value is the last in the input map and there are more remaining | 221 | | // distinct values than empty buckets and adding the value does not cause the bucket | 222 | | // to exceed its max size, skip adding the value to the current bucket. | 223 | 202 | continue; | 224 | 202 | } | 225 | | | 226 | | // Finalize the current bucket and add it to our collection of buckets. | 227 | 256 | auto pre_sum = cumulative_values - values_count; | 228 | | | 229 | 256 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 230 | 256 | pre_sum); | 231 | 256 | buckets.push_back(new_bucket); | 232 | | | 233 | | // Reset variables for the next bucket. | 234 | 256 | if (next != ordered_map.end()) { | 235 | 175 | lower_value = &next->first; | 236 | 175 | } | 237 | 256 | values_count = 0; | 238 | 256 | distinct_values_count = 0; | 239 | 256 | } | 240 | | | 241 | 81 | return true; | 242 | 88 | } |
_ZN5doris15build_histogramINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEEEEbRSt6vectorINS_6BucketIT_EESaIS7_EERKSt3mapIS6_mSt4lessIS6_ESaISt4pairIKS6_mEEEm Line | Count | Source | 176 | 88 | const size_t max_num_buckets) { | 177 | | // If the input map is empty, there is nothing to build. | 178 | 88 | if (ordered_map.empty()) { | 179 | 7 | return false; | 180 | 7 | } | 181 | | | 182 | | // Calculate the maximum number of values that can be assigned to each bucket. | 183 | 81 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 184 | | | 185 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 186 | | // allocations when inserting buckets. | 187 | 81 | buckets.clear(); | 188 | 81 | buckets.reserve(max_num_buckets); | 189 | | | 190 | | // Initialize bucket variables. | 191 | 81 | size_t distinct_values_count = 0; | 192 | 81 | size_t values_count = 0; | 193 | 81 | size_t cumulative_values = 0; | 194 | | | 195 | | // Record how many values still need to be assigned. | 196 | 81 | auto remaining_distinct_values = ordered_map.size(); | 197 | | | 198 | 81 | auto it = ordered_map.begin(); | 199 | | | 200 | | // Lower value of the current bucket. | 201 | 81 | const T* lower_value = &it->first; | 202 | | | 203 | | // Iterate over the ordered map of distinct values and their counts. | 204 | 538 | for (; it != ordered_map.end(); ++it) { | 205 | 457 | const auto count = it->second; | 206 | 457 | const auto current_value = it->first; | 207 | | | 208 | | // Update the bucket counts and track the number of distinct values assigned. | 209 | 457 | distinct_values_count++; | 210 | 457 | remaining_distinct_values--; | 211 | 457 | values_count += count; | 212 | 457 | cumulative_values += count; | 213 | | | 214 | | // Check whether the current value should be added to the current bucket. 
| 215 | 457 | auto next = std::next(it); | 216 | 457 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 217 | | | 218 | 457 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 219 | 457 | values_count + next->second <= bucket_max_values) { | 220 | | // If the current value is the last in the input map and there are more remaining | 221 | | // distinct values than empty buckets and adding the value does not cause the bucket | 222 | | // to exceed its max size, skip adding the value to the current bucket. | 223 | 202 | continue; | 224 | 202 | } | 225 | | | 226 | | // Finalize the current bucket and add it to our collection of buckets. | 227 | 255 | auto pre_sum = cumulative_values - values_count; | 228 | | | 229 | 255 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 230 | 255 | pre_sum); | 231 | 255 | buckets.push_back(new_bucket); | 232 | | | 233 | | // Reset variables for the next bucket. | 234 | 255 | if (next != ordered_map.end()) { | 235 | 174 | lower_value = &next->first; | 236 | 174 | } | 237 | 255 | values_count = 0; | 238 | 255 | distinct_values_count = 0; | 239 | 255 | } | 240 | | | 241 | 81 | return true; | 242 | 88 | } |
|
243 | | |
244 | | template <typename T> |
245 | | bool histogram_to_json(rapidjson::StringBuffer& buffer, const std::vector<Bucket<T>>& buckets, |
246 | 702 | const DataTypePtr& data_type) { |
247 | 702 | rapidjson::Document doc; |
248 | 702 | doc.SetObject(); |
249 | 702 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); |
250 | | |
251 | 702 | int num_buckets = cast_set<int>(buckets.size()); |
252 | 702 | doc.AddMember("num_buckets", num_buckets, allocator); |
253 | | |
254 | 702 | rapidjson::Value bucket_arr(rapidjson::kArrayType); |
255 | 702 | bucket_arr.Reserve(num_buckets, allocator); |
256 | | |
257 | 702 | std::stringstream ss1; |
258 | 702 | std::stringstream ss2; |
259 | | |
260 | 702 | rapidjson::Value lower_val; |
261 | 702 | rapidjson::Value upper_val; |
262 | | |
263 | | // Convert bucket's lower and upper to 2 columns |
264 | 702 | MutableColumnPtr lower_column = data_type->create_column(); |
265 | 702 | MutableColumnPtr upper_column = data_type->create_column(); |
266 | 1.84k | for (const auto& bucket : buckets) { |
267 | | // String type is different, it has to pass in length |
268 | | // if it is string type , directly use string value |
269 | 1.84k | if constexpr (!std::is_same_v<T, std::string>) { |
270 | 1.66k | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); |
271 | 1.66k | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); |
272 | 1.66k | } |
273 | 1.84k | } |
274 | 702 | size_t row_num = 0; |
275 | | |
276 | 702 | auto format_options = DataTypeSerDe::get_default_format_options(); |
277 | 702 | auto time_zone = cctz::utc_time_zone(); |
278 | 702 | format_options.timezone = &time_zone; |
279 | | |
280 | 1.84k | for (const auto& bucket : buckets) { |
281 | 1.84k | if constexpr (std::is_same_v<T, std::string>) { |
282 | 178 | lower_val.SetString(bucket.lower.data(), |
283 | 178 | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); |
284 | 178 | upper_val.SetString(bucket.upper.data(), |
285 | 178 | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); |
286 | 1.66k | } else { |
287 | 1.66k | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); |
288 | 1.66k | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); |
289 | 1.66k | ++row_num; |
290 | 1.66k | lower_val.SetString(lower_str.data(), |
291 | 1.66k | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); |
292 | 1.66k | upper_val.SetString(upper_str.data(), |
293 | 1.66k | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); |
294 | 1.66k | } |
295 | 1.84k | rapidjson::Value bucket_json(rapidjson::kObjectType); |
296 | 1.84k | bucket_json.AddMember("lower", lower_val, allocator); |
297 | 1.84k | bucket_json.AddMember("upper", upper_val, allocator); |
298 | 1.84k | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); |
299 | 1.84k | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); |
300 | 1.84k | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); |
301 | | |
302 | 1.84k | bucket_arr.PushBack(bucket_json, allocator); |
303 | 1.84k | } |
304 | | |
305 | 702 | doc.AddMember("buckets", bucket_arr, allocator); |
306 | 702 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); |
307 | 702 | doc.Accept(writer); |
308 | | |
309 | 702 | return !buckets.empty() && buffer.GetSize() > 0; |
310 | 702 | } _ZN5doris17histogram_to_jsonIhEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 246 | 2 | const DataTypePtr& data_type) { | 247 | 2 | rapidjson::Document doc; | 248 | 2 | doc.SetObject(); | 249 | 2 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 250 | | | 251 | 2 | int num_buckets = cast_set<int>(buckets.size()); | 252 | 2 | doc.AddMember("num_buckets", num_buckets, allocator); | 253 | | | 254 | 2 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 255 | 2 | bucket_arr.Reserve(num_buckets, allocator); | 256 | | | 257 | 2 | std::stringstream ss1; | 258 | 2 | std::stringstream ss2; | 259 | | | 260 | 2 | rapidjson::Value lower_val; | 261 | 2 | rapidjson::Value upper_val; | 262 | | | 263 | | // Convert bucket's lower and upper to 2 columns | 264 | 2 | MutableColumnPtr lower_column = data_type->create_column(); | 265 | 2 | MutableColumnPtr upper_column = data_type->create_column(); | 266 | 2 | for (const auto& bucket : buckets) { | 267 | | // String type is different, it has to pass in length | 268 | | // if it is string type , directly use string value | 269 | 2 | if constexpr (!std::is_same_v<T, std::string>) { | 270 | 2 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 271 | 2 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 272 | 2 | } | 273 | 2 | } | 274 | 2 | size_t row_num = 0; | 275 | | | 276 | 2 | auto format_options = DataTypeSerDe::get_default_format_options(); | 277 | 2 | auto time_zone = cctz::utc_time_zone(); | 278 | 2 | format_options.timezone = &time_zone; | 279 | | | 280 | 2 | for (const auto& bucket : buckets) { | 281 | | if constexpr (std::is_same_v<T, std::string>) { | 282 | | lower_val.SetString(bucket.lower.data(), | 283 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 284 | | 
upper_val.SetString(bucket.upper.data(), | 285 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 286 | 2 | } else { | 287 | 2 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 288 | 2 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 289 | 2 | ++row_num; | 290 | 2 | lower_val.SetString(lower_str.data(), | 291 | 2 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 292 | 2 | upper_val.SetString(upper_str.data(), | 293 | 2 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 294 | 2 | } | 295 | 2 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 296 | 2 | bucket_json.AddMember("lower", lower_val, allocator); | 297 | 2 | bucket_json.AddMember("upper", upper_val, allocator); | 298 | 2 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 299 | 2 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 300 | 2 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 301 | | | 302 | 2 | bucket_arr.PushBack(bucket_json, allocator); | 303 | 2 | } | 304 | | | 305 | 2 | doc.AddMember("buckets", bucket_arr, allocator); | 306 | 2 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 307 | 2 | doc.Accept(writer); | 308 | | | 309 | 2 | return !buckets.empty() && buffer.GetSize() > 0; | 310 | 2 | } |
_ZN5doris17histogram_to_jsonIaEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 246 | 48 | const DataTypePtr& data_type) { | 247 | 48 | rapidjson::Document doc; | 248 | 48 | doc.SetObject(); | 249 | 48 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 250 | | | 251 | 48 | int num_buckets = cast_set<int>(buckets.size()); | 252 | 48 | doc.AddMember("num_buckets", num_buckets, allocator); | 253 | | | 254 | 48 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 255 | 48 | bucket_arr.Reserve(num_buckets, allocator); | 256 | | | 257 | 48 | std::stringstream ss1; | 258 | 48 | std::stringstream ss2; | 259 | | | 260 | 48 | rapidjson::Value lower_val; | 261 | 48 | rapidjson::Value upper_val; | 262 | | | 263 | | // Convert bucket's lower and upper to 2 columns | 264 | 48 | MutableColumnPtr lower_column = data_type->create_column(); | 265 | 48 | MutableColumnPtr upper_column = data_type->create_column(); | 266 | 129 | for (const auto& bucket : buckets) { | 267 | | // String type is different, it has to pass in length | 268 | | // if it is string type , directly use string value | 269 | 129 | if constexpr (!std::is_same_v<T, std::string>) { | 270 | 129 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 271 | 129 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 272 | 129 | } | 273 | 129 | } | 274 | 48 | size_t row_num = 0; | 275 | | | 276 | 48 | auto format_options = DataTypeSerDe::get_default_format_options(); | 277 | 48 | auto time_zone = cctz::utc_time_zone(); | 278 | 48 | format_options.timezone = &time_zone; | 279 | | | 280 | 129 | for (const auto& bucket : buckets) { | 281 | | if constexpr (std::is_same_v<T, std::string>) { | 282 | | lower_val.SetString(bucket.lower.data(), | 283 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 284 | | 
upper_val.SetString(bucket.upper.data(), | 285 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 286 | 129 | } else { | 287 | 129 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 288 | 129 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 289 | 129 | ++row_num; | 290 | 129 | lower_val.SetString(lower_str.data(), | 291 | 129 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 292 | 129 | upper_val.SetString(upper_str.data(), | 293 | 129 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 294 | 129 | } | 295 | 129 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 296 | 129 | bucket_json.AddMember("lower", lower_val, allocator); | 297 | 129 | bucket_json.AddMember("upper", upper_val, allocator); | 298 | 129 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 299 | 129 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 300 | 129 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 301 | | | 302 | 129 | bucket_arr.PushBack(bucket_json, allocator); | 303 | 129 | } | 304 | | | 305 | 48 | doc.AddMember("buckets", bucket_arr, allocator); | 306 | 48 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 307 | 48 | doc.Accept(writer); | 308 | | | 309 | 48 | return !buckets.empty() && buffer.GetSize() > 0; | 310 | 48 | } |
_ZN5doris17histogram_to_jsonIsEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 246 | 48 | const DataTypePtr& data_type) { | 247 | 48 | rapidjson::Document doc; | 248 | 48 | doc.SetObject(); | 249 | 48 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 250 | | | 251 | 48 | int num_buckets = cast_set<int>(buckets.size()); | 252 | 48 | doc.AddMember("num_buckets", num_buckets, allocator); | 253 | | | 254 | 48 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 255 | 48 | bucket_arr.Reserve(num_buckets, allocator); | 256 | | | 257 | 48 | std::stringstream ss1; | 258 | 48 | std::stringstream ss2; | 259 | | | 260 | 48 | rapidjson::Value lower_val; | 261 | 48 | rapidjson::Value upper_val; | 262 | | | 263 | | // Convert bucket's lower and upper to 2 columns | 264 | 48 | MutableColumnPtr lower_column = data_type->create_column(); | 265 | 48 | MutableColumnPtr upper_column = data_type->create_column(); | 266 | 132 | for (const auto& bucket : buckets) { | 267 | | // String type is different, it has to pass in length | 268 | | // if it is string type , directly use string value | 269 | 132 | if constexpr (!std::is_same_v<T, std::string>) { | 270 | 132 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 271 | 132 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 272 | 132 | } | 273 | 132 | } | 274 | 48 | size_t row_num = 0; | 275 | | | 276 | 48 | auto format_options = DataTypeSerDe::get_default_format_options(); | 277 | 48 | auto time_zone = cctz::utc_time_zone(); | 278 | 48 | format_options.timezone = &time_zone; | 279 | | | 280 | 132 | for (const auto& bucket : buckets) { | 281 | | if constexpr (std::is_same_v<T, std::string>) { | 282 | | lower_val.SetString(bucket.lower.data(), | 283 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 284 | | 
upper_val.SetString(bucket.upper.data(), | 285 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 286 | 132 | } else { | 287 | 132 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 288 | 132 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 289 | 132 | ++row_num; | 290 | 132 | lower_val.SetString(lower_str.data(), | 291 | 132 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 292 | 132 | upper_val.SetString(upper_str.data(), | 293 | 132 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 294 | 132 | } | 295 | 132 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 296 | 132 | bucket_json.AddMember("lower", lower_val, allocator); | 297 | 132 | bucket_json.AddMember("upper", upper_val, allocator); | 298 | 132 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 299 | 132 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 300 | 132 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 301 | | | 302 | 132 | bucket_arr.PushBack(bucket_json, allocator); | 303 | 132 | } | 304 | | | 305 | 48 | doc.AddMember("buckets", bucket_arr, allocator); | 306 | 48 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 307 | 48 | doc.Accept(writer); | 308 | | | 309 | 48 | return !buckets.empty() && buffer.GetSize() > 0; | 310 | 48 | } |
_ZN5doris17histogram_to_jsonIiEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 246 | 69 | const DataTypePtr& data_type) { | 247 | 69 | rapidjson::Document doc; | 248 | 69 | doc.SetObject(); | 249 | 69 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 250 | | | 251 | 69 | int num_buckets = cast_set<int>(buckets.size()); | 252 | 69 | doc.AddMember("num_buckets", num_buckets, allocator); | 253 | | | 254 | 69 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 255 | 69 | bucket_arr.Reserve(num_buckets, allocator); | 256 | | | 257 | 69 | std::stringstream ss1; | 258 | 69 | std::stringstream ss2; | 259 | | | 260 | 69 | rapidjson::Value lower_val; | 261 | 69 | rapidjson::Value upper_val; | 262 | | | 263 | | // Convert bucket's lower and upper to 2 columns | 264 | 69 | MutableColumnPtr lower_column = data_type->create_column(); | 265 | 69 | MutableColumnPtr upper_column = data_type->create_column(); | 266 | 149 | for (const auto& bucket : buckets) { | 267 | | // String type is different, it has to pass in length | 268 | | // if it is string type , directly use string value | 269 | 149 | if constexpr (!std::is_same_v<T, std::string>) { | 270 | 149 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 271 | 149 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 272 | 149 | } | 273 | 149 | } | 274 | 69 | size_t row_num = 0; | 275 | | | 276 | 69 | auto format_options = DataTypeSerDe::get_default_format_options(); | 277 | 69 | auto time_zone = cctz::utc_time_zone(); | 278 | 69 | format_options.timezone = &time_zone; | 279 | | | 280 | 149 | for (const auto& bucket : buckets) { | 281 | | if constexpr (std::is_same_v<T, std::string>) { | 282 | | lower_val.SetString(bucket.lower.data(), | 283 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 284 | | 
upper_val.SetString(bucket.upper.data(), | 285 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 286 | 149 | } else { | 287 | 149 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 288 | 149 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 289 | 149 | ++row_num; | 290 | 149 | lower_val.SetString(lower_str.data(), | 291 | 149 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 292 | 149 | upper_val.SetString(upper_str.data(), | 293 | 149 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 294 | 149 | } | 295 | 149 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 296 | 149 | bucket_json.AddMember("lower", lower_val, allocator); | 297 | 149 | bucket_json.AddMember("upper", upper_val, allocator); | 298 | 149 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 299 | 149 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 300 | 149 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 301 | | | 302 | 149 | bucket_arr.PushBack(bucket_json, allocator); | 303 | 149 | } | 304 | | | 305 | 69 | doc.AddMember("buckets", bucket_arr, allocator); | 306 | 69 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 307 | 69 | doc.Accept(writer); | 308 | | | 309 | 69 | return !buckets.empty() && buffer.GetSize() > 0; | 310 | 69 | } |
_ZN5doris17histogram_to_jsonIlEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 246 | 65 | const DataTypePtr& data_type) { | 247 | 65 | rapidjson::Document doc; | 248 | 65 | doc.SetObject(); | 249 | 65 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 250 | | | 251 | 65 | int num_buckets = cast_set<int>(buckets.size()); | 252 | 65 | doc.AddMember("num_buckets", num_buckets, allocator); | 253 | | | 254 | 65 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 255 | 65 | bucket_arr.Reserve(num_buckets, allocator); | 256 | | | 257 | 65 | std::stringstream ss1; | 258 | 65 | std::stringstream ss2; | 259 | | | 260 | 65 | rapidjson::Value lower_val; | 261 | 65 | rapidjson::Value upper_val; | 262 | | | 263 | | // Convert bucket's lower and upper to 2 columns | 264 | 65 | MutableColumnPtr lower_column = data_type->create_column(); | 265 | 65 | MutableColumnPtr upper_column = data_type->create_column(); | 266 | 212 | for (const auto& bucket : buckets) { | 267 | | // String type is different, it has to pass in length | 268 | | // if it is string type , directly use string value | 269 | 212 | if constexpr (!std::is_same_v<T, std::string>) { | 270 | 212 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 271 | 212 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 272 | 212 | } | 273 | 212 | } | 274 | 65 | size_t row_num = 0; | 275 | | | 276 | 65 | auto format_options = DataTypeSerDe::get_default_format_options(); | 277 | 65 | auto time_zone = cctz::utc_time_zone(); | 278 | 65 | format_options.timezone = &time_zone; | 279 | | | 280 | 212 | for (const auto& bucket : buckets) { | 281 | | if constexpr (std::is_same_v<T, std::string>) { | 282 | | lower_val.SetString(bucket.lower.data(), | 283 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 284 | | 
upper_val.SetString(bucket.upper.data(), | 285 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 286 | 212 | } else { | 287 | 212 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 288 | 212 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 289 | 212 | ++row_num; | 290 | 212 | lower_val.SetString(lower_str.data(), | 291 | 212 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 292 | 212 | upper_val.SetString(upper_str.data(), | 293 | 212 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 294 | 212 | } | 295 | 212 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 296 | 212 | bucket_json.AddMember("lower", lower_val, allocator); | 297 | 212 | bucket_json.AddMember("upper", upper_val, allocator); | 298 | 212 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 299 | 212 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 300 | 212 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 301 | | | 302 | 212 | bucket_arr.PushBack(bucket_json, allocator); | 303 | 212 | } | 304 | | | 305 | 65 | doc.AddMember("buckets", bucket_arr, allocator); | 306 | 65 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 307 | 65 | doc.Accept(writer); | 308 | | | 309 | 65 | return !buckets.empty() && buffer.GetSize() > 0; | 310 | 65 | } |
_ZN5doris17histogram_to_jsonInEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 246 | 48 | const DataTypePtr& data_type) { | 247 | 48 | rapidjson::Document doc; | 248 | 48 | doc.SetObject(); | 249 | 48 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 250 | | | 251 | 48 | int num_buckets = cast_set<int>(buckets.size()); | 252 | 48 | doc.AddMember("num_buckets", num_buckets, allocator); | 253 | | | 254 | 48 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 255 | 48 | bucket_arr.Reserve(num_buckets, allocator); | 256 | | | 257 | 48 | std::stringstream ss1; | 258 | 48 | std::stringstream ss2; | 259 | | | 260 | 48 | rapidjson::Value lower_val; | 261 | 48 | rapidjson::Value upper_val; | 262 | | | 263 | | // Convert bucket's lower and upper to 2 columns | 264 | 48 | MutableColumnPtr lower_column = data_type->create_column(); | 265 | 48 | MutableColumnPtr upper_column = data_type->create_column(); | 266 | 141 | for (const auto& bucket : buckets) { | 267 | | // String type is different, it has to pass in length | 268 | | // if it is string type , directly use string value | 269 | 141 | if constexpr (!std::is_same_v<T, std::string>) { | 270 | 141 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 271 | 141 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 272 | 141 | } | 273 | 141 | } | 274 | 48 | size_t row_num = 0; | 275 | | | 276 | 48 | auto format_options = DataTypeSerDe::get_default_format_options(); | 277 | 48 | auto time_zone = cctz::utc_time_zone(); | 278 | 48 | format_options.timezone = &time_zone; | 279 | | | 280 | 141 | for (const auto& bucket : buckets) { | 281 | | if constexpr (std::is_same_v<T, std::string>) { | 282 | | lower_val.SetString(bucket.lower.data(), | 283 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 284 | | 
upper_val.SetString(bucket.upper.data(), | 285 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 286 | 141 | } else { | 287 | 141 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 288 | 141 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 289 | 141 | ++row_num; | 290 | 141 | lower_val.SetString(lower_str.data(), | 291 | 141 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 292 | 141 | upper_val.SetString(upper_str.data(), | 293 | 141 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 294 | 141 | } | 295 | 141 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 296 | 141 | bucket_json.AddMember("lower", lower_val, allocator); | 297 | 141 | bucket_json.AddMember("upper", upper_val, allocator); | 298 | 141 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 299 | 141 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 300 | 141 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 301 | | | 302 | 141 | bucket_arr.PushBack(bucket_json, allocator); | 303 | 141 | } | 304 | | | 305 | 48 | doc.AddMember("buckets", bucket_arr, allocator); | 306 | 48 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 307 | 48 | doc.Accept(writer); | 308 | | | 309 | 48 | return !buckets.empty() && buffer.GetSize() > 0; | 310 | 48 | } |
_ZN5doris17histogram_to_jsonIfEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 246 | 46 | const DataTypePtr& data_type) { | 247 | 46 | rapidjson::Document doc; | 248 | 46 | doc.SetObject(); | 249 | 46 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 250 | | | 251 | 46 | int num_buckets = cast_set<int>(buckets.size()); | 252 | 46 | doc.AddMember("num_buckets", num_buckets, allocator); | 253 | | | 254 | 46 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 255 | 46 | bucket_arr.Reserve(num_buckets, allocator); | 256 | | | 257 | 46 | std::stringstream ss1; | 258 | 46 | std::stringstream ss2; | 259 | | | 260 | 46 | rapidjson::Value lower_val; | 261 | 46 | rapidjson::Value upper_val; | 262 | | | 263 | | // Convert bucket's lower and upper to 2 columns | 264 | 46 | MutableColumnPtr lower_column = data_type->create_column(); | 265 | 46 | MutableColumnPtr upper_column = data_type->create_column(); | 266 | 128 | for (const auto& bucket : buckets) { | 267 | | // String type is different, it has to pass in length | 268 | | // if it is string type , directly use string value | 269 | 128 | if constexpr (!std::is_same_v<T, std::string>) { | 270 | 128 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 271 | 128 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 272 | 128 | } | 273 | 128 | } | 274 | 46 | size_t row_num = 0; | 275 | | | 276 | 46 | auto format_options = DataTypeSerDe::get_default_format_options(); | 277 | 46 | auto time_zone = cctz::utc_time_zone(); | 278 | 46 | format_options.timezone = &time_zone; | 279 | | | 280 | 128 | for (const auto& bucket : buckets) { | 281 | | if constexpr (std::is_same_v<T, std::string>) { | 282 | | lower_val.SetString(bucket.lower.data(), | 283 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 284 | | 
upper_val.SetString(bucket.upper.data(), | 285 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 286 | 128 | } else { | 287 | 128 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 288 | 128 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 289 | 128 | ++row_num; | 290 | 128 | lower_val.SetString(lower_str.data(), | 291 | 128 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 292 | 128 | upper_val.SetString(upper_str.data(), | 293 | 128 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 294 | 128 | } | 295 | 128 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 296 | 128 | bucket_json.AddMember("lower", lower_val, allocator); | 297 | 128 | bucket_json.AddMember("upper", upper_val, allocator); | 298 | 128 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 299 | 128 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 300 | 128 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 301 | | | 302 | 128 | bucket_arr.PushBack(bucket_json, allocator); | 303 | 128 | } | 304 | | | 305 | 46 | doc.AddMember("buckets", bucket_arr, allocator); | 306 | 46 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 307 | 46 | doc.Accept(writer); | 308 | | | 309 | 46 | return !buckets.empty() && buffer.GetSize() > 0; | 310 | 46 | } |
_ZN5doris17histogram_to_jsonIdEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 246 | 46 | const DataTypePtr& data_type) { | 247 | 46 | rapidjson::Document doc; | 248 | 46 | doc.SetObject(); | 249 | 46 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 250 | | | 251 | 46 | int num_buckets = cast_set<int>(buckets.size()); | 252 | 46 | doc.AddMember("num_buckets", num_buckets, allocator); | 253 | | | 254 | 46 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 255 | 46 | bucket_arr.Reserve(num_buckets, allocator); | 256 | | | 257 | 46 | std::stringstream ss1; | 258 | 46 | std::stringstream ss2; | 259 | | | 260 | 46 | rapidjson::Value lower_val; | 261 | 46 | rapidjson::Value upper_val; | 262 | | | 263 | | // Convert bucket's lower and upper to 2 columns | 264 | 46 | MutableColumnPtr lower_column = data_type->create_column(); | 265 | 46 | MutableColumnPtr upper_column = data_type->create_column(); | 266 | 128 | for (const auto& bucket : buckets) { | 267 | | // String type is different, it has to pass in length | 268 | | // if it is string type , directly use string value | 269 | 128 | if constexpr (!std::is_same_v<T, std::string>) { | 270 | 128 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 271 | 128 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 272 | 128 | } | 273 | 128 | } | 274 | 46 | size_t row_num = 0; | 275 | | | 276 | 46 | auto format_options = DataTypeSerDe::get_default_format_options(); | 277 | 46 | auto time_zone = cctz::utc_time_zone(); | 278 | 46 | format_options.timezone = &time_zone; | 279 | | | 280 | 128 | for (const auto& bucket : buckets) { | 281 | | if constexpr (std::is_same_v<T, std::string>) { | 282 | | lower_val.SetString(bucket.lower.data(), | 283 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 284 | | 
upper_val.SetString(bucket.upper.data(), | 285 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 286 | 128 | } else { | 287 | 128 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 288 | 128 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 289 | 128 | ++row_num; | 290 | 128 | lower_val.SetString(lower_str.data(), | 291 | 128 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 292 | 128 | upper_val.SetString(upper_str.data(), | 293 | 128 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 294 | 128 | } | 295 | 128 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 296 | 128 | bucket_json.AddMember("lower", lower_val, allocator); | 297 | 128 | bucket_json.AddMember("upper", upper_val, allocator); | 298 | 128 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 299 | 128 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 300 | 128 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 301 | | | 302 | 128 | bucket_arr.PushBack(bucket_json, allocator); | 303 | 128 | } | 304 | | | 305 | 46 | doc.AddMember("buckets", bucket_arr, allocator); | 306 | 46 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 307 | 46 | doc.Accept(writer); | 308 | | | 309 | 46 | return !buckets.empty() && buffer.GetSize() > 0; | 310 | 46 | } |
_ZN5doris17histogram_to_jsonINS_7DecimalIiEEEEbRN9rapidjson19GenericStringBufferINS3_4UTF8IcEENS3_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISD_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 246 | 49 | const DataTypePtr& data_type) { | 247 | 49 | rapidjson::Document doc; | 248 | 49 | doc.SetObject(); | 249 | 49 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 250 | | | 251 | 49 | int num_buckets = cast_set<int>(buckets.size()); | 252 | 49 | doc.AddMember("num_buckets", num_buckets, allocator); | 253 | | | 254 | 49 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 255 | 49 | bucket_arr.Reserve(num_buckets, allocator); | 256 | | | 257 | 49 | std::stringstream ss1; | 258 | 49 | std::stringstream ss2; | 259 | | | 260 | 49 | rapidjson::Value lower_val; | 261 | 49 | rapidjson::Value upper_val; | 262 | | | 263 | | // Convert bucket's lower and upper to 2 columns | 264 | 49 | MutableColumnPtr lower_column = data_type->create_column(); | 265 | 49 | MutableColumnPtr upper_column = data_type->create_column(); | 266 | 133 | for (const auto& bucket : buckets) { | 267 | | // String type is different, it has to pass in length | 268 | | // if it is string type , directly use string value | 269 | 133 | if constexpr (!std::is_same_v<T, std::string>) { | 270 | 133 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 271 | 133 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 272 | 133 | } | 273 | 133 | } | 274 | 49 | size_t row_num = 0; | 275 | | | 276 | 49 | auto format_options = DataTypeSerDe::get_default_format_options(); | 277 | 49 | auto time_zone = cctz::utc_time_zone(); | 278 | 49 | format_options.timezone = &time_zone; | 279 | | | 280 | 133 | for (const auto& bucket : buckets) { | 281 | | if constexpr (std::is_same_v<T, std::string>) { | 282 | | lower_val.SetString(bucket.lower.data(), | 283 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 284 | | 
upper_val.SetString(bucket.upper.data(), | 285 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 286 | 133 | } else { | 287 | 133 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 288 | 133 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 289 | 133 | ++row_num; | 290 | 133 | lower_val.SetString(lower_str.data(), | 291 | 133 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 292 | 133 | upper_val.SetString(upper_str.data(), | 293 | 133 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 294 | 133 | } | 295 | 133 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 296 | 133 | bucket_json.AddMember("lower", lower_val, allocator); | 297 | 133 | bucket_json.AddMember("upper", upper_val, allocator); | 298 | 133 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 299 | 133 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 300 | 133 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 301 | | | 302 | 133 | bucket_arr.PushBack(bucket_json, allocator); | 303 | 133 | } | 304 | | | 305 | 49 | doc.AddMember("buckets", bucket_arr, allocator); | 306 | 49 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 307 | 49 | doc.Accept(writer); | 308 | | | 309 | 49 | return !buckets.empty() && buffer.GetSize() > 0; | 310 | 49 | } |
Unexecuted instantiation: _ZN5doris17histogram_to_jsonINS_7DecimalIlEEEEbRN9rapidjson19GenericStringBufferINS3_4UTF8IcEENS3_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISD_EERKSt10shared_ptrIKNS_9IDataTypeEE Unexecuted instantiation: _ZN5doris17histogram_to_jsonINS_12Decimal128V3EEEbRN9rapidjson19GenericStringBufferINS2_4UTF8IcEENS2_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISC_EERKSt10shared_ptrIKNS_9IDataTypeEE Unexecuted instantiation: _ZN5doris17histogram_to_jsonINS_7DecimalIN4wide7integerILm256EiEEEEEEbRN9rapidjson19GenericStringBufferINS6_4UTF8IcEENS6_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISG_EERKSt10shared_ptrIKNS_9IDataTypeEE _ZN5doris17histogram_to_jsonINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEbRN9rapidjson19GenericStringBufferINS7_4UTF8IcEENS7_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISH_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 246 | 105 | const DataTypePtr& data_type) { | 247 | 105 | rapidjson::Document doc; | 248 | 105 | doc.SetObject(); | 249 | 105 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 250 | | | 251 | 105 | int num_buckets = cast_set<int>(buckets.size()); | 252 | 105 | doc.AddMember("num_buckets", num_buckets, allocator); | 253 | | | 254 | 105 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 255 | 105 | bucket_arr.Reserve(num_buckets, allocator); | 256 | | | 257 | 105 | std::stringstream ss1; | 258 | 105 | std::stringstream ss2; | 259 | | | 260 | 105 | rapidjson::Value lower_val; | 261 | 105 | rapidjson::Value upper_val; | 262 | | | 263 | | // Convert bucket's lower and upper to 2 columns | 264 | 105 | MutableColumnPtr lower_column = data_type->create_column(); | 265 | 105 | MutableColumnPtr upper_column = data_type->create_column(); | 266 | 178 | for (const auto& bucket : buckets) { | 267 | | // String type is different, it has to pass in length | 268 | | // if it is string type , directly use string value | 269 | | if constexpr (!std::is_same_v<T, 
std::string>) { | 270 | | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 271 | | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 272 | | } | 273 | 178 | } | 274 | 105 | size_t row_num = 0; | 275 | | | 276 | 105 | auto format_options = DataTypeSerDe::get_default_format_options(); | 277 | 105 | auto time_zone = cctz::utc_time_zone(); | 278 | 105 | format_options.timezone = &time_zone; | 279 | | | 280 | 178 | for (const auto& bucket : buckets) { | 281 | 178 | if constexpr (std::is_same_v<T, std::string>) { | 282 | 178 | lower_val.SetString(bucket.lower.data(), | 283 | 178 | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 284 | 178 | upper_val.SetString(bucket.upper.data(), | 285 | 178 | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 286 | | } else { | 287 | | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 288 | | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 289 | | ++row_num; | 290 | | lower_val.SetString(lower_str.data(), | 291 | | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 292 | | upper_val.SetString(upper_str.data(), | 293 | | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 294 | | } | 295 | 178 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 296 | 178 | bucket_json.AddMember("lower", lower_val, allocator); | 297 | 178 | bucket_json.AddMember("upper", upper_val, allocator); | 298 | 178 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 299 | 178 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 300 | 178 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 301 | | | 302 | 178 | bucket_arr.PushBack(bucket_json, allocator); | 303 | 178 | } | 304 | | | 305 | 105 | doc.AddMember("buckets", bucket_arr, allocator); | 306 | 105 | 
rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 307 | 105 | doc.Accept(writer); | 308 | | | 309 | 105 | return !buckets.empty() && buffer.GetSize() > 0; | 310 | 105 | } |
_ZN5doris17histogram_to_jsonINS_11DateV2ValueINS_15DateV2ValueTypeEEEEEbRN9rapidjson19GenericStringBufferINS4_4UTF8IcEENS4_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISE_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 246 | 88 | const DataTypePtr& data_type) { | 247 | 88 | rapidjson::Document doc; | 248 | 88 | doc.SetObject(); | 249 | 88 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 250 | | | 251 | 88 | int num_buckets = cast_set<int>(buckets.size()); | 252 | 88 | doc.AddMember("num_buckets", num_buckets, allocator); | 253 | | | 254 | 88 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 255 | 88 | bucket_arr.Reserve(num_buckets, allocator); | 256 | | | 257 | 88 | std::stringstream ss1; | 258 | 88 | std::stringstream ss2; | 259 | | | 260 | 88 | rapidjson::Value lower_val; | 261 | 88 | rapidjson::Value upper_val; | 262 | | | 263 | | // Convert bucket's lower and upper to 2 columns | 264 | 88 | MutableColumnPtr lower_column = data_type->create_column(); | 265 | 88 | MutableColumnPtr upper_column = data_type->create_column(); | 266 | 256 | for (const auto& bucket : buckets) { | 267 | | // String type is different, it has to pass in length | 268 | | // if it is string type , directly use string value | 269 | 256 | if constexpr (!std::is_same_v<T, std::string>) { | 270 | 256 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 271 | 256 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 272 | 256 | } | 273 | 256 | } | 274 | 88 | size_t row_num = 0; | 275 | | | 276 | 88 | auto format_options = DataTypeSerDe::get_default_format_options(); | 277 | 88 | auto time_zone = cctz::utc_time_zone(); | 278 | 88 | format_options.timezone = &time_zone; | 279 | | | 280 | 256 | for (const auto& bucket : buckets) { | 281 | | if constexpr (std::is_same_v<T, std::string>) { | 282 | | lower_val.SetString(bucket.lower.data(), | 283 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), 
allocator); | 284 | | upper_val.SetString(bucket.upper.data(), | 285 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 286 | 256 | } else { | 287 | 256 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 288 | 256 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 289 | 256 | ++row_num; | 290 | 256 | lower_val.SetString(lower_str.data(), | 291 | 256 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 292 | 256 | upper_val.SetString(upper_str.data(), | 293 | 256 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 294 | 256 | } | 295 | 256 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 296 | 256 | bucket_json.AddMember("lower", lower_val, allocator); | 297 | 256 | bucket_json.AddMember("upper", upper_val, allocator); | 298 | 256 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 299 | 256 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 300 | 256 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 301 | | | 302 | 256 | bucket_arr.PushBack(bucket_json, allocator); | 303 | 256 | } | 304 | | | 305 | 88 | doc.AddMember("buckets", bucket_arr, allocator); | 306 | 88 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 307 | 88 | doc.Accept(writer); | 308 | | | 309 | 88 | return !buckets.empty() && buffer.GetSize() > 0; | 310 | 88 | } |
_ZN5doris17histogram_to_jsonINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEEEEbRN9rapidjson19GenericStringBufferINS4_4UTF8IcEENS4_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISE_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 246 | 88 | const DataTypePtr& data_type) { | 247 | 88 | rapidjson::Document doc; | 248 | 88 | doc.SetObject(); | 249 | 88 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 250 | | | 251 | 88 | int num_buckets = cast_set<int>(buckets.size()); | 252 | 88 | doc.AddMember("num_buckets", num_buckets, allocator); | 253 | | | 254 | 88 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 255 | 88 | bucket_arr.Reserve(num_buckets, allocator); | 256 | | | 257 | 88 | std::stringstream ss1; | 258 | 88 | std::stringstream ss2; | 259 | | | 260 | 88 | rapidjson::Value lower_val; | 261 | 88 | rapidjson::Value upper_val; | 262 | | | 263 | | // Convert bucket's lower and upper to 2 columns | 264 | 88 | MutableColumnPtr lower_column = data_type->create_column(); | 265 | 88 | MutableColumnPtr upper_column = data_type->create_column(); | 266 | 255 | for (const auto& bucket : buckets) { | 267 | | // String type is different, it has to pass in length | 268 | | // if it is string type , directly use string value | 269 | 255 | if constexpr (!std::is_same_v<T, std::string>) { | 270 | 255 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 271 | 255 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 272 | 255 | } | 273 | 255 | } | 274 | 88 | size_t row_num = 0; | 275 | | | 276 | 88 | auto format_options = DataTypeSerDe::get_default_format_options(); | 277 | 88 | auto time_zone = cctz::utc_time_zone(); | 278 | 88 | format_options.timezone = &time_zone; | 279 | | | 280 | 255 | for (const auto& bucket : buckets) { | 281 | | if constexpr (std::is_same_v<T, std::string>) { | 282 | | lower_val.SetString(bucket.lower.data(), | 283 | | 
static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 284 | | upper_val.SetString(bucket.upper.data(), | 285 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 286 | 255 | } else { | 287 | 255 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 288 | 255 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 289 | 255 | ++row_num; | 290 | 255 | lower_val.SetString(lower_str.data(), | 291 | 255 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 292 | 255 | upper_val.SetString(upper_str.data(), | 293 | 255 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 294 | 255 | } | 295 | 255 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 296 | 255 | bucket_json.AddMember("lower", lower_val, allocator); | 297 | 255 | bucket_json.AddMember("upper", upper_val, allocator); | 298 | 255 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 299 | 255 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 300 | 255 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 301 | | | 302 | 255 | bucket_arr.PushBack(bucket_json, allocator); | 303 | 255 | } | 304 | | | 305 | 88 | doc.AddMember("buckets", bucket_arr, allocator); | 306 | 88 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 307 | 88 | doc.Accept(writer); | 308 | | | 309 | 88 | return !buckets.empty() && buffer.GetSize() > 0; | 310 | 88 | } |
|
311 | | #include "common/compile_check_end.h" |
312 | | } // namespace doris |