be/src/exec/common/histogram_helpers.hpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
#include <rapidjson/document.h>
#include <rapidjson/prettywriter.h>
#include <rapidjson/stringbuffer.h>

#include <boost/dynamic_bitset.hpp>

#include <cassert>
#include <cstddef>
#include <map>
#include <numeric>
#include <utility>

#include "common/cast_set.h"
#include "core/data_type/data_type_decimal.h"
28 | | |
29 | | namespace doris { |
// One bucket of an equi-height histogram over values of type T.
template <typename T>
struct Bucket {
public:
    Bucket() = default;

    // Construct a fully-populated bucket.
    // The boundary values are moved into place rather than copied: T is
    // instantiated with expensive-to-copy types (e.g. std::string), and the
    // caller already hands them over by value.
    Bucket(T lower, T upper, size_t ndv, size_t count, size_t pre_sum)
            : lower(std::move(lower)),
              upper(std::move(upper)),
              ndv(ndv),
              count(count),
              pre_sum(pre_sum) {}

    T lower; // lower boundary value of the bucket
    T upper; // upper boundary value of the bucket
    size_t ndv;     // number of distinct values in the bucket
    size_t count;   // total number of values in the bucket
    size_t pre_sum; // presumably the cumulative count of all preceding buckets — TODO confirm
};
43 | | |
/**
 * Checks if it is possible to assign the provided value_map to the given
 * number of buckets such that no bucket has a size larger than max_bucket_size.
 *
 * Values are placed greedily in ascending key order; a new bucket is opened
 * whenever adding the next value would push the current bucket past
 * max_bucket_size. Note: a single value whose count exceeds max_bucket_size
 * still occupies (and "fits" in) a bucket of its own.
 *
 * @param value_map A mapping of values to their counts.
 * @param max_bucket_size The maximum size that any bucket is allowed to have.
 * @param num_buckets The number of buckets that we want to assign values to.
 *
 * @return true if the values can be assigned to the buckets, false otherwise.
 */
template <typename T>
bool can_assign_into_buckets(const std::map<T, size_t>& value_map, const size_t max_bucket_size,
                             const size_t num_buckets) {
    // An empty map cannot be assigned to any buckets.
    if (value_map.empty()) {
        return false;
    }

    size_t used_buckets = 1;
    size_t current_bucket_size = 0;

    for (const auto& entry : value_map) {
        const size_t count = entry.second; // only the count matters here
        current_bucket_size += count;

        // If adding the current value to the current bucket would exceed
        // max_bucket_size, then we start a new bucket holding this value.
        if (current_bucket_size > max_bucket_size) {
            ++used_buckets;
            current_bucket_size = count;
        }

        // If we have used more buckets than num_buckets, we cannot assign
        // the values to buckets.
        if (used_buckets > num_buckets) {
            return false;
        }
    }

    return true;
}
|
82 | | |
/**
 * Calculates the maximum number of values that can fit into each bucket given a set of values
 * and the desired number of buckets.
 *
 * Binary-searches the smallest per-bucket capacity for which
 * can_assign_into_buckets() succeeds, capped at a fixed number of search
 * steps, so the result may be a (close) over-estimate rather than the exact
 * minimum.
 *
 * @tparam T the type of the values in the value map
 * @param value_map the map of values and their counts (must be non-empty)
 * @param num_buckets the desired number of buckets (must be > 0)
 * @return the maximum number of values that can fit into each bucket
 */
template <typename T>
size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) {
    // Ensure that the value map is not empty.
    assert(!value_map.empty());
    // num_buckets == 0 would make the (num_buckets - 1) divisor below wrap around.
    assert(num_buckets > 0);

    // Calculate the total number of values in the map.
    const size_t total_values =
            std::accumulate(value_map.begin(), value_map.end(), size_t {0},
                            [](size_t acc, const auto& entry) { return acc + entry.second; });

    // If there is only one bucket, then all values will be assigned to that bucket.
    if (num_buckets == 1) {
        return total_values;
    }

    // To calculate the maximum value count in each bucket, we first calculate a conservative upper
    // bound, which is equal to 2 * total_values / (num_buckets - 1) + 1. This upper bound may exceed
    // the actual maximum value count, but it does not underestimate it. The subsequent binary search
    // algorithm will approach the actual maximum value count.
    size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1;

    // Initialize the lower bound to 0.
    size_t lower_bucket_values = 0;

    // Perform a binary search to find the maximum number of values that can fit into each bucket.
    int search_step = 0;
    // Limit the number of search steps to avoid excessive iteration; the
    // search may therefore stop before fully converging.
    const int max_search_steps = 10;

    while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) {
        // Overflow-safe midpoint of the upper and lower bounds.
        const size_t bucket_values =
                lower_bucket_values + (upper_bucket_values - lower_bucket_values) / 2;

        // Check if the given number of values can be assigned to the desired number of buckets.
        if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) {
            // If it can, the midpoint is a feasible capacity: tighten the upper bound.
            upper_bucket_values = bucket_values;
        } else {
            // If it can't, the capacity is too small: raise the lower bound.
            lower_bucket_values = bucket_values;
        }
        ++search_step;
    }

    return upper_bucket_values;
}
| 112 | 2 | size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1; | 113 | | | 114 | | // Initialize the lower bound to 0 | 115 | 2 | size_t lower_bucket_values = 0; | 116 | | | 117 | | // Perform a binary search to find the maximum number of values that can fit into each bucket | 118 | 2 | int search_step = 0; | 119 | 2 | const int max_search_steps = | 120 | 2 | 10; // Limit the number of search steps to avoid excessive iteration | 121 | | | 122 | 15 | while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) { | 123 | | // Calculate the midpoint of the upper and lower bounds | 124 | 13 | const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2; | 125 | | | 126 | | // Check if the given number of values can be assigned to the desired number of buckets | 127 | 13 | if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) { | 128 | | // If it can, then set the upper bound to the midpoint | 129 | 6 | upper_bucket_values = bucket_values; | 130 | 7 | } else { | 131 | | // If it can't, then set the lower bound to the midpoint | 132 | 7 | lower_bucket_values = bucket_values; | 133 | 7 | } | 134 | | // Increment the search step counter | 135 | 13 | ++search_step; | 136 | 13 | } | 137 | | | 138 | 2 | return upper_bucket_values; | 139 | 2 | } |
Unexecuted instantiation: _ZN5doris27calculate_bucket_max_valuesINS_7DecimalIiEEEEmRKSt3mapIT_mSt4lessIS4_ESaISt4pairIKS4_mEEEm Unexecuted instantiation: _ZN5doris27calculate_bucket_max_valuesINS_7DecimalIlEEEEmRKSt3mapIT_mSt4lessIS4_ESaISt4pairIKS4_mEEEm Unexecuted instantiation: _ZN5doris27calculate_bucket_max_valuesINS_12Decimal128V3EEEmRKSt3mapIT_mSt4lessIS3_ESaISt4pairIKS3_mEEEm Unexecuted instantiation: _ZN5doris27calculate_bucket_max_valuesINS_7DecimalIN4wide7integerILm256EiEEEEEEmRKSt3mapIT_mSt4lessIS7_ESaISt4pairIKS7_mEEEm _ZN5doris27calculate_bucket_max_valuesINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEmRKSt3mapIT_mSt4lessIS8_ESaISt4pairIKS8_mEEEm Line | Count | Source | 93 | 2 | size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) { | 94 | | // Ensure that the value map is not empty | 95 | 2 | assert(!value_map.empty()); | 96 | | | 97 | | // Calculate the total number of values in the map using std::accumulate() | 98 | 2 | size_t total_values = 0; | 99 | 2.00k | for (const auto& [value, count] : value_map) { | 100 | 2.00k | total_values += count; | 101 | 2.00k | } | 102 | | | 103 | | // If there is only one bucket, then all values will be assigned to that bucket | 104 | 2 | if (num_buckets == 1) { | 105 | 0 | return total_values; | 106 | 0 | } | 107 | | | 108 | | // To calculate the maximum value count in each bucket, we first calculate a conservative upper | 109 | | // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed | 110 | | // the actual maximum value count, but it does not underestimate it. The subsequent binary search | 111 | | // algorithm will approach the actual maximum value count. 
| 112 | 2 | size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1; | 113 | | | 114 | | // Initialize the lower bound to 0 | 115 | 2 | size_t lower_bucket_values = 0; | 116 | | | 117 | | // Perform a binary search to find the maximum number of values that can fit into each bucket | 118 | 2 | int search_step = 0; | 119 | 2 | const int max_search_steps = | 120 | 2 | 10; // Limit the number of search steps to avoid excessive iteration | 121 | | | 122 | 21 | while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) { | 123 | | // Calculate the midpoint of the upper and lower bounds | 124 | 19 | const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2; | 125 | | | 126 | | // Check if the given number of values can be assigned to the desired number of buckets | 127 | 19 | if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) { | 128 | | // If it can, then set the upper bound to the midpoint | 129 | 9 | upper_bucket_values = bucket_values; | 130 | 10 | } else { | 131 | | // If it can't, then set the lower bound to the midpoint | 132 | 10 | lower_bucket_values = bucket_values; | 133 | 10 | } | 134 | | // Increment the search step counter | 135 | 19 | ++search_step; | 136 | 19 | } | 137 | | | 138 | 2 | return upper_bucket_values; | 139 | 2 | } |
_ZN5doris27calculate_bucket_max_valuesINS_11DateV2ValueINS_15DateV2ValueTypeEEEEEmRKSt3mapIT_mSt4lessIS5_ESaISt4pairIKS5_mEEEm Line | Count | Source | 93 | 2 | size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) { | 94 | | // Ensure that the value map is not empty | 95 | 2 | assert(!value_map.empty()); | 96 | | | 97 | | // Calculate the total number of values in the map using std::accumulate() | 98 | 2 | size_t total_values = 0; | 99 | 200 | for (const auto& [value, count] : value_map) { | 100 | 200 | total_values += count; | 101 | 200 | } | 102 | | | 103 | | // If there is only one bucket, then all values will be assigned to that bucket | 104 | 2 | if (num_buckets == 1) { | 105 | 0 | return total_values; | 106 | 0 | } | 107 | | | 108 | | // To calculate the maximum value count in each bucket, we first calculate a conservative upper | 109 | | // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed | 110 | | // the actual maximum value count, but it does not underestimate it. The subsequent binary search | 111 | | // algorithm will approach the actual maximum value count. 
| 112 | 2 | size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1; | 113 | | | 114 | | // Initialize the lower bound to 0 | 115 | 2 | size_t lower_bucket_values = 0; | 116 | | | 117 | | // Perform a binary search to find the maximum number of values that can fit into each bucket | 118 | 2 | int search_step = 0; | 119 | 2 | const int max_search_steps = | 120 | 2 | 10; // Limit the number of search steps to avoid excessive iteration | 121 | | | 122 | 15 | while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) { | 123 | | // Calculate the midpoint of the upper and lower bounds | 124 | 13 | const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2; | 125 | | | 126 | | // Check if the given number of values can be assigned to the desired number of buckets | 127 | 13 | if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) { | 128 | | // If it can, then set the upper bound to the midpoint | 129 | 6 | upper_bucket_values = bucket_values; | 130 | 7 | } else { | 131 | | // If it can't, then set the lower bound to the midpoint | 132 | 7 | lower_bucket_values = bucket_values; | 133 | 7 | } | 134 | | // Increment the search step counter | 135 | 13 | ++search_step; | 136 | 13 | } | 137 | | | 138 | 2 | return upper_bucket_values; | 139 | 2 | } |
_ZN5doris27calculate_bucket_max_valuesINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEEEEmRKSt3mapIT_mSt4lessIS5_ESaISt4pairIKS5_mEEEm Line | Count | Source | 93 | 2 | size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) { | 94 | | // Ensure that the value map is not empty | 95 | 2 | assert(!value_map.empty()); | 96 | | | 97 | | // Calculate the total number of values in the map using std::accumulate() | 98 | 2 | size_t total_values = 0; | 99 | 200 | for (const auto& [value, count] : value_map) { | 100 | 200 | total_values += count; | 101 | 200 | } | 102 | | | 103 | | // If there is only one bucket, then all values will be assigned to that bucket | 104 | 2 | if (num_buckets == 1) { | 105 | 0 | return total_values; | 106 | 0 | } | 107 | | | 108 | | // To calculate the maximum value count in each bucket, we first calculate a conservative upper | 109 | | // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed | 110 | | // the actual maximum value count, but it does not underestimate it. The subsequent binary search | 111 | | // algorithm will approach the actual maximum value count. 
| 112 | 2 | size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1; | 113 | | | 114 | | // Initialize the lower bound to 0 | 115 | 2 | size_t lower_bucket_values = 0; | 116 | | | 117 | | // Perform a binary search to find the maximum number of values that can fit into each bucket | 118 | 2 | int search_step = 0; | 119 | 2 | const int max_search_steps = | 120 | 2 | 10; // Limit the number of search steps to avoid excessive iteration | 121 | | | 122 | 15 | while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) { | 123 | | // Calculate the midpoint of the upper and lower bounds | 124 | 13 | const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2; | 125 | | | 126 | | // Check if the given number of values can be assigned to the desired number of buckets | 127 | 13 | if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) { | 128 | | // If it can, then set the upper bound to the midpoint | 129 | 6 | upper_bucket_values = bucket_values; | 130 | 7 | } else { | 131 | | // If it can't, then set the lower bound to the midpoint | 132 | 7 | lower_bucket_values = bucket_values; | 133 | 7 | } | 134 | | // Increment the search step counter | 135 | 13 | ++search_step; | 136 | 13 | } | 137 | | | 138 | 2 | return upper_bucket_values; | 139 | 2 | } |
|
140 | | |
141 | | /** |
142 | | * Greedy equi-height histogram construction algorithm, inspired by the MySQL |
143 | | * equi_height implementation(https://dev.mysql.com/doc/dev/mysql-server/latest/equi__height_8h.html). |
144 | | * |
145 | | * Given an ordered collection of [value, count] pairs and a maximum bucket |
146 | | * size, construct a histogram by inserting values into a bucket while keeping |
147 | | * track of its size. If the insertion of a value into a non-empty bucket |
148 | | * causes the bucket to exceed the maximum size, create a new empty bucket and |
149 | | * continue. |
150 | | * |
151 | | * The algorithm guarantees a selectivity estimation error of at most ~2 * |
152 | | * #values / #buckets, often less. Values with a higher relative frequency are |
153 | | * guaranteed to be placed in singleton buckets. |
154 | | * |
155 | | * The minimum composite bucket size is used to minimize the worst case |
156 | | * selectivity estimation error. In general, the algorithm will adapt to the |
157 | | * data distribution to minimize the size of composite buckets. The heavy values |
158 | | * can be placed in singleton buckets and the remaining values will be evenly |
159 | | * spread across the remaining buckets, leading to a lower composite bucket size. |
160 | | * |
161 | | * Note: The term "value" refers to an entry in a column and the actual value |
162 | | * of an entry. The ordered_map is an ordered collection of [distinct value, |
163 | | * value count] pairs. For example, a Value_map<String> could contain the pairs ["a", 1], ["b", 2] |
164 | | * to represent one "a" value and two "b" values. |
165 | | * |
166 | | * @param buckets A vector of empty buckets that will be populated with data. |
167 | | * @param ordered_map An ordered map of distinct values and their counts. |
168 | | * @param max_num_buckets The maximum number of buckets that can be used. |
169 | | * |
170 | | * @return True if the buckets were successfully built, false otherwise. |
171 | | */ |
172 | | template <typename T> |
173 | | bool build_histogram(std::vector<Bucket<T>>& buckets, const std::map<T, size_t>& ordered_map, |
174 | 43 | const size_t max_num_buckets) { |
175 | | // If the input map is empty, there is nothing to build. |
176 | 43 | if (ordered_map.empty()) { |
177 | 17 | return false; |
178 | 17 | } |
179 | | |
180 | | // Calculate the maximum number of values that can be assigned to each bucket. |
181 | 26 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); |
182 | | |
183 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional |
184 | | // allocations when inserting buckets. |
185 | 26 | buckets.clear(); |
186 | 26 | buckets.reserve(max_num_buckets); |
187 | | |
188 | | // Initialize bucket variables. |
189 | 26 | size_t distinct_values_count = 0; |
190 | 26 | size_t values_count = 0; |
191 | 26 | size_t cumulative_values = 0; |
192 | | |
193 | | // Record how many values still need to be assigned. |
194 | 26 | auto remaining_distinct_values = ordered_map.size(); |
195 | | |
196 | 26 | auto it = ordered_map.begin(); |
197 | | |
198 | | // Lower value of the current bucket. |
199 | 26 | const T* lower_value = &it->first; |
200 | | |
201 | | // Iterate over the ordered map of distinct values and their counts. |
202 | 3.84k | for (; it != ordered_map.end(); ++it) { |
203 | 3.82k | const auto count = it->second; |
204 | 3.82k | const auto current_value = it->first; |
205 | | |
206 | | // Update the bucket counts and track the number of distinct values assigned. |
207 | 3.82k | distinct_values_count++; |
208 | 3.82k | remaining_distinct_values--; |
209 | 3.82k | values_count += count; |
210 | 3.82k | cumulative_values += count; |
211 | | |
212 | | // Check whether the current value should be added to the current bucket. |
213 | 3.82k | auto next = std::next(it); |
214 | 3.82k | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; |
215 | | |
216 | 3.82k | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && |
217 | 3.82k | values_count + next->second <= bucket_max_values) { |
218 | | // If the current value is the last in the input map and there are more remaining |
219 | | // distinct values than empty buckets and adding the value does not cause the bucket |
220 | | // to exceed its max size, skip adding the value to the current bucket. |
221 | 3.70k | continue; |
222 | 3.70k | } |
223 | | |
224 | | // Finalize the current bucket and add it to our collection of buckets. |
225 | 114 | auto pre_sum = cumulative_values - values_count; |
226 | | |
227 | 114 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, |
228 | 114 | pre_sum); |
229 | 114 | buckets.push_back(new_bucket); |
230 | | |
231 | | // Reset variables for the next bucket. |
232 | 114 | if (next != ordered_map.end()) { |
233 | 88 | lower_value = &next->first; |
234 | 88 | } |
235 | 114 | values_count = 0; |
236 | 114 | distinct_values_count = 0; |
237 | 114 | } |
238 | | |
239 | 26 | return true; |
240 | 43 | } _ZN5doris15build_histogramIiEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm Line | Count | Source | 174 | 11 | const size_t max_num_buckets) { | 175 | | // If the input map is empty, there is nothing to build. | 176 | 11 | if (ordered_map.empty()) { | 177 | 3 | return false; | 178 | 3 | } | 179 | | | 180 | | // Calculate the maximum number of values that can be assigned to each bucket. | 181 | 8 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 182 | | | 183 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 184 | | // allocations when inserting buckets. | 185 | 8 | buckets.clear(); | 186 | 8 | buckets.reserve(max_num_buckets); | 187 | | | 188 | | // Initialize bucket variables. | 189 | 8 | size_t distinct_values_count = 0; | 190 | 8 | size_t values_count = 0; | 191 | 8 | size_t cumulative_values = 0; | 192 | | | 193 | | // Record how many values still need to be assigned. | 194 | 8 | auto remaining_distinct_values = ordered_map.size(); | 195 | | | 196 | 8 | auto it = ordered_map.begin(); | 197 | | | 198 | | // Lower value of the current bucket. | 199 | 8 | const T* lower_value = &it->first; | 200 | | | 201 | | // Iterate over the ordered map of distinct values and their counts. | 202 | 229 | for (; it != ordered_map.end(); ++it) { | 203 | 221 | const auto count = it->second; | 204 | 221 | const auto current_value = it->first; | 205 | | | 206 | | // Update the bucket counts and track the number of distinct values assigned. | 207 | 221 | distinct_values_count++; | 208 | 221 | remaining_distinct_values--; | 209 | 221 | values_count += count; | 210 | 221 | cumulative_values += count; | 211 | | | 212 | | // Check whether the current value should be added to the current bucket. 
| 213 | 221 | auto next = std::next(it); | 214 | 221 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 215 | | | 216 | 221 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 217 | 221 | values_count + next->second <= bucket_max_values) { | 218 | | // If the current value is the last in the input map and there are more remaining | 219 | | // distinct values than empty buckets and adding the value does not cause the bucket | 220 | | // to exceed its max size, skip adding the value to the current bucket. | 221 | 197 | continue; | 222 | 197 | } | 223 | | | 224 | | // Finalize the current bucket and add it to our collection of buckets. | 225 | 24 | auto pre_sum = cumulative_values - values_count; | 226 | | | 227 | 24 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 228 | 24 | pre_sum); | 229 | 24 | buckets.push_back(new_bucket); | 230 | | | 231 | | // Reset variables for the next bucket. | 232 | 24 | if (next != ordered_map.end()) { | 233 | 16 | lower_value = &next->first; | 234 | 16 | } | 235 | 24 | values_count = 0; | 236 | 24 | distinct_values_count = 0; | 237 | 24 | } | 238 | | | 239 | 8 | return true; | 240 | 11 | } |
Unexecuted instantiation: _ZN5doris15build_histogramIhEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm _ZN5doris15build_histogramIaEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm Line | Count | Source | 174 | 4 | const size_t max_num_buckets) { | 175 | | // If the input map is empty, there is nothing to build. | 176 | 4 | if (ordered_map.empty()) { | 177 | 2 | return false; | 178 | 2 | } | 179 | | | 180 | | // Calculate the maximum number of values that can be assigned to each bucket. | 181 | 2 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 182 | | | 183 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 184 | | // allocations when inserting buckets. | 185 | 2 | buckets.clear(); | 186 | 2 | buckets.reserve(max_num_buckets); | 187 | | | 188 | | // Initialize bucket variables. | 189 | 2 | size_t distinct_values_count = 0; | 190 | 2 | size_t values_count = 0; | 191 | 2 | size_t cumulative_values = 0; | 192 | | | 193 | | // Record how many values still need to be assigned. | 194 | 2 | auto remaining_distinct_values = ordered_map.size(); | 195 | | | 196 | 2 | auto it = ordered_map.begin(); | 197 | | | 198 | | // Lower value of the current bucket. | 199 | 2 | const T* lower_value = &it->first; | 200 | | | 201 | | // Iterate over the ordered map of distinct values and their counts. | 202 | 202 | for (; it != ordered_map.end(); ++it) { | 203 | 200 | const auto count = it->second; | 204 | 200 | const auto current_value = it->first; | 205 | | | 206 | | // Update the bucket counts and track the number of distinct values assigned. | 207 | 200 | distinct_values_count++; | 208 | 200 | remaining_distinct_values--; | 209 | 200 | values_count += count; | 210 | 200 | cumulative_values += count; | 211 | | | 212 | | // Check whether the current value should be added to the current bucket. 
| 213 | 200 | auto next = std::next(it); | 214 | 200 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 215 | | | 216 | 200 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 217 | 200 | values_count + next->second <= bucket_max_values) { | 218 | | // If the current value is the last in the input map and there are more remaining | 219 | | // distinct values than empty buckets and adding the value does not cause the bucket | 220 | | // to exceed its max size, skip adding the value to the current bucket. | 221 | 190 | continue; | 222 | 190 | } | 223 | | | 224 | | // Finalize the current bucket and add it to our collection of buckets. | 225 | 10 | auto pre_sum = cumulative_values - values_count; | 226 | | | 227 | 10 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 228 | 10 | pre_sum); | 229 | 10 | buckets.push_back(new_bucket); | 230 | | | 231 | | // Reset variables for the next bucket. | 232 | 10 | if (next != ordered_map.end()) { | 233 | 8 | lower_value = &next->first; | 234 | 8 | } | 235 | 10 | values_count = 0; | 236 | 10 | distinct_values_count = 0; | 237 | 10 | } | 238 | | | 239 | 2 | return true; | 240 | 4 | } |
_ZN5doris15build_histogramIsEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm Line | Count | Source | 174 | 4 | const size_t max_num_buckets) { | 175 | | // If the input map is empty, there is nothing to build. | 176 | 4 | if (ordered_map.empty()) { | 177 | 2 | return false; | 178 | 2 | } | 179 | | | 180 | | // Calculate the maximum number of values that can be assigned to each bucket. | 181 | 2 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 182 | | | 183 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 184 | | // allocations when inserting buckets. | 185 | 2 | buckets.clear(); | 186 | 2 | buckets.reserve(max_num_buckets); | 187 | | | 188 | | // Initialize bucket variables. | 189 | 2 | size_t distinct_values_count = 0; | 190 | 2 | size_t values_count = 0; | 191 | 2 | size_t cumulative_values = 0; | 192 | | | 193 | | // Record how many values still need to be assigned. | 194 | 2 | auto remaining_distinct_values = ordered_map.size(); | 195 | | | 196 | 2 | auto it = ordered_map.begin(); | 197 | | | 198 | | // Lower value of the current bucket. | 199 | 2 | const T* lower_value = &it->first; | 200 | | | 201 | | // Iterate over the ordered map of distinct values and their counts. | 202 | 202 | for (; it != ordered_map.end(); ++it) { | 203 | 200 | const auto count = it->second; | 204 | 200 | const auto current_value = it->first; | 205 | | | 206 | | // Update the bucket counts and track the number of distinct values assigned. | 207 | 200 | distinct_values_count++; | 208 | 200 | remaining_distinct_values--; | 209 | 200 | values_count += count; | 210 | 200 | cumulative_values += count; | 211 | | | 212 | | // Check whether the current value should be added to the current bucket. 
| 213 | 200 | auto next = std::next(it); | 214 | 200 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 215 | | | 216 | 200 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 217 | 200 | values_count + next->second <= bucket_max_values) { | 218 | | // If the current value is the last in the input map and there are more remaining | 219 | | // distinct values than empty buckets and adding the value does not cause the bucket | 220 | | // to exceed its max size, skip adding the value to the current bucket. | 221 | 190 | continue; | 222 | 190 | } | 223 | | | 224 | | // Finalize the current bucket and add it to our collection of buckets. | 225 | 10 | auto pre_sum = cumulative_values - values_count; | 226 | | | 227 | 10 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 228 | 10 | pre_sum); | 229 | 10 | buckets.push_back(new_bucket); | 230 | | | 231 | | // Reset variables for the next bucket. | 232 | 10 | if (next != ordered_map.end()) { | 233 | 8 | lower_value = &next->first; | 234 | 8 | } | 235 | 10 | values_count = 0; | 236 | 10 | distinct_values_count = 0; | 237 | 10 | } | 238 | | | 239 | 2 | return true; | 240 | 4 | } |
_ZN5doris15build_histogramIlEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm Line | Count | Source | 174 | 4 | const size_t max_num_buckets) { | 175 | | // If the input map is empty, there is nothing to build. | 176 | 4 | if (ordered_map.empty()) { | 177 | 2 | return false; | 178 | 2 | } | 179 | | | 180 | | // Calculate the maximum number of values that can be assigned to each bucket. | 181 | 2 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 182 | | | 183 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 184 | | // allocations when inserting buckets. | 185 | 2 | buckets.clear(); | 186 | 2 | buckets.reserve(max_num_buckets); | 187 | | | 188 | | // Initialize bucket variables. | 189 | 2 | size_t distinct_values_count = 0; | 190 | 2 | size_t values_count = 0; | 191 | 2 | size_t cumulative_values = 0; | 192 | | | 193 | | // Record how many values still need to be assigned. | 194 | 2 | auto remaining_distinct_values = ordered_map.size(); | 195 | | | 196 | 2 | auto it = ordered_map.begin(); | 197 | | | 198 | | // Lower value of the current bucket. | 199 | 2 | const T* lower_value = &it->first; | 200 | | | 201 | | // Iterate over the ordered map of distinct values and their counts. | 202 | 202 | for (; it != ordered_map.end(); ++it) { | 203 | 200 | const auto count = it->second; | 204 | 200 | const auto current_value = it->first; | 205 | | | 206 | | // Update the bucket counts and track the number of distinct values assigned. | 207 | 200 | distinct_values_count++; | 208 | 200 | remaining_distinct_values--; | 209 | 200 | values_count += count; | 210 | 200 | cumulative_values += count; | 211 | | | 212 | | // Check whether the current value should be added to the current bucket. 
| 213 | 200 | auto next = std::next(it); | 214 | 200 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 215 | | | 216 | 200 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 217 | 200 | values_count + next->second <= bucket_max_values) { | 218 | | // If the current value is the last in the input map and there are more remaining | 219 | | // distinct values than empty buckets and adding the value does not cause the bucket | 220 | | // to exceed its max size, skip adding the value to the current bucket. | 221 | 190 | continue; | 222 | 190 | } | 223 | | | 224 | | // Finalize the current bucket and add it to our collection of buckets. | 225 | 10 | auto pre_sum = cumulative_values - values_count; | 226 | | | 227 | 10 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 228 | 10 | pre_sum); | 229 | 10 | buckets.push_back(new_bucket); | 230 | | | 231 | | // Reset variables for the next bucket. | 232 | 10 | if (next != ordered_map.end()) { | 233 | 8 | lower_value = &next->first; | 234 | 8 | } | 235 | 10 | values_count = 0; | 236 | 10 | distinct_values_count = 0; | 237 | 10 | } | 238 | | | 239 | 2 | return true; | 240 | 4 | } |
_ZN5doris15build_histogramInEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm Line | Count | Source | 174 | 4 | const size_t max_num_buckets) { | 175 | | // If the input map is empty, there is nothing to build. | 176 | 4 | if (ordered_map.empty()) { | 177 | 2 | return false; | 178 | 2 | } | 179 | | | 180 | | // Calculate the maximum number of values that can be assigned to each bucket. | 181 | 2 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 182 | | | 183 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 184 | | // allocations when inserting buckets. | 185 | 2 | buckets.clear(); | 186 | 2 | buckets.reserve(max_num_buckets); | 187 | | | 188 | | // Initialize bucket variables. | 189 | 2 | size_t distinct_values_count = 0; | 190 | 2 | size_t values_count = 0; | 191 | 2 | size_t cumulative_values = 0; | 192 | | | 193 | | // Record how many values still need to be assigned. | 194 | 2 | auto remaining_distinct_values = ordered_map.size(); | 195 | | | 196 | 2 | auto it = ordered_map.begin(); | 197 | | | 198 | | // Lower value of the current bucket. | 199 | 2 | const T* lower_value = &it->first; | 200 | | | 201 | | // Iterate over the ordered map of distinct values and their counts. | 202 | 202 | for (; it != ordered_map.end(); ++it) { | 203 | 200 | const auto count = it->second; | 204 | 200 | const auto current_value = it->first; | 205 | | | 206 | | // Update the bucket counts and track the number of distinct values assigned. | 207 | 200 | distinct_values_count++; | 208 | 200 | remaining_distinct_values--; | 209 | 200 | values_count += count; | 210 | 200 | cumulative_values += count; | 211 | | | 212 | | // Check whether the current value should be added to the current bucket. 
| 213 | 200 | auto next = std::next(it); | 214 | 200 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 215 | | | 216 | 200 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 217 | 200 | values_count + next->second <= bucket_max_values) { | 218 | | // If the current value is the last in the input map and there are more remaining | 219 | | // distinct values than empty buckets and adding the value does not cause the bucket | 220 | | // to exceed its max size, skip adding the value to the current bucket. | 221 | 190 | continue; | 222 | 190 | } | 223 | | | 224 | | // Finalize the current bucket and add it to our collection of buckets. | 225 | 10 | auto pre_sum = cumulative_values - values_count; | 226 | | | 227 | 10 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 228 | 10 | pre_sum); | 229 | 10 | buckets.push_back(new_bucket); | 230 | | | 231 | | // Reset variables for the next bucket. | 232 | 10 | if (next != ordered_map.end()) { | 233 | 8 | lower_value = &next->first; | 234 | 8 | } | 235 | 10 | values_count = 0; | 236 | 10 | distinct_values_count = 0; | 237 | 10 | } | 238 | | | 239 | 2 | return true; | 240 | 4 | } |
_ZN5doris15build_histogramIfEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm Line | Count | Source | 174 | 4 | const size_t max_num_buckets) { | 175 | | // If the input map is empty, there is nothing to build. | 176 | 4 | if (ordered_map.empty()) { | 177 | 2 | return false; | 178 | 2 | } | 179 | | | 180 | | // Calculate the maximum number of values that can be assigned to each bucket. | 181 | 2 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 182 | | | 183 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 184 | | // allocations when inserting buckets. | 185 | 2 | buckets.clear(); | 186 | 2 | buckets.reserve(max_num_buckets); | 187 | | | 188 | | // Initialize bucket variables. | 189 | 2 | size_t distinct_values_count = 0; | 190 | 2 | size_t values_count = 0; | 191 | 2 | size_t cumulative_values = 0; | 192 | | | 193 | | // Record how many values still need to be assigned. | 194 | 2 | auto remaining_distinct_values = ordered_map.size(); | 195 | | | 196 | 2 | auto it = ordered_map.begin(); | 197 | | | 198 | | // Lower value of the current bucket. | 199 | 2 | const T* lower_value = &it->first; | 200 | | | 201 | | // Iterate over the ordered map of distinct values and their counts. | 202 | 202 | for (; it != ordered_map.end(); ++it) { | 203 | 200 | const auto count = it->second; | 204 | 200 | const auto current_value = it->first; | 205 | | | 206 | | // Update the bucket counts and track the number of distinct values assigned. | 207 | 200 | distinct_values_count++; | 208 | 200 | remaining_distinct_values--; | 209 | 200 | values_count += count; | 210 | 200 | cumulative_values += count; | 211 | | | 212 | | // Check whether the current value should be added to the current bucket. 
| 213 | 200 | auto next = std::next(it); | 214 | 200 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 215 | | | 216 | 200 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 217 | 200 | values_count + next->second <= bucket_max_values) { | 218 | | // If the current value is the last in the input map and there are more remaining | 219 | | // distinct values than empty buckets and adding the value does not cause the bucket | 220 | | // to exceed its max size, skip adding the value to the current bucket. | 221 | 190 | continue; | 222 | 190 | } | 223 | | | 224 | | // Finalize the current bucket and add it to our collection of buckets. | 225 | 10 | auto pre_sum = cumulative_values - values_count; | 226 | | | 227 | 10 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 228 | 10 | pre_sum); | 229 | 10 | buckets.push_back(new_bucket); | 230 | | | 231 | | // Reset variables for the next bucket. | 232 | 10 | if (next != ordered_map.end()) { | 233 | 8 | lower_value = &next->first; | 234 | 8 | } | 235 | 10 | values_count = 0; | 236 | 10 | distinct_values_count = 0; | 237 | 10 | } | 238 | | | 239 | 2 | return true; | 240 | 4 | } |
_ZN5doris15build_histogramIdEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm Line | Count | Source | 174 | 4 | const size_t max_num_buckets) { | 175 | | // If the input map is empty, there is nothing to build. | 176 | 4 | if (ordered_map.empty()) { | 177 | 2 | return false; | 178 | 2 | } | 179 | | | 180 | | // Calculate the maximum number of values that can be assigned to each bucket. | 181 | 2 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 182 | | | 183 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 184 | | // allocations when inserting buckets. | 185 | 2 | buckets.clear(); | 186 | 2 | buckets.reserve(max_num_buckets); | 187 | | | 188 | | // Initialize bucket variables. | 189 | 2 | size_t distinct_values_count = 0; | 190 | 2 | size_t values_count = 0; | 191 | 2 | size_t cumulative_values = 0; | 192 | | | 193 | | // Record how many values still need to be assigned. | 194 | 2 | auto remaining_distinct_values = ordered_map.size(); | 195 | | | 196 | 2 | auto it = ordered_map.begin(); | 197 | | | 198 | | // Lower value of the current bucket. | 199 | 2 | const T* lower_value = &it->first; | 200 | | | 201 | | // Iterate over the ordered map of distinct values and their counts. | 202 | 202 | for (; it != ordered_map.end(); ++it) { | 203 | 200 | const auto count = it->second; | 204 | 200 | const auto current_value = it->first; | 205 | | | 206 | | // Update the bucket counts and track the number of distinct values assigned. | 207 | 200 | distinct_values_count++; | 208 | 200 | remaining_distinct_values--; | 209 | 200 | values_count += count; | 210 | 200 | cumulative_values += count; | 211 | | | 212 | | // Check whether the current value should be added to the current bucket. 
| 213 | 200 | auto next = std::next(it); | 214 | 200 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 215 | | | 216 | 200 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 217 | 200 | values_count + next->second <= bucket_max_values) { | 218 | | // If the current value is the last in the input map and there are more remaining | 219 | | // distinct values than empty buckets and adding the value does not cause the bucket | 220 | | // to exceed its max size, skip adding the value to the current bucket. | 221 | 190 | continue; | 222 | 190 | } | 223 | | | 224 | | // Finalize the current bucket and add it to our collection of buckets. | 225 | 10 | auto pre_sum = cumulative_values - values_count; | 226 | | | 227 | 10 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 228 | 10 | pre_sum); | 229 | 10 | buckets.push_back(new_bucket); | 230 | | | 231 | | // Reset variables for the next bucket. | 232 | 10 | if (next != ordered_map.end()) { | 233 | 8 | lower_value = &next->first; | 234 | 8 | } | 235 | 10 | values_count = 0; | 236 | 10 | distinct_values_count = 0; | 237 | 10 | } | 238 | | | 239 | 2 | return true; | 240 | 4 | } |
Unexecuted instantiation: _ZN5doris15build_histogramINS_7DecimalIiEEEEbRSt6vectorINS_6BucketIT_EESaIS6_EERKSt3mapIS5_mSt4lessIS5_ESaISt4pairIKS5_mEEEm Unexecuted instantiation: _ZN5doris15build_histogramINS_7DecimalIlEEEEbRSt6vectorINS_6BucketIT_EESaIS6_EERKSt3mapIS5_mSt4lessIS5_ESaISt4pairIKS5_mEEEm Unexecuted instantiation: _ZN5doris15build_histogramINS_12Decimal128V3EEEbRSt6vectorINS_6BucketIT_EESaIS5_EERKSt3mapIS4_mSt4lessIS4_ESaISt4pairIKS4_mEEEm Unexecuted instantiation: _ZN5doris15build_histogramINS_7DecimalIN4wide7integerILm256EiEEEEEEbRSt6vectorINS_6BucketIT_EESaIS9_EERKSt3mapIS8_mSt4lessIS8_ESaISt4pairIKS8_mEEEm _ZN5doris15build_histogramINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEbRSt6vectorINS_6BucketIT_EESaISA_EERKSt3mapIS9_mSt4lessIS9_ESaISt4pairIKS9_mEEEm Line | Count | Source | 174 | 4 | const size_t max_num_buckets) { | 175 | | // If the input map is empty, there is nothing to build. | 176 | 4 | if (ordered_map.empty()) { | 177 | 2 | return false; | 178 | 2 | } | 179 | | | 180 | | // Calculate the maximum number of values that can be assigned to each bucket. | 181 | 2 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 182 | | | 183 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 184 | | // allocations when inserting buckets. | 185 | 2 | buckets.clear(); | 186 | 2 | buckets.reserve(max_num_buckets); | 187 | | | 188 | | // Initialize bucket variables. | 189 | 2 | size_t distinct_values_count = 0; | 190 | 2 | size_t values_count = 0; | 191 | 2 | size_t cumulative_values = 0; | 192 | | | 193 | | // Record how many values still need to be assigned. | 194 | 2 | auto remaining_distinct_values = ordered_map.size(); | 195 | | | 196 | 2 | auto it = ordered_map.begin(); | 197 | | | 198 | | // Lower value of the current bucket. 
| 199 | 2 | const T* lower_value = &it->first; | 200 | | | 201 | | // Iterate over the ordered map of distinct values and their counts. | 202 | 2.00k | for (; it != ordered_map.end(); ++it) { | 203 | 2.00k | const auto count = it->second; | 204 | 2.00k | const auto current_value = it->first; | 205 | | | 206 | | // Update the bucket counts and track the number of distinct values assigned. | 207 | 2.00k | distinct_values_count++; | 208 | 2.00k | remaining_distinct_values--; | 209 | 2.00k | values_count += count; | 210 | 2.00k | cumulative_values += count; | 211 | | | 212 | | // Check whether the current value should be added to the current bucket. | 213 | 2.00k | auto next = std::next(it); | 214 | 2.00k | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 215 | | | 216 | 2.00k | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 217 | 2.00k | values_count + next->second <= bucket_max_values) { | 218 | | // If the current value is the last in the input map and there are more remaining | 219 | | // distinct values than empty buckets and adding the value does not cause the bucket | 220 | | // to exceed its max size, skip adding the value to the current bucket. | 221 | 1.99k | continue; | 222 | 1.99k | } | 223 | | | 224 | | // Finalize the current bucket and add it to our collection of buckets. | 225 | 10 | auto pre_sum = cumulative_values - values_count; | 226 | | | 227 | 10 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 228 | 10 | pre_sum); | 229 | 10 | buckets.push_back(new_bucket); | 230 | | | 231 | | // Reset variables for the next bucket. | 232 | 10 | if (next != ordered_map.end()) { | 233 | 8 | lower_value = &next->first; | 234 | 8 | } | 235 | 10 | values_count = 0; | 236 | 10 | distinct_values_count = 0; | 237 | 10 | } | 238 | | | 239 | 2 | return true; | 240 | 4 | } |
_ZN5doris15build_histogramINS_11DateV2ValueINS_15DateV2ValueTypeEEEEEbRSt6vectorINS_6BucketIT_EESaIS7_EERKSt3mapIS6_mSt4lessIS6_ESaISt4pairIKS6_mEEEm Line | Count | Source | 174 | 2 | const size_t max_num_buckets) { | 175 | | // If the input map is empty, there is nothing to build. | 176 | 2 | if (ordered_map.empty()) { | 177 | 0 | return false; | 178 | 0 | } | 179 | | | 180 | | // Calculate the maximum number of values that can be assigned to each bucket. | 181 | 2 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 182 | | | 183 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 184 | | // allocations when inserting buckets. | 185 | 2 | buckets.clear(); | 186 | 2 | buckets.reserve(max_num_buckets); | 187 | | | 188 | | // Initialize bucket variables. | 189 | 2 | size_t distinct_values_count = 0; | 190 | 2 | size_t values_count = 0; | 191 | 2 | size_t cumulative_values = 0; | 192 | | | 193 | | // Record how many values still need to be assigned. | 194 | 2 | auto remaining_distinct_values = ordered_map.size(); | 195 | | | 196 | 2 | auto it = ordered_map.begin(); | 197 | | | 198 | | // Lower value of the current bucket. | 199 | 2 | const T* lower_value = &it->first; | 200 | | | 201 | | // Iterate over the ordered map of distinct values and their counts. | 202 | 202 | for (; it != ordered_map.end(); ++it) { | 203 | 200 | const auto count = it->second; | 204 | 200 | const auto current_value = it->first; | 205 | | | 206 | | // Update the bucket counts and track the number of distinct values assigned. | 207 | 200 | distinct_values_count++; | 208 | 200 | remaining_distinct_values--; | 209 | 200 | values_count += count; | 210 | 200 | cumulative_values += count; | 211 | | | 212 | | // Check whether the current value should be added to the current bucket. 
| 213 | 200 | auto next = std::next(it); | 214 | 200 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 215 | | | 216 | 200 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 217 | 200 | values_count + next->second <= bucket_max_values) { | 218 | | // If the current value is the last in the input map and there are more remaining | 219 | | // distinct values than empty buckets and adding the value does not cause the bucket | 220 | | // to exceed its max size, skip adding the value to the current bucket. | 221 | 190 | continue; | 222 | 190 | } | 223 | | | 224 | | // Finalize the current bucket and add it to our collection of buckets. | 225 | 10 | auto pre_sum = cumulative_values - values_count; | 226 | | | 227 | 10 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 228 | 10 | pre_sum); | 229 | 10 | buckets.push_back(new_bucket); | 230 | | | 231 | | // Reset variables for the next bucket. | 232 | 10 | if (next != ordered_map.end()) { | 233 | 8 | lower_value = &next->first; | 234 | 8 | } | 235 | 10 | values_count = 0; | 236 | 10 | distinct_values_count = 0; | 237 | 10 | } | 238 | | | 239 | 2 | return true; | 240 | 2 | } |
_ZN5doris15build_histogramINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEEEEbRSt6vectorINS_6BucketIT_EESaIS7_EERKSt3mapIS6_mSt4lessIS6_ESaISt4pairIKS6_mEEEm Line | Count | Source | 174 | 2 | const size_t max_num_buckets) { | 175 | | // If the input map is empty, there is nothing to build. | 176 | 2 | if (ordered_map.empty()) { | 177 | 0 | return false; | 178 | 0 | } | 179 | | | 180 | | // Calculate the maximum number of values that can be assigned to each bucket. | 181 | 2 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 182 | | | 183 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 184 | | // allocations when inserting buckets. | 185 | 2 | buckets.clear(); | 186 | 2 | buckets.reserve(max_num_buckets); | 187 | | | 188 | | // Initialize bucket variables. | 189 | 2 | size_t distinct_values_count = 0; | 190 | 2 | size_t values_count = 0; | 191 | 2 | size_t cumulative_values = 0; | 192 | | | 193 | | // Record how many values still need to be assigned. | 194 | 2 | auto remaining_distinct_values = ordered_map.size(); | 195 | | | 196 | 2 | auto it = ordered_map.begin(); | 197 | | | 198 | | // Lower value of the current bucket. | 199 | 2 | const T* lower_value = &it->first; | 200 | | | 201 | | // Iterate over the ordered map of distinct values and their counts. | 202 | 202 | for (; it != ordered_map.end(); ++it) { | 203 | 200 | const auto count = it->second; | 204 | 200 | const auto current_value = it->first; | 205 | | | 206 | | // Update the bucket counts and track the number of distinct values assigned. | 207 | 200 | distinct_values_count++; | 208 | 200 | remaining_distinct_values--; | 209 | 200 | values_count += count; | 210 | 200 | cumulative_values += count; | 211 | | | 212 | | // Check whether the current value should be added to the current bucket. 
| 213 | 200 | auto next = std::next(it); | 214 | 200 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 215 | | | 216 | 200 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 217 | 200 | values_count + next->second <= bucket_max_values) { | 218 | | // If the current value is the last in the input map and there are more remaining | 219 | | // distinct values than empty buckets and adding the value does not cause the bucket | 220 | | // to exceed its max size, skip adding the value to the current bucket. | 221 | 190 | continue; | 222 | 190 | } | 223 | | | 224 | | // Finalize the current bucket and add it to our collection of buckets. | 225 | 10 | auto pre_sum = cumulative_values - values_count; | 226 | | | 227 | 10 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 228 | 10 | pre_sum); | 229 | 10 | buckets.push_back(new_bucket); | 230 | | | 231 | | // Reset variables for the next bucket. | 232 | 10 | if (next != ordered_map.end()) { | 233 | 8 | lower_value = &next->first; | 234 | 8 | } | 235 | 10 | values_count = 0; | 236 | 10 | distinct_values_count = 0; | 237 | 10 | } | 238 | | | 239 | 2 | return true; | 240 | 2 | } |
|
241 | | |
242 | | template <typename T> |
243 | | bool histogram_to_json(rapidjson::StringBuffer& buffer, const std::vector<Bucket<T>>& buckets, |
244 | 38 | const DataTypePtr& data_type) { |
245 | 38 | rapidjson::Document doc; |
246 | 38 | doc.SetObject(); |
247 | 38 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); |
248 | | |
249 | 38 | int num_buckets = cast_set<int>(buckets.size()); |
250 | 38 | doc.AddMember("num_buckets", num_buckets, allocator); |
251 | | |
252 | 38 | rapidjson::Value bucket_arr(rapidjson::kArrayType); |
253 | 38 | bucket_arr.Reserve(num_buckets, allocator); |
254 | | |
255 | 38 | std::stringstream ss1; |
256 | 38 | std::stringstream ss2; |
257 | | |
258 | 38 | rapidjson::Value lower_val; |
259 | 38 | rapidjson::Value upper_val; |
260 | | |
261 | | // Convert bucket's lower and upper to 2 columns |
262 | 38 | MutableColumnPtr lower_column = data_type->create_column(); |
263 | 38 | MutableColumnPtr upper_column = data_type->create_column(); |
264 | 102 | for (const auto& bucket : buckets) { |
265 | | // String type is different, it has to pass in length |
266 | | // if it is string type , directly use string value |
267 | 102 | if constexpr (!std::is_same_v<T, std::string>) { |
268 | 92 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); |
269 | 92 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); |
270 | 92 | } |
271 | 102 | } |
272 | 38 | size_t row_num = 0; |
273 | | |
274 | 38 | auto format_options = DataTypeSerDe::get_default_format_options(); |
275 | 38 | auto time_zone = cctz::utc_time_zone(); |
276 | 38 | format_options.timezone = &time_zone; |
277 | | |
278 | 102 | for (const auto& bucket : buckets) { |
279 | 102 | if constexpr (std::is_same_v<T, std::string>) { |
280 | 10 | lower_val.SetString(bucket.lower.data(), |
281 | 10 | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); |
282 | 10 | upper_val.SetString(bucket.upper.data(), |
283 | 10 | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); |
284 | 92 | } else { |
285 | 92 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); |
286 | 92 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); |
287 | 92 | ++row_num; |
288 | 92 | lower_val.SetString(lower_str.data(), |
289 | 92 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); |
290 | 92 | upper_val.SetString(upper_str.data(), |
291 | 92 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); |
292 | 92 | } |
293 | 102 | rapidjson::Value bucket_json(rapidjson::kObjectType); |
294 | 102 | bucket_json.AddMember("lower", lower_val, allocator); |
295 | 102 | bucket_json.AddMember("upper", upper_val, allocator); |
296 | 102 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); |
297 | 102 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); |
298 | 102 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); |
299 | | |
300 | 102 | bucket_arr.PushBack(bucket_json, allocator); |
301 | 102 | } |
302 | | |
303 | 38 | doc.AddMember("buckets", bucket_arr, allocator); |
304 | 38 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); |
305 | 38 | doc.Accept(writer); |
306 | | |
307 | 38 | return !buckets.empty() && buffer.GetSize() > 0; |
308 | 38 | } _ZN5doris17histogram_to_jsonIiEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 244 | 6 | const DataTypePtr& data_type) { | 245 | 6 | rapidjson::Document doc; | 246 | 6 | doc.SetObject(); | 247 | 6 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 248 | | | 249 | 6 | int num_buckets = cast_set<int>(buckets.size()); | 250 | 6 | doc.AddMember("num_buckets", num_buckets, allocator); | 251 | | | 252 | 6 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 253 | 6 | bucket_arr.Reserve(num_buckets, allocator); | 254 | | | 255 | 6 | std::stringstream ss1; | 256 | 6 | std::stringstream ss2; | 257 | | | 258 | 6 | rapidjson::Value lower_val; | 259 | 6 | rapidjson::Value upper_val; | 260 | | | 261 | | // Convert bucket's lower and upper to 2 columns | 262 | 6 | MutableColumnPtr lower_column = data_type->create_column(); | 263 | 6 | MutableColumnPtr upper_column = data_type->create_column(); | 264 | 12 | for (const auto& bucket : buckets) { | 265 | | // String type is different, it has to pass in length | 266 | | // if it is string type , directly use string value | 267 | 12 | if constexpr (!std::is_same_v<T, std::string>) { | 268 | 12 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 269 | 12 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 270 | 12 | } | 271 | 12 | } | 272 | 6 | size_t row_num = 0; | 273 | | | 274 | 6 | auto format_options = DataTypeSerDe::get_default_format_options(); | 275 | 6 | auto time_zone = cctz::utc_time_zone(); | 276 | 6 | format_options.timezone = &time_zone; | 277 | | | 278 | 12 | for (const auto& bucket : buckets) { | 279 | | if constexpr (std::is_same_v<T, std::string>) { | 280 | | lower_val.SetString(bucket.lower.data(), | 281 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 282 | | 
upper_val.SetString(bucket.upper.data(), | 283 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 284 | 12 | } else { | 285 | 12 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 286 | 12 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 287 | 12 | ++row_num; | 288 | 12 | lower_val.SetString(lower_str.data(), | 289 | 12 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 290 | 12 | upper_val.SetString(upper_str.data(), | 291 | 12 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 292 | 12 | } | 293 | 12 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 294 | 12 | bucket_json.AddMember("lower", lower_val, allocator); | 295 | 12 | bucket_json.AddMember("upper", upper_val, allocator); | 296 | 12 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 297 | 12 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 298 | 12 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 299 | | | 300 | 12 | bucket_arr.PushBack(bucket_json, allocator); | 301 | 12 | } | 302 | | | 303 | 6 | doc.AddMember("buckets", bucket_arr, allocator); | 304 | 6 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 305 | 6 | doc.Accept(writer); | 306 | | | 307 | 6 | return !buckets.empty() && buffer.GetSize() > 0; | 308 | 6 | } |
Unexecuted instantiation: _ZN5doris17histogram_to_jsonIhEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE _ZN5doris17histogram_to_jsonIaEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 244 | 4 | const DataTypePtr& data_type) { | 245 | 4 | rapidjson::Document doc; | 246 | 4 | doc.SetObject(); | 247 | 4 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 248 | | | 249 | 4 | int num_buckets = cast_set<int>(buckets.size()); | 250 | 4 | doc.AddMember("num_buckets", num_buckets, allocator); | 251 | | | 252 | 4 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 253 | 4 | bucket_arr.Reserve(num_buckets, allocator); | 254 | | | 255 | 4 | std::stringstream ss1; | 256 | 4 | std::stringstream ss2; | 257 | | | 258 | 4 | rapidjson::Value lower_val; | 259 | 4 | rapidjson::Value upper_val; | 260 | | | 261 | | // Convert bucket's lower and upper to 2 columns | 262 | 4 | MutableColumnPtr lower_column = data_type->create_column(); | 263 | 4 | MutableColumnPtr upper_column = data_type->create_column(); | 264 | 10 | for (const auto& bucket : buckets) { | 265 | | // String type is different, it has to pass in length | 266 | | // if it is string type , directly use string value | 267 | 10 | if constexpr (!std::is_same_v<T, std::string>) { | 268 | 10 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 269 | 10 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 270 | 10 | } | 271 | 10 | } | 272 | 4 | size_t row_num = 0; | 273 | | | 274 | 4 | auto format_options = DataTypeSerDe::get_default_format_options(); | 275 | 4 | auto time_zone = cctz::utc_time_zone(); | 276 | 4 | format_options.timezone = &time_zone; | 277 | | | 278 | 10 | for (const auto& bucket : buckets) { | 279 | | if constexpr (std::is_same_v<T, 
std::string>) { | 280 | | lower_val.SetString(bucket.lower.data(), | 281 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 282 | | upper_val.SetString(bucket.upper.data(), | 283 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 284 | 10 | } else { | 285 | 10 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 286 | 10 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 287 | 10 | ++row_num; | 288 | 10 | lower_val.SetString(lower_str.data(), | 289 | 10 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 290 | 10 | upper_val.SetString(upper_str.data(), | 291 | 10 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 292 | 10 | } | 293 | 10 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 294 | 10 | bucket_json.AddMember("lower", lower_val, allocator); | 295 | 10 | bucket_json.AddMember("upper", upper_val, allocator); | 296 | 10 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 297 | 10 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 298 | 10 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 299 | | | 300 | 10 | bucket_arr.PushBack(bucket_json, allocator); | 301 | 10 | } | 302 | | | 303 | 4 | doc.AddMember("buckets", bucket_arr, allocator); | 304 | 4 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 305 | 4 | doc.Accept(writer); | 306 | | | 307 | 4 | return !buckets.empty() && buffer.GetSize() > 0; | 308 | 4 | } |
_ZN5doris17histogram_to_jsonIsEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 244 | 4 | const DataTypePtr& data_type) { | 245 | 4 | rapidjson::Document doc; | 246 | 4 | doc.SetObject(); | 247 | 4 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 248 | | | 249 | 4 | int num_buckets = cast_set<int>(buckets.size()); | 250 | 4 | doc.AddMember("num_buckets", num_buckets, allocator); | 251 | | | 252 | 4 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 253 | 4 | bucket_arr.Reserve(num_buckets, allocator); | 254 | | | 255 | 4 | std::stringstream ss1; | 256 | 4 | std::stringstream ss2; | 257 | | | 258 | 4 | rapidjson::Value lower_val; | 259 | 4 | rapidjson::Value upper_val; | 260 | | | 261 | | // Convert bucket's lower and upper to 2 columns | 262 | 4 | MutableColumnPtr lower_column = data_type->create_column(); | 263 | 4 | MutableColumnPtr upper_column = data_type->create_column(); | 264 | 10 | for (const auto& bucket : buckets) { | 265 | | // String type is different, it has to pass in length | 266 | | // if it is string type , directly use string value | 267 | 10 | if constexpr (!std::is_same_v<T, std::string>) { | 268 | 10 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 269 | 10 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 270 | 10 | } | 271 | 10 | } | 272 | 4 | size_t row_num = 0; | 273 | | | 274 | 4 | auto format_options = DataTypeSerDe::get_default_format_options(); | 275 | 4 | auto time_zone = cctz::utc_time_zone(); | 276 | 4 | format_options.timezone = &time_zone; | 277 | | | 278 | 10 | for (const auto& bucket : buckets) { | 279 | | if constexpr (std::is_same_v<T, std::string>) { | 280 | | lower_val.SetString(bucket.lower.data(), | 281 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 282 | | upper_val.SetString(bucket.upper.data(), | 
283 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 284 | 10 | } else { | 285 | 10 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 286 | 10 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 287 | 10 | ++row_num; | 288 | 10 | lower_val.SetString(lower_str.data(), | 289 | 10 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 290 | 10 | upper_val.SetString(upper_str.data(), | 291 | 10 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 292 | 10 | } | 293 | 10 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 294 | 10 | bucket_json.AddMember("lower", lower_val, allocator); | 295 | 10 | bucket_json.AddMember("upper", upper_val, allocator); | 296 | 10 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 297 | 10 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 298 | 10 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 299 | | | 300 | 10 | bucket_arr.PushBack(bucket_json, allocator); | 301 | 10 | } | 302 | | | 303 | 4 | doc.AddMember("buckets", bucket_arr, allocator); | 304 | 4 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 305 | 4 | doc.Accept(writer); | 306 | | | 307 | 4 | return !buckets.empty() && buffer.GetSize() > 0; | 308 | 4 | } |
_ZN5doris17histogram_to_jsonIlEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 244 | 4 | const DataTypePtr& data_type) { | 245 | 4 | rapidjson::Document doc; | 246 | 4 | doc.SetObject(); | 247 | 4 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 248 | | | 249 | 4 | int num_buckets = cast_set<int>(buckets.size()); | 250 | 4 | doc.AddMember("num_buckets", num_buckets, allocator); | 251 | | | 252 | 4 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 253 | 4 | bucket_arr.Reserve(num_buckets, allocator); | 254 | | | 255 | 4 | std::stringstream ss1; | 256 | 4 | std::stringstream ss2; | 257 | | | 258 | 4 | rapidjson::Value lower_val; | 259 | 4 | rapidjson::Value upper_val; | 260 | | | 261 | | // Convert bucket's lower and upper to 2 columns | 262 | 4 | MutableColumnPtr lower_column = data_type->create_column(); | 263 | 4 | MutableColumnPtr upper_column = data_type->create_column(); | 264 | 10 | for (const auto& bucket : buckets) { | 265 | | // String type is different, it has to pass in length | 266 | | // if it is string type , directly use string value | 267 | 10 | if constexpr (!std::is_same_v<T, std::string>) { | 268 | 10 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 269 | 10 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 270 | 10 | } | 271 | 10 | } | 272 | 4 | size_t row_num = 0; | 273 | | | 274 | 4 | auto format_options = DataTypeSerDe::get_default_format_options(); | 275 | 4 | auto time_zone = cctz::utc_time_zone(); | 276 | 4 | format_options.timezone = &time_zone; | 277 | | | 278 | 10 | for (const auto& bucket : buckets) { | 279 | | if constexpr (std::is_same_v<T, std::string>) { | 280 | | lower_val.SetString(bucket.lower.data(), | 281 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 282 | | upper_val.SetString(bucket.upper.data(), | 
283 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 284 | 10 | } else { | 285 | 10 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 286 | 10 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 287 | 10 | ++row_num; | 288 | 10 | lower_val.SetString(lower_str.data(), | 289 | 10 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 290 | 10 | upper_val.SetString(upper_str.data(), | 291 | 10 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 292 | 10 | } | 293 | 10 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 294 | 10 | bucket_json.AddMember("lower", lower_val, allocator); | 295 | 10 | bucket_json.AddMember("upper", upper_val, allocator); | 296 | 10 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 297 | 10 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 298 | 10 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 299 | | | 300 | 10 | bucket_arr.PushBack(bucket_json, allocator); | 301 | 10 | } | 302 | | | 303 | 4 | doc.AddMember("buckets", bucket_arr, allocator); | 304 | 4 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 305 | 4 | doc.Accept(writer); | 306 | | | 307 | 4 | return !buckets.empty() && buffer.GetSize() > 0; | 308 | 4 | } |
_ZN5doris17histogram_to_jsonInEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 244 | 4 | const DataTypePtr& data_type) { | 245 | 4 | rapidjson::Document doc; | 246 | 4 | doc.SetObject(); | 247 | 4 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 248 | | | 249 | 4 | int num_buckets = cast_set<int>(buckets.size()); | 250 | 4 | doc.AddMember("num_buckets", num_buckets, allocator); | 251 | | | 252 | 4 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 253 | 4 | bucket_arr.Reserve(num_buckets, allocator); | 254 | | | 255 | 4 | std::stringstream ss1; | 256 | 4 | std::stringstream ss2; | 257 | | | 258 | 4 | rapidjson::Value lower_val; | 259 | 4 | rapidjson::Value upper_val; | 260 | | | 261 | | // Convert bucket's lower and upper to 2 columns | 262 | 4 | MutableColumnPtr lower_column = data_type->create_column(); | 263 | 4 | MutableColumnPtr upper_column = data_type->create_column(); | 264 | 10 | for (const auto& bucket : buckets) { | 265 | | // String type is different, it has to pass in length | 266 | | // if it is string type , directly use string value | 267 | 10 | if constexpr (!std::is_same_v<T, std::string>) { | 268 | 10 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 269 | 10 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 270 | 10 | } | 271 | 10 | } | 272 | 4 | size_t row_num = 0; | 273 | | | 274 | 4 | auto format_options = DataTypeSerDe::get_default_format_options(); | 275 | 4 | auto time_zone = cctz::utc_time_zone(); | 276 | 4 | format_options.timezone = &time_zone; | 277 | | | 278 | 10 | for (const auto& bucket : buckets) { | 279 | | if constexpr (std::is_same_v<T, std::string>) { | 280 | | lower_val.SetString(bucket.lower.data(), | 281 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 282 | | upper_val.SetString(bucket.upper.data(), | 
283 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 284 | 10 | } else { | 285 | 10 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 286 | 10 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 287 | 10 | ++row_num; | 288 | 10 | lower_val.SetString(lower_str.data(), | 289 | 10 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 290 | 10 | upper_val.SetString(upper_str.data(), | 291 | 10 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 292 | 10 | } | 293 | 10 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 294 | 10 | bucket_json.AddMember("lower", lower_val, allocator); | 295 | 10 | bucket_json.AddMember("upper", upper_val, allocator); | 296 | 10 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 297 | 10 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 298 | 10 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 299 | | | 300 | 10 | bucket_arr.PushBack(bucket_json, allocator); | 301 | 10 | } | 302 | | | 303 | 4 | doc.AddMember("buckets", bucket_arr, allocator); | 304 | 4 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 305 | 4 | doc.Accept(writer); | 306 | | | 307 | 4 | return !buckets.empty() && buffer.GetSize() > 0; | 308 | 4 | } |
_ZN5doris17histogram_to_jsonIfEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 244 | 4 | const DataTypePtr& data_type) { | 245 | 4 | rapidjson::Document doc; | 246 | 4 | doc.SetObject(); | 247 | 4 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 248 | | | 249 | 4 | int num_buckets = cast_set<int>(buckets.size()); | 250 | 4 | doc.AddMember("num_buckets", num_buckets, allocator); | 251 | | | 252 | 4 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 253 | 4 | bucket_arr.Reserve(num_buckets, allocator); | 254 | | | 255 | 4 | std::stringstream ss1; | 256 | 4 | std::stringstream ss2; | 257 | | | 258 | 4 | rapidjson::Value lower_val; | 259 | 4 | rapidjson::Value upper_val; | 260 | | | 261 | | // Convert bucket's lower and upper to 2 columns | 262 | 4 | MutableColumnPtr lower_column = data_type->create_column(); | 263 | 4 | MutableColumnPtr upper_column = data_type->create_column(); | 264 | 10 | for (const auto& bucket : buckets) { | 265 | | // String type is different, it has to pass in length | 266 | | // if it is string type , directly use string value | 267 | 10 | if constexpr (!std::is_same_v<T, std::string>) { | 268 | 10 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 269 | 10 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 270 | 10 | } | 271 | 10 | } | 272 | 4 | size_t row_num = 0; | 273 | | | 274 | 4 | auto format_options = DataTypeSerDe::get_default_format_options(); | 275 | 4 | auto time_zone = cctz::utc_time_zone(); | 276 | 4 | format_options.timezone = &time_zone; | 277 | | | 278 | 10 | for (const auto& bucket : buckets) { | 279 | | if constexpr (std::is_same_v<T, std::string>) { | 280 | | lower_val.SetString(bucket.lower.data(), | 281 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 282 | | upper_val.SetString(bucket.upper.data(), | 
283 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 284 | 10 | } else { | 285 | 10 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 286 | 10 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 287 | 10 | ++row_num; | 288 | 10 | lower_val.SetString(lower_str.data(), | 289 | 10 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 290 | 10 | upper_val.SetString(upper_str.data(), | 291 | 10 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 292 | 10 | } | 293 | 10 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 294 | 10 | bucket_json.AddMember("lower", lower_val, allocator); | 295 | 10 | bucket_json.AddMember("upper", upper_val, allocator); | 296 | 10 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 297 | 10 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 298 | 10 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 299 | | | 300 | 10 | bucket_arr.PushBack(bucket_json, allocator); | 301 | 10 | } | 302 | | | 303 | 4 | doc.AddMember("buckets", bucket_arr, allocator); | 304 | 4 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 305 | 4 | doc.Accept(writer); | 306 | | | 307 | 4 | return !buckets.empty() && buffer.GetSize() > 0; | 308 | 4 | } |
_ZN5doris17histogram_to_jsonIdEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 244 | 4 | const DataTypePtr& data_type) { | 245 | 4 | rapidjson::Document doc; | 246 | 4 | doc.SetObject(); | 247 | 4 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 248 | | | 249 | 4 | int num_buckets = cast_set<int>(buckets.size()); | 250 | 4 | doc.AddMember("num_buckets", num_buckets, allocator); | 251 | | | 252 | 4 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 253 | 4 | bucket_arr.Reserve(num_buckets, allocator); | 254 | | | 255 | 4 | std::stringstream ss1; | 256 | 4 | std::stringstream ss2; | 257 | | | 258 | 4 | rapidjson::Value lower_val; | 259 | 4 | rapidjson::Value upper_val; | 260 | | | 261 | | // Convert bucket's lower and upper to 2 columns | 262 | 4 | MutableColumnPtr lower_column = data_type->create_column(); | 263 | 4 | MutableColumnPtr upper_column = data_type->create_column(); | 264 | 10 | for (const auto& bucket : buckets) { | 265 | | // String type is different, it has to pass in length | 266 | | // if it is string type , directly use string value | 267 | 10 | if constexpr (!std::is_same_v<T, std::string>) { | 268 | 10 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 269 | 10 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 270 | 10 | } | 271 | 10 | } | 272 | 4 | size_t row_num = 0; | 273 | | | 274 | 4 | auto format_options = DataTypeSerDe::get_default_format_options(); | 275 | 4 | auto time_zone = cctz::utc_time_zone(); | 276 | 4 | format_options.timezone = &time_zone; | 277 | | | 278 | 10 | for (const auto& bucket : buckets) { | 279 | | if constexpr (std::is_same_v<T, std::string>) { | 280 | | lower_val.SetString(bucket.lower.data(), | 281 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 282 | | upper_val.SetString(bucket.upper.data(), | 
283 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 284 | 10 | } else { | 285 | 10 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 286 | 10 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 287 | 10 | ++row_num; | 288 | 10 | lower_val.SetString(lower_str.data(), | 289 | 10 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 290 | 10 | upper_val.SetString(upper_str.data(), | 291 | 10 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 292 | 10 | } | 293 | 10 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 294 | 10 | bucket_json.AddMember("lower", lower_val, allocator); | 295 | 10 | bucket_json.AddMember("upper", upper_val, allocator); | 296 | 10 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 297 | 10 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 298 | 10 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 299 | | | 300 | 10 | bucket_arr.PushBack(bucket_json, allocator); | 301 | 10 | } | 302 | | | 303 | 4 | doc.AddMember("buckets", bucket_arr, allocator); | 304 | 4 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 305 | 4 | doc.Accept(writer); | 306 | | | 307 | 4 | return !buckets.empty() && buffer.GetSize() > 0; | 308 | 4 | } |
Unexecuted instantiation: _ZN5doris17histogram_to_jsonINS_7DecimalIiEEEEbRN9rapidjson19GenericStringBufferINS3_4UTF8IcEENS3_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISD_EERKSt10shared_ptrIKNS_9IDataTypeEE Unexecuted instantiation: _ZN5doris17histogram_to_jsonINS_7DecimalIlEEEEbRN9rapidjson19GenericStringBufferINS3_4UTF8IcEENS3_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISD_EERKSt10shared_ptrIKNS_9IDataTypeEE Unexecuted instantiation: _ZN5doris17histogram_to_jsonINS_12Decimal128V3EEEbRN9rapidjson19GenericStringBufferINS2_4UTF8IcEENS2_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISC_EERKSt10shared_ptrIKNS_9IDataTypeEE Unexecuted instantiation: _ZN5doris17histogram_to_jsonINS_7DecimalIN4wide7integerILm256EiEEEEEEbRN9rapidjson19GenericStringBufferINS6_4UTF8IcEENS6_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISG_EERKSt10shared_ptrIKNS_9IDataTypeEE _ZN5doris17histogram_to_jsonINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEbRN9rapidjson19GenericStringBufferINS7_4UTF8IcEENS7_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISH_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 244 | 4 | const DataTypePtr& data_type) { | 245 | 4 | rapidjson::Document doc; | 246 | 4 | doc.SetObject(); | 247 | 4 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 248 | | | 249 | 4 | int num_buckets = cast_set<int>(buckets.size()); | 250 | 4 | doc.AddMember("num_buckets", num_buckets, allocator); | 251 | | | 252 | 4 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 253 | 4 | bucket_arr.Reserve(num_buckets, allocator); | 254 | | | 255 | 4 | std::stringstream ss1; | 256 | 4 | std::stringstream ss2; | 257 | | | 258 | 4 | rapidjson::Value lower_val; | 259 | 4 | rapidjson::Value upper_val; | 260 | | | 261 | | // Convert bucket's lower and upper to 2 columns | 262 | 4 | MutableColumnPtr lower_column = data_type->create_column(); | 263 | 4 | MutableColumnPtr upper_column = data_type->create_column(); | 264 | 10 | for (const auto& bucket : buckets) { 
| 265 | | // String type is different, it has to pass in length | 266 | | // if it is string type , directly use string value | 267 | | if constexpr (!std::is_same_v<T, std::string>) { | 268 | | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 269 | | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 270 | | } | 271 | 10 | } | 272 | 4 | size_t row_num = 0; | 273 | | | 274 | 4 | auto format_options = DataTypeSerDe::get_default_format_options(); | 275 | 4 | auto time_zone = cctz::utc_time_zone(); | 276 | 4 | format_options.timezone = &time_zone; | 277 | | | 278 | 10 | for (const auto& bucket : buckets) { | 279 | 10 | if constexpr (std::is_same_v<T, std::string>) { | 280 | 10 | lower_val.SetString(bucket.lower.data(), | 281 | 10 | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 282 | 10 | upper_val.SetString(bucket.upper.data(), | 283 | 10 | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 284 | | } else { | 285 | | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 286 | | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 287 | | ++row_num; | 288 | | lower_val.SetString(lower_str.data(), | 289 | | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 290 | | upper_val.SetString(upper_str.data(), | 291 | | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 292 | | } | 293 | 10 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 294 | 10 | bucket_json.AddMember("lower", lower_val, allocator); | 295 | 10 | bucket_json.AddMember("upper", upper_val, allocator); | 296 | 10 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 297 | 10 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 298 | 10 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 299 | | | 300 | 10 | 
bucket_arr.PushBack(bucket_json, allocator); | 301 | 10 | } | 302 | | | 303 | 4 | doc.AddMember("buckets", bucket_arr, allocator); | 304 | 4 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 305 | 4 | doc.Accept(writer); | 306 | | | 307 | 4 | return !buckets.empty() && buffer.GetSize() > 0; | 308 | 4 | } |
_ZN5doris17histogram_to_jsonINS_11DateV2ValueINS_15DateV2ValueTypeEEEEEbRN9rapidjson19GenericStringBufferINS4_4UTF8IcEENS4_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISE_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 244 | 2 | const DataTypePtr& data_type) { | 245 | 2 | rapidjson::Document doc; | 246 | 2 | doc.SetObject(); | 247 | 2 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 248 | | | 249 | 2 | int num_buckets = cast_set<int>(buckets.size()); | 250 | 2 | doc.AddMember("num_buckets", num_buckets, allocator); | 251 | | | 252 | 2 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 253 | 2 | bucket_arr.Reserve(num_buckets, allocator); | 254 | | | 255 | 2 | std::stringstream ss1; | 256 | 2 | std::stringstream ss2; | 257 | | | 258 | 2 | rapidjson::Value lower_val; | 259 | 2 | rapidjson::Value upper_val; | 260 | | | 261 | | // Convert bucket's lower and upper to 2 columns | 262 | 2 | MutableColumnPtr lower_column = data_type->create_column(); | 263 | 2 | MutableColumnPtr upper_column = data_type->create_column(); | 264 | 10 | for (const auto& bucket : buckets) { | 265 | | // String type is different, it has to pass in length | 266 | | // if it is string type , directly use string value | 267 | 10 | if constexpr (!std::is_same_v<T, std::string>) { | 268 | 10 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 269 | 10 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 270 | 10 | } | 271 | 10 | } | 272 | 2 | size_t row_num = 0; | 273 | | | 274 | 2 | auto format_options = DataTypeSerDe::get_default_format_options(); | 275 | 2 | auto time_zone = cctz::utc_time_zone(); | 276 | 2 | format_options.timezone = &time_zone; | 277 | | | 278 | 10 | for (const auto& bucket : buckets) { | 279 | | if constexpr (std::is_same_v<T, std::string>) { | 280 | | lower_val.SetString(bucket.lower.data(), | 281 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 282 | | 
upper_val.SetString(bucket.upper.data(), | 283 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 284 | 10 | } else { | 285 | 10 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 286 | 10 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 287 | 10 | ++row_num; | 288 | 10 | lower_val.SetString(lower_str.data(), | 289 | 10 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 290 | 10 | upper_val.SetString(upper_str.data(), | 291 | 10 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 292 | 10 | } | 293 | 10 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 294 | 10 | bucket_json.AddMember("lower", lower_val, allocator); | 295 | 10 | bucket_json.AddMember("upper", upper_val, allocator); | 296 | 10 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 297 | 10 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 298 | 10 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 299 | | | 300 | 10 | bucket_arr.PushBack(bucket_json, allocator); | 301 | 10 | } | 302 | | | 303 | 2 | doc.AddMember("buckets", bucket_arr, allocator); | 304 | 2 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 305 | 2 | doc.Accept(writer); | 306 | | | 307 | 2 | return !buckets.empty() && buffer.GetSize() > 0; | 308 | 2 | } |
_ZN5doris17histogram_to_jsonINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEEEEbRN9rapidjson19GenericStringBufferINS4_4UTF8IcEENS4_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISE_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 244 | 2 | const DataTypePtr& data_type) { | 245 | 2 | rapidjson::Document doc; | 246 | 2 | doc.SetObject(); | 247 | 2 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 248 | | | 249 | 2 | int num_buckets = cast_set<int>(buckets.size()); | 250 | 2 | doc.AddMember("num_buckets", num_buckets, allocator); | 251 | | | 252 | 2 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 253 | 2 | bucket_arr.Reserve(num_buckets, allocator); | 254 | | | 255 | 2 | std::stringstream ss1; | 256 | 2 | std::stringstream ss2; | 257 | | | 258 | 2 | rapidjson::Value lower_val; | 259 | 2 | rapidjson::Value upper_val; | 260 | | | 261 | | // Convert bucket's lower and upper to 2 columns | 262 | 2 | MutableColumnPtr lower_column = data_type->create_column(); | 263 | 2 | MutableColumnPtr upper_column = data_type->create_column(); | 264 | 10 | for (const auto& bucket : buckets) { | 265 | | // String type is different, it has to pass in length | 266 | | // if it is string type , directly use string value | 267 | 10 | if constexpr (!std::is_same_v<T, std::string>) { | 268 | 10 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 269 | 10 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 270 | 10 | } | 271 | 10 | } | 272 | 2 | size_t row_num = 0; | 273 | | | 274 | 2 | auto format_options = DataTypeSerDe::get_default_format_options(); | 275 | 2 | auto time_zone = cctz::utc_time_zone(); | 276 | 2 | format_options.timezone = &time_zone; | 277 | | | 278 | 10 | for (const auto& bucket : buckets) { | 279 | | if constexpr (std::is_same_v<T, std::string>) { | 280 | | lower_val.SetString(bucket.lower.data(), | 281 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 282 | | 
upper_val.SetString(bucket.upper.data(), | 283 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 284 | 10 | } else { | 285 | 10 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 286 | 10 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 287 | 10 | ++row_num; | 288 | 10 | lower_val.SetString(lower_str.data(), | 289 | 10 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 290 | 10 | upper_val.SetString(upper_str.data(), | 291 | 10 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 292 | 10 | } | 293 | 10 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 294 | 10 | bucket_json.AddMember("lower", lower_val, allocator); | 295 | 10 | bucket_json.AddMember("upper", upper_val, allocator); | 296 | 10 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 297 | 10 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 298 | 10 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 299 | | | 300 | 10 | bucket_arr.PushBack(bucket_json, allocator); | 301 | 10 | } | 302 | | | 303 | 2 | doc.AddMember("buckets", bucket_arr, allocator); | 304 | 2 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 305 | 2 | doc.Accept(writer); | 306 | | | 307 | 2 | return !buckets.empty() && buffer.GetSize() > 0; | 308 | 2 | } |
|
309 | | } // namespace doris |