be/src/exec/common/histogram_helpers.hpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
#pragma once

#include <rapidjson/document.h>
#include <rapidjson/prettywriter.h>
#include <rapidjson/stringbuffer.h>

#include <boost/dynamic_bitset.hpp>

#include <cassert>
#include <cstddef>
#include <map>
#include <numeric>

#include "common/cast_set.h"
#include "core/data_type/data_type_decimal.h"
#include "util/io_helper.h"
29 | | |
30 | | namespace doris { |
31 | | #include "common/compile_check_begin.h" |
/**
 * One histogram bucket over values of type T.
 *
 * Holds the bucket's value-range endpoints plus per-bucket statistics:
 * - ndv:     number of distinct values in the bucket (presumably — TODO confirm
 *            against the histogram builder)
 * - count:   number of values assigned to the bucket
 * - pre_sum: cumulative count of all values in preceding buckets
 *
 * Fix: the defaulted constructor previously left every member uninitialized
 * for trivial T; default member initializers now zero-initialize them so a
 * default-constructed Bucket carries well-defined statistics.
 */
template <typename T>
struct Bucket {
public:
    Bucket() = default;
    Bucket(T lower, T upper, size_t ndv, size_t count, size_t pre_sum)
            : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {}

    T lower {};          // lower endpoint of the bucket's value range
    T upper {};          // upper endpoint of the bucket's value range
    size_t ndv = 0;      // distinct-value count within the bucket
    size_t count = 0;    // total value count within the bucket
    size_t pre_sum = 0;  // cumulative count of values before this bucket
};
45 | | |
/**
 * Checks if it is possible to assign the provided value_map to the given
 * number of buckets such that no bucket has a size larger than max_bucket_size.
 *
 * Performs a greedy scan in ascending key order: values are packed into the
 * current bucket until adding the next one would exceed max_bucket_size, at
 * which point a new bucket is started. A single value whose count alone
 * exceeds max_bucket_size still occupies one bucket by itself.
 *
 * @param value_map A mapping of values to their counts.
 * @param max_bucket_size The maximum size that any bucket is allowed to have.
 * @param num_buckets The number of buckets that we want to assign values to.
 *
 * @return true if the values can be assigned to the buckets, false otherwise.
 */
template <typename T>
bool can_assign_into_buckets(const std::map<T, size_t>& value_map, const size_t max_bucket_size,
                             const size_t num_buckets) {
    // An empty input cannot be assigned to any bucket layout.
    // (Also fixes a stray empty statement after this block in the original.)
    if (value_map.empty()) {
        return false;
    }

    size_t used_buckets = 1;
    size_t current_bucket_size = 0;

    for (const auto& [value, count] : value_map) {
        current_bucket_size += count;

        // If adding the current value to the current bucket would exceed
        // max_bucket_size, start a new bucket containing only this value.
        if (current_bucket_size > max_bucket_size) {
            ++used_buckets;
            current_bucket_size = count;
        }

        // More buckets consumed than allowed: the assignment is impossible.
        if (used_buckets > num_buckets) {
            return false;
        }
    }

    return true;
}
|
84 | | |
/**
 * Calculates the maximum number of values that can fit into each bucket given a set of values
 * and the desired number of buckets.
 *
 * Binary-searches the smallest feasible per-bucket capacity between 0 and a
 * conservative upper bound. The search is capped at a fixed number of steps,
 * so the result may overestimate — but never underestimate — the true
 * minimum feasible capacity.
 *
 * @tparam T the type of the values in the value map
 * @param value_map the map of values and their counts; must not be empty
 * @param num_buckets the desired number of buckets; must be positive
 * @return the maximum number of values that can fit into each bucket
 */
template <typename T>
size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) {
    // Ensure that the value map is not empty and that at least one bucket is
    // requested (num_buckets - 1 below would otherwise wrap around).
    assert(!value_map.empty());
    assert(num_buckets > 0);

    // Total number of values across all entries (the original comment already
    // promised std::accumulate; the code now matches it).
    const size_t total_values =
            std::accumulate(value_map.begin(), value_map.end(), size_t {0},
                            [](size_t sum, const auto& entry) { return sum + entry.second; });

    // If there is only one bucket, then all values will be assigned to that bucket.
    if (num_buckets == 1) {
        return total_values;
    }

    // To calculate the maximum value count in each bucket, we first calculate a conservative
    // upper bound, 2 * total_values / (num_buckets - 1) + 1. This upper bound may exceed the
    // actual maximum value count, but it does not underestimate it. The subsequent binary
    // search approaches the actual maximum value count.
    size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1;

    // Lower bound of the search interval.
    size_t lower_bucket_values = 0;

    // Limit the number of search steps to avoid excessive iteration.
    int search_step = 0;
    const int max_search_steps = 10;

    while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) {
        // Midpoint of the current search interval.
        const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2;

        if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) {
            // Feasible capacity: tighten the upper bound.
            upper_bucket_values = bucket_values;
        } else {
            // Infeasible capacity: the answer must be larger than the midpoint.
            lower_bucket_values = bucket_values;
        }
        ++search_step;
    }

    return upper_bucket_values;
}
| 114 | 2 | size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1; | 115 | | | 116 | | // Initialize the lower bound to 0 | 117 | 2 | size_t lower_bucket_values = 0; | 118 | | | 119 | | // Perform a binary search to find the maximum number of values that can fit into each bucket | 120 | 2 | int search_step = 0; | 121 | 2 | const int max_search_steps = | 122 | 2 | 10; // Limit the number of search steps to avoid excessive iteration | 123 | | | 124 | 15 | while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) { | 125 | | // Calculate the midpoint of the upper and lower bounds | 126 | 13 | const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2; | 127 | | | 128 | | // Check if the given number of values can be assigned to the desired number of buckets | 129 | 13 | if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) { | 130 | | // If it can, then set the upper bound to the midpoint | 131 | 6 | upper_bucket_values = bucket_values; | 132 | 7 | } else { | 133 | | // If it can't, then set the lower bound to the midpoint | 134 | 7 | lower_bucket_values = bucket_values; | 135 | 7 | } | 136 | | // Increment the search step counter | 137 | 13 | ++search_step; | 138 | 13 | } | 139 | | | 140 | 2 | return upper_bucket_values; | 141 | 2 | } |
Unexecuted instantiation: _ZN5doris27calculate_bucket_max_valuesINS_7DecimalIiEEEEmRKSt3mapIT_mSt4lessIS4_ESaISt4pairIKS4_mEEEm Unexecuted instantiation: _ZN5doris27calculate_bucket_max_valuesINS_7DecimalIlEEEEmRKSt3mapIT_mSt4lessIS4_ESaISt4pairIKS4_mEEEm Unexecuted instantiation: _ZN5doris27calculate_bucket_max_valuesINS_12Decimal128V3EEEmRKSt3mapIT_mSt4lessIS3_ESaISt4pairIKS3_mEEEm Unexecuted instantiation: _ZN5doris27calculate_bucket_max_valuesINS_7DecimalIN4wide7integerILm256EiEEEEEEmRKSt3mapIT_mSt4lessIS7_ESaISt4pairIKS7_mEEEm _ZN5doris27calculate_bucket_max_valuesINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEmRKSt3mapIT_mSt4lessIS8_ESaISt4pairIKS8_mEEEm Line | Count | Source | 95 | 2 | size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) { | 96 | | // Ensure that the value map is not empty | 97 | 2 | assert(!value_map.empty()); | 98 | | | 99 | | // Calculate the total number of values in the map using std::accumulate() | 100 | 2 | size_t total_values = 0; | 101 | 2.00k | for (const auto& [value, count] : value_map) { | 102 | 2.00k | total_values += count; | 103 | 2.00k | } | 104 | | | 105 | | // If there is only one bucket, then all values will be assigned to that bucket | 106 | 2 | if (num_buckets == 1) { | 107 | 0 | return total_values; | 108 | 0 | } | 109 | | | 110 | | // To calculate the maximum value count in each bucket, we first calculate a conservative upper | 111 | | // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed | 112 | | // the actual maximum value count, but it does not underestimate it. The subsequent binary search | 113 | | // algorithm will approach the actual maximum value count. 
| 114 | 2 | size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1; | 115 | | | 116 | | // Initialize the lower bound to 0 | 117 | 2 | size_t lower_bucket_values = 0; | 118 | | | 119 | | // Perform a binary search to find the maximum number of values that can fit into each bucket | 120 | 2 | int search_step = 0; | 121 | 2 | const int max_search_steps = | 122 | 2 | 10; // Limit the number of search steps to avoid excessive iteration | 123 | | | 124 | 21 | while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) { | 125 | | // Calculate the midpoint of the upper and lower bounds | 126 | 19 | const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2; | 127 | | | 128 | | // Check if the given number of values can be assigned to the desired number of buckets | 129 | 19 | if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) { | 130 | | // If it can, then set the upper bound to the midpoint | 131 | 9 | upper_bucket_values = bucket_values; | 132 | 10 | } else { | 133 | | // If it can't, then set the lower bound to the midpoint | 134 | 10 | lower_bucket_values = bucket_values; | 135 | 10 | } | 136 | | // Increment the search step counter | 137 | 19 | ++search_step; | 138 | 19 | } | 139 | | | 140 | 2 | return upper_bucket_values; | 141 | 2 | } |
_ZN5doris27calculate_bucket_max_valuesINS_11DateV2ValueINS_15DateV2ValueTypeEEEEEmRKSt3mapIT_mSt4lessIS5_ESaISt4pairIKS5_mEEEm Line | Count | Source | 95 | 2 | size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) { | 96 | | // Ensure that the value map is not empty | 97 | 2 | assert(!value_map.empty()); | 98 | | | 99 | | // Calculate the total number of values in the map using std::accumulate() | 100 | 2 | size_t total_values = 0; | 101 | 200 | for (const auto& [value, count] : value_map) { | 102 | 200 | total_values += count; | 103 | 200 | } | 104 | | | 105 | | // If there is only one bucket, then all values will be assigned to that bucket | 106 | 2 | if (num_buckets == 1) { | 107 | 0 | return total_values; | 108 | 0 | } | 109 | | | 110 | | // To calculate the maximum value count in each bucket, we first calculate a conservative upper | 111 | | // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed | 112 | | // the actual maximum value count, but it does not underestimate it. The subsequent binary search | 113 | | // algorithm will approach the actual maximum value count. 
| 114 | 2 | size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1; | 115 | | | 116 | | // Initialize the lower bound to 0 | 117 | 2 | size_t lower_bucket_values = 0; | 118 | | | 119 | | // Perform a binary search to find the maximum number of values that can fit into each bucket | 120 | 2 | int search_step = 0; | 121 | 2 | const int max_search_steps = | 122 | 2 | 10; // Limit the number of search steps to avoid excessive iteration | 123 | | | 124 | 15 | while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) { | 125 | | // Calculate the midpoint of the upper and lower bounds | 126 | 13 | const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2; | 127 | | | 128 | | // Check if the given number of values can be assigned to the desired number of buckets | 129 | 13 | if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) { | 130 | | // If it can, then set the upper bound to the midpoint | 131 | 6 | upper_bucket_values = bucket_values; | 132 | 7 | } else { | 133 | | // If it can't, then set the lower bound to the midpoint | 134 | 7 | lower_bucket_values = bucket_values; | 135 | 7 | } | 136 | | // Increment the search step counter | 137 | 13 | ++search_step; | 138 | 13 | } | 139 | | | 140 | 2 | return upper_bucket_values; | 141 | 2 | } |
_ZN5doris27calculate_bucket_max_valuesINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEEEEmRKSt3mapIT_mSt4lessIS5_ESaISt4pairIKS5_mEEEm Line | Count | Source | 95 | 2 | size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) { | 96 | | // Ensure that the value map is not empty | 97 | 2 | assert(!value_map.empty()); | 98 | | | 99 | | // Calculate the total number of values in the map using std::accumulate() | 100 | 2 | size_t total_values = 0; | 101 | 200 | for (const auto& [value, count] : value_map) { | 102 | 200 | total_values += count; | 103 | 200 | } | 104 | | | 105 | | // If there is only one bucket, then all values will be assigned to that bucket | 106 | 2 | if (num_buckets == 1) { | 107 | 0 | return total_values; | 108 | 0 | } | 109 | | | 110 | | // To calculate the maximum value count in each bucket, we first calculate a conservative upper | 111 | | // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed | 112 | | // the actual maximum value count, but it does not underestimate it. The subsequent binary search | 113 | | // algorithm will approach the actual maximum value count. 
| 114 | 2 | size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1; | 115 | | | 116 | | // Initialize the lower bound to 0 | 117 | 2 | size_t lower_bucket_values = 0; | 118 | | | 119 | | // Perform a binary search to find the maximum number of values that can fit into each bucket | 120 | 2 | int search_step = 0; | 121 | 2 | const int max_search_steps = | 122 | 2 | 10; // Limit the number of search steps to avoid excessive iteration | 123 | | | 124 | 15 | while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) { | 125 | | // Calculate the midpoint of the upper and lower bounds | 126 | 13 | const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2; | 127 | | | 128 | | // Check if the given number of values can be assigned to the desired number of buckets | 129 | 13 | if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) { | 130 | | // If it can, then set the upper bound to the midpoint | 131 | 6 | upper_bucket_values = bucket_values; | 132 | 7 | } else { | 133 | | // If it can't, then set the lower bound to the midpoint | 134 | 7 | lower_bucket_values = bucket_values; | 135 | 7 | } | 136 | | // Increment the search step counter | 137 | 13 | ++search_step; | 138 | 13 | } | 139 | | | 140 | 2 | return upper_bucket_values; | 141 | 2 | } |
|
142 | | |
143 | | /** |
144 | | * Greedy equi-height histogram construction algorithm, inspired by the MySQL |
145 | | * equi_height implementation(https://dev.mysql.com/doc/dev/mysql-server/latest/equi__height_8h.html). |
146 | | * |
147 | | * Given an ordered collection of [value, count] pairs and a maximum bucket |
148 | | * size, construct a histogram by inserting values into a bucket while keeping |
149 | | * track of its size. If the insertion of a value into a non-empty bucket |
150 | | * causes the bucket to exceed the maximum size, create a new empty bucket and |
151 | | * continue. |
152 | | * |
153 | | * The algorithm guarantees a selectivity estimation error of at most ~2 * |
154 | | * #values / #buckets, often less. Values with a higher relative frequency are |
155 | | * guaranteed to be placed in singleton buckets. |
156 | | * |
157 | | * The minimum composite bucket size is used to minimize the worst case |
158 | | * selectivity estimation error. In general, the algorithm will adapt to the |
159 | | * data distribution to minimize the size of composite buckets. The heavy values |
160 | | * can be placed in singleton buckets and the remaining values will be evenly |
161 | | * spread across the remaining buckets, leading to a lower composite bucket size. |
162 | | * |
163 | | * Note: The term "value" refers to an entry in a column and the actual value |
164 | | * of an entry. The ordered_map is an ordered collection of [distinct value, |
165 | | * value count] pairs. For example, a Value_map<String> could contain the pairs ["a", 1], ["b", 2] |
166 | | * to represent one "a" value and two "b" values. |
167 | | * |
168 | | * @param buckets A vector of empty buckets that will be populated with data. |
169 | | * @param ordered_map An ordered map of distinct values and their counts. |
170 | | * @param max_num_buckets The maximum number of buckets that can be used. |
171 | | * |
172 | | * @return True if the buckets were successfully built, false otherwise. |
173 | | */ |
174 | | template <typename T> |
175 | | bool build_histogram(std::vector<Bucket<T>>& buckets, const std::map<T, size_t>& ordered_map, |
176 | 43 | const size_t max_num_buckets) { |
177 | | // If the input map is empty, there is nothing to build. |
178 | 43 | if (ordered_map.empty()) { |
179 | 17 | return false; |
180 | 17 | } |
181 | | |
182 | | // Calculate the maximum number of values that can be assigned to each bucket. |
183 | 26 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); |
184 | | |
185 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional |
186 | | // allocations when inserting buckets. |
187 | 26 | buckets.clear(); |
188 | 26 | buckets.reserve(max_num_buckets); |
189 | | |
190 | | // Initialize bucket variables. |
191 | 26 | size_t distinct_values_count = 0; |
192 | 26 | size_t values_count = 0; |
193 | 26 | size_t cumulative_values = 0; |
194 | | |
195 | | // Record how many values still need to be assigned. |
196 | 26 | auto remaining_distinct_values = ordered_map.size(); |
197 | | |
198 | 26 | auto it = ordered_map.begin(); |
199 | | |
200 | | // Lower value of the current bucket. |
201 | 26 | const T* lower_value = &it->first; |
202 | | |
203 | | // Iterate over the ordered map of distinct values and their counts. |
204 | 3.84k | for (; it != ordered_map.end(); ++it) { |
205 | 3.82k | const auto count = it->second; |
206 | 3.82k | const auto current_value = it->first; |
207 | | |
208 | | // Update the bucket counts and track the number of distinct values assigned. |
209 | 3.82k | distinct_values_count++; |
210 | 3.82k | remaining_distinct_values--; |
211 | 3.82k | values_count += count; |
212 | 3.82k | cumulative_values += count; |
213 | | |
214 | | // Check whether the current value should be added to the current bucket. |
215 | 3.82k | auto next = std::next(it); |
216 | 3.82k | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; |
217 | | |
218 | 3.82k | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && |
219 | 3.82k | values_count + next->second <= bucket_max_values) { |
220 | | // If the current value is the last in the input map and there are more remaining |
221 | | // distinct values than empty buckets and adding the value does not cause the bucket |
222 | | // to exceed its max size, skip adding the value to the current bucket. |
223 | 3.70k | continue; |
224 | 3.70k | } |
225 | | |
226 | | // Finalize the current bucket and add it to our collection of buckets. |
227 | 114 | auto pre_sum = cumulative_values - values_count; |
228 | | |
229 | 114 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, |
230 | 114 | pre_sum); |
231 | 114 | buckets.push_back(new_bucket); |
232 | | |
233 | | // Reset variables for the next bucket. |
234 | 114 | if (next != ordered_map.end()) { |
235 | 88 | lower_value = &next->first; |
236 | 88 | } |
237 | 114 | values_count = 0; |
238 | 114 | distinct_values_count = 0; |
239 | 114 | } |
240 | | |
241 | 26 | return true; |
242 | 43 | } _ZN5doris15build_histogramIiEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm Line | Count | Source | 176 | 11 | const size_t max_num_buckets) { | 177 | | // If the input map is empty, there is nothing to build. | 178 | 11 | if (ordered_map.empty()) { | 179 | 3 | return false; | 180 | 3 | } | 181 | | | 182 | | // Calculate the maximum number of values that can be assigned to each bucket. | 183 | 8 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 184 | | | 185 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 186 | | // allocations when inserting buckets. | 187 | 8 | buckets.clear(); | 188 | 8 | buckets.reserve(max_num_buckets); | 189 | | | 190 | | // Initialize bucket variables. | 191 | 8 | size_t distinct_values_count = 0; | 192 | 8 | size_t values_count = 0; | 193 | 8 | size_t cumulative_values = 0; | 194 | | | 195 | | // Record how many values still need to be assigned. | 196 | 8 | auto remaining_distinct_values = ordered_map.size(); | 197 | | | 198 | 8 | auto it = ordered_map.begin(); | 199 | | | 200 | | // Lower value of the current bucket. | 201 | 8 | const T* lower_value = &it->first; | 202 | | | 203 | | // Iterate over the ordered map of distinct values and their counts. | 204 | 229 | for (; it != ordered_map.end(); ++it) { | 205 | 221 | const auto count = it->second; | 206 | 221 | const auto current_value = it->first; | 207 | | | 208 | | // Update the bucket counts and track the number of distinct values assigned. | 209 | 221 | distinct_values_count++; | 210 | 221 | remaining_distinct_values--; | 211 | 221 | values_count += count; | 212 | 221 | cumulative_values += count; | 213 | | | 214 | | // Check whether the current value should be added to the current bucket. 
| 215 | 221 | auto next = std::next(it); | 216 | 221 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 217 | | | 218 | 221 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 219 | 221 | values_count + next->second <= bucket_max_values) { | 220 | | // If the current value is the last in the input map and there are more remaining | 221 | | // distinct values than empty buckets and adding the value does not cause the bucket | 222 | | // to exceed its max size, skip adding the value to the current bucket. | 223 | 197 | continue; | 224 | 197 | } | 225 | | | 226 | | // Finalize the current bucket and add it to our collection of buckets. | 227 | 24 | auto pre_sum = cumulative_values - values_count; | 228 | | | 229 | 24 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 230 | 24 | pre_sum); | 231 | 24 | buckets.push_back(new_bucket); | 232 | | | 233 | | // Reset variables for the next bucket. | 234 | 24 | if (next != ordered_map.end()) { | 235 | 16 | lower_value = &next->first; | 236 | 16 | } | 237 | 24 | values_count = 0; | 238 | 24 | distinct_values_count = 0; | 239 | 24 | } | 240 | | | 241 | 8 | return true; | 242 | 11 | } |
Unexecuted instantiation: _ZN5doris15build_histogramIhEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm _ZN5doris15build_histogramIaEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm Line | Count | Source | 176 | 4 | const size_t max_num_buckets) { | 177 | | // If the input map is empty, there is nothing to build. | 178 | 4 | if (ordered_map.empty()) { | 179 | 2 | return false; | 180 | 2 | } | 181 | | | 182 | | // Calculate the maximum number of values that can be assigned to each bucket. | 183 | 2 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 184 | | | 185 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 186 | | // allocations when inserting buckets. | 187 | 2 | buckets.clear(); | 188 | 2 | buckets.reserve(max_num_buckets); | 189 | | | 190 | | // Initialize bucket variables. | 191 | 2 | size_t distinct_values_count = 0; | 192 | 2 | size_t values_count = 0; | 193 | 2 | size_t cumulative_values = 0; | 194 | | | 195 | | // Record how many values still need to be assigned. | 196 | 2 | auto remaining_distinct_values = ordered_map.size(); | 197 | | | 198 | 2 | auto it = ordered_map.begin(); | 199 | | | 200 | | // Lower value of the current bucket. | 201 | 2 | const T* lower_value = &it->first; | 202 | | | 203 | | // Iterate over the ordered map of distinct values and their counts. | 204 | 202 | for (; it != ordered_map.end(); ++it) { | 205 | 200 | const auto count = it->second; | 206 | 200 | const auto current_value = it->first; | 207 | | | 208 | | // Update the bucket counts and track the number of distinct values assigned. | 209 | 200 | distinct_values_count++; | 210 | 200 | remaining_distinct_values--; | 211 | 200 | values_count += count; | 212 | 200 | cumulative_values += count; | 213 | | | 214 | | // Check whether the current value should be added to the current bucket. 
| 215 | 200 | auto next = std::next(it); | 216 | 200 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 217 | | | 218 | 200 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 219 | 200 | values_count + next->second <= bucket_max_values) { | 220 | | // If the current value is the last in the input map and there are more remaining | 221 | | // distinct values than empty buckets and adding the value does not cause the bucket | 222 | | // to exceed its max size, skip adding the value to the current bucket. | 223 | 190 | continue; | 224 | 190 | } | 225 | | | 226 | | // Finalize the current bucket and add it to our collection of buckets. | 227 | 10 | auto pre_sum = cumulative_values - values_count; | 228 | | | 229 | 10 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 230 | 10 | pre_sum); | 231 | 10 | buckets.push_back(new_bucket); | 232 | | | 233 | | // Reset variables for the next bucket. | 234 | 10 | if (next != ordered_map.end()) { | 235 | 8 | lower_value = &next->first; | 236 | 8 | } | 237 | 10 | values_count = 0; | 238 | 10 | distinct_values_count = 0; | 239 | 10 | } | 240 | | | 241 | 2 | return true; | 242 | 4 | } |
_ZN5doris15build_histogramIsEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm Line | Count | Source | 176 | 4 | const size_t max_num_buckets) { | 177 | | // If the input map is empty, there is nothing to build. | 178 | 4 | if (ordered_map.empty()) { | 179 | 2 | return false; | 180 | 2 | } | 181 | | | 182 | | // Calculate the maximum number of values that can be assigned to each bucket. | 183 | 2 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 184 | | | 185 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 186 | | // allocations when inserting buckets. | 187 | 2 | buckets.clear(); | 188 | 2 | buckets.reserve(max_num_buckets); | 189 | | | 190 | | // Initialize bucket variables. | 191 | 2 | size_t distinct_values_count = 0; | 192 | 2 | size_t values_count = 0; | 193 | 2 | size_t cumulative_values = 0; | 194 | | | 195 | | // Record how many values still need to be assigned. | 196 | 2 | auto remaining_distinct_values = ordered_map.size(); | 197 | | | 198 | 2 | auto it = ordered_map.begin(); | 199 | | | 200 | | // Lower value of the current bucket. | 201 | 2 | const T* lower_value = &it->first; | 202 | | | 203 | | // Iterate over the ordered map of distinct values and their counts. | 204 | 202 | for (; it != ordered_map.end(); ++it) { | 205 | 200 | const auto count = it->second; | 206 | 200 | const auto current_value = it->first; | 207 | | | 208 | | // Update the bucket counts and track the number of distinct values assigned. | 209 | 200 | distinct_values_count++; | 210 | 200 | remaining_distinct_values--; | 211 | 200 | values_count += count; | 212 | 200 | cumulative_values += count; | 213 | | | 214 | | // Check whether the current value should be added to the current bucket. 
| 215 | 200 | auto next = std::next(it); | 216 | 200 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 217 | | | 218 | 200 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 219 | 200 | values_count + next->second <= bucket_max_values) { | 220 | | // If the current value is the last in the input map and there are more remaining | 221 | | // distinct values than empty buckets and adding the value does not cause the bucket | 222 | | // to exceed its max size, skip adding the value to the current bucket. | 223 | 190 | continue; | 224 | 190 | } | 225 | | | 226 | | // Finalize the current bucket and add it to our collection of buckets. | 227 | 10 | auto pre_sum = cumulative_values - values_count; | 228 | | | 229 | 10 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 230 | 10 | pre_sum); | 231 | 10 | buckets.push_back(new_bucket); | 232 | | | 233 | | // Reset variables for the next bucket. | 234 | 10 | if (next != ordered_map.end()) { | 235 | 8 | lower_value = &next->first; | 236 | 8 | } | 237 | 10 | values_count = 0; | 238 | 10 | distinct_values_count = 0; | 239 | 10 | } | 240 | | | 241 | 2 | return true; | 242 | 4 | } |
_ZN5doris15build_histogramIlEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm Line | Count | Source | 176 | 4 | const size_t max_num_buckets) { | 177 | | // If the input map is empty, there is nothing to build. | 178 | 4 | if (ordered_map.empty()) { | 179 | 2 | return false; | 180 | 2 | } | 181 | | | 182 | | // Calculate the maximum number of values that can be assigned to each bucket. | 183 | 2 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 184 | | | 185 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 186 | | // allocations when inserting buckets. | 187 | 2 | buckets.clear(); | 188 | 2 | buckets.reserve(max_num_buckets); | 189 | | | 190 | | // Initialize bucket variables. | 191 | 2 | size_t distinct_values_count = 0; | 192 | 2 | size_t values_count = 0; | 193 | 2 | size_t cumulative_values = 0; | 194 | | | 195 | | // Record how many values still need to be assigned. | 196 | 2 | auto remaining_distinct_values = ordered_map.size(); | 197 | | | 198 | 2 | auto it = ordered_map.begin(); | 199 | | | 200 | | // Lower value of the current bucket. | 201 | 2 | const T* lower_value = &it->first; | 202 | | | 203 | | // Iterate over the ordered map of distinct values and their counts. | 204 | 202 | for (; it != ordered_map.end(); ++it) { | 205 | 200 | const auto count = it->second; | 206 | 200 | const auto current_value = it->first; | 207 | | | 208 | | // Update the bucket counts and track the number of distinct values assigned. | 209 | 200 | distinct_values_count++; | 210 | 200 | remaining_distinct_values--; | 211 | 200 | values_count += count; | 212 | 200 | cumulative_values += count; | 213 | | | 214 | | // Check whether the current value should be added to the current bucket. 
| 215 | 200 | auto next = std::next(it); | 216 | 200 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 217 | | | 218 | 200 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 219 | 200 | values_count + next->second <= bucket_max_values) { | 220 | | // If the current value is the last in the input map and there are more remaining | 221 | | // distinct values than empty buckets and adding the value does not cause the bucket | 222 | | // to exceed its max size, skip adding the value to the current bucket. | 223 | 190 | continue; | 224 | 190 | } | 225 | | | 226 | | // Finalize the current bucket and add it to our collection of buckets. | 227 | 10 | auto pre_sum = cumulative_values - values_count; | 228 | | | 229 | 10 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 230 | 10 | pre_sum); | 231 | 10 | buckets.push_back(new_bucket); | 232 | | | 233 | | // Reset variables for the next bucket. | 234 | 10 | if (next != ordered_map.end()) { | 235 | 8 | lower_value = &next->first; | 236 | 8 | } | 237 | 10 | values_count = 0; | 238 | 10 | distinct_values_count = 0; | 239 | 10 | } | 240 | | | 241 | 2 | return true; | 242 | 4 | } |
_ZN5doris15build_histogramInEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm Line | Count | Source | 176 | 4 | const size_t max_num_buckets) { | 177 | | // If the input map is empty, there is nothing to build. | 178 | 4 | if (ordered_map.empty()) { | 179 | 2 | return false; | 180 | 2 | } | 181 | | | 182 | | // Calculate the maximum number of values that can be assigned to each bucket. | 183 | 2 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 184 | | | 185 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 186 | | // allocations when inserting buckets. | 187 | 2 | buckets.clear(); | 188 | 2 | buckets.reserve(max_num_buckets); | 189 | | | 190 | | // Initialize bucket variables. | 191 | 2 | size_t distinct_values_count = 0; | 192 | 2 | size_t values_count = 0; | 193 | 2 | size_t cumulative_values = 0; | 194 | | | 195 | | // Record how many values still need to be assigned. | 196 | 2 | auto remaining_distinct_values = ordered_map.size(); | 197 | | | 198 | 2 | auto it = ordered_map.begin(); | 199 | | | 200 | | // Lower value of the current bucket. | 201 | 2 | const T* lower_value = &it->first; | 202 | | | 203 | | // Iterate over the ordered map of distinct values and their counts. | 204 | 202 | for (; it != ordered_map.end(); ++it) { | 205 | 200 | const auto count = it->second; | 206 | 200 | const auto current_value = it->first; | 207 | | | 208 | | // Update the bucket counts and track the number of distinct values assigned. | 209 | 200 | distinct_values_count++; | 210 | 200 | remaining_distinct_values--; | 211 | 200 | values_count += count; | 212 | 200 | cumulative_values += count; | 213 | | | 214 | | // Check whether the current value should be added to the current bucket. 
| 215 | 200 | auto next = std::next(it); | 216 | 200 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 217 | | | 218 | 200 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 219 | 200 | values_count + next->second <= bucket_max_values) { | 220 | | // If the current value is the last in the input map and there are more remaining | 221 | | // distinct values than empty buckets and adding the value does not cause the bucket | 222 | | // to exceed its max size, skip adding the value to the current bucket. | 223 | 190 | continue; | 224 | 190 | } | 225 | | | 226 | | // Finalize the current bucket and add it to our collection of buckets. | 227 | 10 | auto pre_sum = cumulative_values - values_count; | 228 | | | 229 | 10 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 230 | 10 | pre_sum); | 231 | 10 | buckets.push_back(new_bucket); | 232 | | | 233 | | // Reset variables for the next bucket. | 234 | 10 | if (next != ordered_map.end()) { | 235 | 8 | lower_value = &next->first; | 236 | 8 | } | 237 | 10 | values_count = 0; | 238 | 10 | distinct_values_count = 0; | 239 | 10 | } | 240 | | | 241 | 2 | return true; | 242 | 4 | } |
_ZN5doris15build_histogramIfEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm Line | Count | Source | 176 | 4 | const size_t max_num_buckets) { | 177 | | // If the input map is empty, there is nothing to build. | 178 | 4 | if (ordered_map.empty()) { | 179 | 2 | return false; | 180 | 2 | } | 181 | | | 182 | | // Calculate the maximum number of values that can be assigned to each bucket. | 183 | 2 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 184 | | | 185 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 186 | | // allocations when inserting buckets. | 187 | 2 | buckets.clear(); | 188 | 2 | buckets.reserve(max_num_buckets); | 189 | | | 190 | | // Initialize bucket variables. | 191 | 2 | size_t distinct_values_count = 0; | 192 | 2 | size_t values_count = 0; | 193 | 2 | size_t cumulative_values = 0; | 194 | | | 195 | | // Record how many values still need to be assigned. | 196 | 2 | auto remaining_distinct_values = ordered_map.size(); | 197 | | | 198 | 2 | auto it = ordered_map.begin(); | 199 | | | 200 | | // Lower value of the current bucket. | 201 | 2 | const T* lower_value = &it->first; | 202 | | | 203 | | // Iterate over the ordered map of distinct values and their counts. | 204 | 202 | for (; it != ordered_map.end(); ++it) { | 205 | 200 | const auto count = it->second; | 206 | 200 | const auto current_value = it->first; | 207 | | | 208 | | // Update the bucket counts and track the number of distinct values assigned. | 209 | 200 | distinct_values_count++; | 210 | 200 | remaining_distinct_values--; | 211 | 200 | values_count += count; | 212 | 200 | cumulative_values += count; | 213 | | | 214 | | // Check whether the current value should be added to the current bucket. 
| 215 | 200 | auto next = std::next(it); | 216 | 200 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 217 | | | 218 | 200 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 219 | 200 | values_count + next->second <= bucket_max_values) { | 220 | | // If the current value is the last in the input map and there are more remaining | 221 | | // distinct values than empty buckets and adding the value does not cause the bucket | 222 | | // to exceed its max size, skip adding the value to the current bucket. | 223 | 190 | continue; | 224 | 190 | } | 225 | | | 226 | | // Finalize the current bucket and add it to our collection of buckets. | 227 | 10 | auto pre_sum = cumulative_values - values_count; | 228 | | | 229 | 10 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 230 | 10 | pre_sum); | 231 | 10 | buckets.push_back(new_bucket); | 232 | | | 233 | | // Reset variables for the next bucket. | 234 | 10 | if (next != ordered_map.end()) { | 235 | 8 | lower_value = &next->first; | 236 | 8 | } | 237 | 10 | values_count = 0; | 238 | 10 | distinct_values_count = 0; | 239 | 10 | } | 240 | | | 241 | 2 | return true; | 242 | 4 | } |
_ZN5doris15build_histogramIdEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm Line | Count | Source | 176 | 4 | const size_t max_num_buckets) { | 177 | | // If the input map is empty, there is nothing to build. | 178 | 4 | if (ordered_map.empty()) { | 179 | 2 | return false; | 180 | 2 | } | 181 | | | 182 | | // Calculate the maximum number of values that can be assigned to each bucket. | 183 | 2 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 184 | | | 185 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 186 | | // allocations when inserting buckets. | 187 | 2 | buckets.clear(); | 188 | 2 | buckets.reserve(max_num_buckets); | 189 | | | 190 | | // Initialize bucket variables. | 191 | 2 | size_t distinct_values_count = 0; | 192 | 2 | size_t values_count = 0; | 193 | 2 | size_t cumulative_values = 0; | 194 | | | 195 | | // Record how many values still need to be assigned. | 196 | 2 | auto remaining_distinct_values = ordered_map.size(); | 197 | | | 198 | 2 | auto it = ordered_map.begin(); | 199 | | | 200 | | // Lower value of the current bucket. | 201 | 2 | const T* lower_value = &it->first; | 202 | | | 203 | | // Iterate over the ordered map of distinct values and their counts. | 204 | 202 | for (; it != ordered_map.end(); ++it) { | 205 | 200 | const auto count = it->second; | 206 | 200 | const auto current_value = it->first; | 207 | | | 208 | | // Update the bucket counts and track the number of distinct values assigned. | 209 | 200 | distinct_values_count++; | 210 | 200 | remaining_distinct_values--; | 211 | 200 | values_count += count; | 212 | 200 | cumulative_values += count; | 213 | | | 214 | | // Check whether the current value should be added to the current bucket. 
| 215 | 200 | auto next = std::next(it); | 216 | 200 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 217 | | | 218 | 200 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 219 | 200 | values_count + next->second <= bucket_max_values) { | 220 | | // If the current value is the last in the input map and there are more remaining | 221 | | // distinct values than empty buckets and adding the value does not cause the bucket | 222 | | // to exceed its max size, skip adding the value to the current bucket. | 223 | 190 | continue; | 224 | 190 | } | 225 | | | 226 | | // Finalize the current bucket and add it to our collection of buckets. | 227 | 10 | auto pre_sum = cumulative_values - values_count; | 228 | | | 229 | 10 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 230 | 10 | pre_sum); | 231 | 10 | buckets.push_back(new_bucket); | 232 | | | 233 | | // Reset variables for the next bucket. | 234 | 10 | if (next != ordered_map.end()) { | 235 | 8 | lower_value = &next->first; | 236 | 8 | } | 237 | 10 | values_count = 0; | 238 | 10 | distinct_values_count = 0; | 239 | 10 | } | 240 | | | 241 | 2 | return true; | 242 | 4 | } |
Unexecuted instantiation: _ZN5doris15build_histogramINS_7DecimalIiEEEEbRSt6vectorINS_6BucketIT_EESaIS6_EERKSt3mapIS5_mSt4lessIS5_ESaISt4pairIKS5_mEEEm Unexecuted instantiation: _ZN5doris15build_histogramINS_7DecimalIlEEEEbRSt6vectorINS_6BucketIT_EESaIS6_EERKSt3mapIS5_mSt4lessIS5_ESaISt4pairIKS5_mEEEm Unexecuted instantiation: _ZN5doris15build_histogramINS_12Decimal128V3EEEbRSt6vectorINS_6BucketIT_EESaIS5_EERKSt3mapIS4_mSt4lessIS4_ESaISt4pairIKS4_mEEEm Unexecuted instantiation: _ZN5doris15build_histogramINS_7DecimalIN4wide7integerILm256EiEEEEEEbRSt6vectorINS_6BucketIT_EESaIS9_EERKSt3mapIS8_mSt4lessIS8_ESaISt4pairIKS8_mEEEm _ZN5doris15build_histogramINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEbRSt6vectorINS_6BucketIT_EESaISA_EERKSt3mapIS9_mSt4lessIS9_ESaISt4pairIKS9_mEEEm Line | Count | Source | 176 | 4 | const size_t max_num_buckets) { | 177 | | // If the input map is empty, there is nothing to build. | 178 | 4 | if (ordered_map.empty()) { | 179 | 2 | return false; | 180 | 2 | } | 181 | | | 182 | | // Calculate the maximum number of values that can be assigned to each bucket. | 183 | 2 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 184 | | | 185 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 186 | | // allocations when inserting buckets. | 187 | 2 | buckets.clear(); | 188 | 2 | buckets.reserve(max_num_buckets); | 189 | | | 190 | | // Initialize bucket variables. | 191 | 2 | size_t distinct_values_count = 0; | 192 | 2 | size_t values_count = 0; | 193 | 2 | size_t cumulative_values = 0; | 194 | | | 195 | | // Record how many values still need to be assigned. | 196 | 2 | auto remaining_distinct_values = ordered_map.size(); | 197 | | | 198 | 2 | auto it = ordered_map.begin(); | 199 | | | 200 | | // Lower value of the current bucket. 
| 201 | 2 | const T* lower_value = &it->first; | 202 | | | 203 | | // Iterate over the ordered map of distinct values and their counts. | 204 | 2.00k | for (; it != ordered_map.end(); ++it) { | 205 | 2.00k | const auto count = it->second; | 206 | 2.00k | const auto current_value = it->first; | 207 | | | 208 | | // Update the bucket counts and track the number of distinct values assigned. | 209 | 2.00k | distinct_values_count++; | 210 | 2.00k | remaining_distinct_values--; | 211 | 2.00k | values_count += count; | 212 | 2.00k | cumulative_values += count; | 213 | | | 214 | | // Check whether the current value should be added to the current bucket. | 215 | 2.00k | auto next = std::next(it); | 216 | 2.00k | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 217 | | | 218 | 2.00k | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 219 | 2.00k | values_count + next->second <= bucket_max_values) { | 220 | | // If the current value is the last in the input map and there are more remaining | 221 | | // distinct values than empty buckets and adding the value does not cause the bucket | 222 | | // to exceed its max size, skip adding the value to the current bucket. | 223 | 1.99k | continue; | 224 | 1.99k | } | 225 | | | 226 | | // Finalize the current bucket and add it to our collection of buckets. | 227 | 10 | auto pre_sum = cumulative_values - values_count; | 228 | | | 229 | 10 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 230 | 10 | pre_sum); | 231 | 10 | buckets.push_back(new_bucket); | 232 | | | 233 | | // Reset variables for the next bucket. | 234 | 10 | if (next != ordered_map.end()) { | 235 | 8 | lower_value = &next->first; | 236 | 8 | } | 237 | 10 | values_count = 0; | 238 | 10 | distinct_values_count = 0; | 239 | 10 | } | 240 | | | 241 | 2 | return true; | 242 | 4 | } |
_ZN5doris15build_histogramINS_11DateV2ValueINS_15DateV2ValueTypeEEEEEbRSt6vectorINS_6BucketIT_EESaIS7_EERKSt3mapIS6_mSt4lessIS6_ESaISt4pairIKS6_mEEEm Line | Count | Source | 176 | 2 | const size_t max_num_buckets) { | 177 | | // If the input map is empty, there is nothing to build. | 178 | 2 | if (ordered_map.empty()) { | 179 | 0 | return false; | 180 | 0 | } | 181 | | | 182 | | // Calculate the maximum number of values that can be assigned to each bucket. | 183 | 2 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 184 | | | 185 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 186 | | // allocations when inserting buckets. | 187 | 2 | buckets.clear(); | 188 | 2 | buckets.reserve(max_num_buckets); | 189 | | | 190 | | // Initialize bucket variables. | 191 | 2 | size_t distinct_values_count = 0; | 192 | 2 | size_t values_count = 0; | 193 | 2 | size_t cumulative_values = 0; | 194 | | | 195 | | // Record how many values still need to be assigned. | 196 | 2 | auto remaining_distinct_values = ordered_map.size(); | 197 | | | 198 | 2 | auto it = ordered_map.begin(); | 199 | | | 200 | | // Lower value of the current bucket. | 201 | 2 | const T* lower_value = &it->first; | 202 | | | 203 | | // Iterate over the ordered map of distinct values and their counts. | 204 | 202 | for (; it != ordered_map.end(); ++it) { | 205 | 200 | const auto count = it->second; | 206 | 200 | const auto current_value = it->first; | 207 | | | 208 | | // Update the bucket counts and track the number of distinct values assigned. | 209 | 200 | distinct_values_count++; | 210 | 200 | remaining_distinct_values--; | 211 | 200 | values_count += count; | 212 | 200 | cumulative_values += count; | 213 | | | 214 | | // Check whether the current value should be added to the current bucket. 
| 215 | 200 | auto next = std::next(it); | 216 | 200 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 217 | | | 218 | 200 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 219 | 200 | values_count + next->second <= bucket_max_values) { | 220 | | // If the current value is the last in the input map and there are more remaining | 221 | | // distinct values than empty buckets and adding the value does not cause the bucket | 222 | | // to exceed its max size, skip adding the value to the current bucket. | 223 | 190 | continue; | 224 | 190 | } | 225 | | | 226 | | // Finalize the current bucket and add it to our collection of buckets. | 227 | 10 | auto pre_sum = cumulative_values - values_count; | 228 | | | 229 | 10 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 230 | 10 | pre_sum); | 231 | 10 | buckets.push_back(new_bucket); | 232 | | | 233 | | // Reset variables for the next bucket. | 234 | 10 | if (next != ordered_map.end()) { | 235 | 8 | lower_value = &next->first; | 236 | 8 | } | 237 | 10 | values_count = 0; | 238 | 10 | distinct_values_count = 0; | 239 | 10 | } | 240 | | | 241 | 2 | return true; | 242 | 2 | } |
_ZN5doris15build_histogramINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEEEEbRSt6vectorINS_6BucketIT_EESaIS7_EERKSt3mapIS6_mSt4lessIS6_ESaISt4pairIKS6_mEEEm Line | Count | Source | 176 | 2 | const size_t max_num_buckets) { | 177 | | // If the input map is empty, there is nothing to build. | 178 | 2 | if (ordered_map.empty()) { | 179 | 0 | return false; | 180 | 0 | } | 181 | | | 182 | | // Calculate the maximum number of values that can be assigned to each bucket. | 183 | 2 | auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets); | 184 | | | 185 | | // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional | 186 | | // allocations when inserting buckets. | 187 | 2 | buckets.clear(); | 188 | 2 | buckets.reserve(max_num_buckets); | 189 | | | 190 | | // Initialize bucket variables. | 191 | 2 | size_t distinct_values_count = 0; | 192 | 2 | size_t values_count = 0; | 193 | 2 | size_t cumulative_values = 0; | 194 | | | 195 | | // Record how many values still need to be assigned. | 196 | 2 | auto remaining_distinct_values = ordered_map.size(); | 197 | | | 198 | 2 | auto it = ordered_map.begin(); | 199 | | | 200 | | // Lower value of the current bucket. | 201 | 2 | const T* lower_value = &it->first; | 202 | | | 203 | | // Iterate over the ordered map of distinct values and their counts. | 204 | 202 | for (; it != ordered_map.end(); ++it) { | 205 | 200 | const auto count = it->second; | 206 | 200 | const auto current_value = it->first; | 207 | | | 208 | | // Update the bucket counts and track the number of distinct values assigned. | 209 | 200 | distinct_values_count++; | 210 | 200 | remaining_distinct_values--; | 211 | 200 | values_count += count; | 212 | 200 | cumulative_values += count; | 213 | | | 214 | | // Check whether the current value should be added to the current bucket. 
| 215 | 200 | auto next = std::next(it); | 216 | 200 | size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1; | 217 | | | 218 | 200 | if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets && | 219 | 200 | values_count + next->second <= bucket_max_values) { | 220 | | // If the current value is the last in the input map and there are more remaining | 221 | | // distinct values than empty buckets and adding the value does not cause the bucket | 222 | | // to exceed its max size, skip adding the value to the current bucket. | 223 | 190 | continue; | 224 | 190 | } | 225 | | | 226 | | // Finalize the current bucket and add it to our collection of buckets. | 227 | 10 | auto pre_sum = cumulative_values - values_count; | 228 | | | 229 | 10 | Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count, | 230 | 10 | pre_sum); | 231 | 10 | buckets.push_back(new_bucket); | 232 | | | 233 | | // Reset variables for the next bucket. | 234 | 10 | if (next != ordered_map.end()) { | 235 | 8 | lower_value = &next->first; | 236 | 8 | } | 237 | 10 | values_count = 0; | 238 | 10 | distinct_values_count = 0; | 239 | 10 | } | 240 | | | 241 | 2 | return true; | 242 | 2 | } |
|
243 | | |
244 | | template <typename T> |
245 | | bool histogram_to_json(rapidjson::StringBuffer& buffer, const std::vector<Bucket<T>>& buckets, |
246 | 38 | const DataTypePtr& data_type) { |
247 | 38 | rapidjson::Document doc; |
248 | 38 | doc.SetObject(); |
249 | 38 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); |
250 | | |
251 | 38 | int num_buckets = cast_set<int>(buckets.size()); |
252 | 38 | doc.AddMember("num_buckets", num_buckets, allocator); |
253 | | |
254 | 38 | rapidjson::Value bucket_arr(rapidjson::kArrayType); |
255 | 38 | bucket_arr.Reserve(num_buckets, allocator); |
256 | | |
257 | 38 | std::stringstream ss1; |
258 | 38 | std::stringstream ss2; |
259 | | |
260 | 38 | rapidjson::Value lower_val; |
261 | 38 | rapidjson::Value upper_val; |
262 | | |
263 | | // Convert bucket's lower and upper to 2 columns |
264 | 38 | MutableColumnPtr lower_column = data_type->create_column(); |
265 | 38 | MutableColumnPtr upper_column = data_type->create_column(); |
266 | 102 | for (const auto& bucket : buckets) { |
267 | | // String type is different, it has to pass in length |
268 | | // if it is string type , directly use string value |
269 | 102 | if constexpr (!std::is_same_v<T, std::string>) { |
270 | 92 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); |
271 | 92 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); |
272 | 92 | } |
273 | 102 | } |
274 | 38 | size_t row_num = 0; |
275 | | |
276 | 38 | auto format_options = DataTypeSerDe::get_default_format_options(); |
277 | 38 | auto time_zone = cctz::utc_time_zone(); |
278 | 38 | format_options.timezone = &time_zone; |
279 | | |
280 | 102 | for (const auto& bucket : buckets) { |
281 | 102 | if constexpr (std::is_same_v<T, std::string>) { |
282 | 10 | lower_val.SetString(bucket.lower.data(), |
283 | 10 | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); |
284 | 10 | upper_val.SetString(bucket.upper.data(), |
285 | 10 | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); |
286 | 92 | } else { |
287 | 92 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); |
288 | 92 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); |
289 | 92 | ++row_num; |
290 | 92 | lower_val.SetString(lower_str.data(), |
291 | 92 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); |
292 | 92 | upper_val.SetString(upper_str.data(), |
293 | 92 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); |
294 | 92 | } |
295 | 102 | rapidjson::Value bucket_json(rapidjson::kObjectType); |
296 | 102 | bucket_json.AddMember("lower", lower_val, allocator); |
297 | 102 | bucket_json.AddMember("upper", upper_val, allocator); |
298 | 102 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); |
299 | 102 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); |
300 | 102 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); |
301 | | |
302 | 102 | bucket_arr.PushBack(bucket_json, allocator); |
303 | 102 | } |
304 | | |
305 | 38 | doc.AddMember("buckets", bucket_arr, allocator); |
306 | 38 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); |
307 | 38 | doc.Accept(writer); |
308 | | |
309 | 38 | return !buckets.empty() && buffer.GetSize() > 0; |
310 | 38 | } _ZN5doris17histogram_to_jsonIiEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 246 | 6 | const DataTypePtr& data_type) { | 247 | 6 | rapidjson::Document doc; | 248 | 6 | doc.SetObject(); | 249 | 6 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 250 | | | 251 | 6 | int num_buckets = cast_set<int>(buckets.size()); | 252 | 6 | doc.AddMember("num_buckets", num_buckets, allocator); | 253 | | | 254 | 6 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 255 | 6 | bucket_arr.Reserve(num_buckets, allocator); | 256 | | | 257 | 6 | std::stringstream ss1; | 258 | 6 | std::stringstream ss2; | 259 | | | 260 | 6 | rapidjson::Value lower_val; | 261 | 6 | rapidjson::Value upper_val; | 262 | | | 263 | | // Convert bucket's lower and upper to 2 columns | 264 | 6 | MutableColumnPtr lower_column = data_type->create_column(); | 265 | 6 | MutableColumnPtr upper_column = data_type->create_column(); | 266 | 12 | for (const auto& bucket : buckets) { | 267 | | // String type is different, it has to pass in length | 268 | | // if it is string type , directly use string value | 269 | 12 | if constexpr (!std::is_same_v<T, std::string>) { | 270 | 12 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 271 | 12 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 272 | 12 | } | 273 | 12 | } | 274 | 6 | size_t row_num = 0; | 275 | | | 276 | 6 | auto format_options = DataTypeSerDe::get_default_format_options(); | 277 | 6 | auto time_zone = cctz::utc_time_zone(); | 278 | 6 | format_options.timezone = &time_zone; | 279 | | | 280 | 12 | for (const auto& bucket : buckets) { | 281 | | if constexpr (std::is_same_v<T, std::string>) { | 282 | | lower_val.SetString(bucket.lower.data(), | 283 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 284 | | 
upper_val.SetString(bucket.upper.data(), | 285 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 286 | 12 | } else { | 287 | 12 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 288 | 12 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 289 | 12 | ++row_num; | 290 | 12 | lower_val.SetString(lower_str.data(), | 291 | 12 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 292 | 12 | upper_val.SetString(upper_str.data(), | 293 | 12 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 294 | 12 | } | 295 | 12 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 296 | 12 | bucket_json.AddMember("lower", lower_val, allocator); | 297 | 12 | bucket_json.AddMember("upper", upper_val, allocator); | 298 | 12 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 299 | 12 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 300 | 12 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 301 | | | 302 | 12 | bucket_arr.PushBack(bucket_json, allocator); | 303 | 12 | } | 304 | | | 305 | 6 | doc.AddMember("buckets", bucket_arr, allocator); | 306 | 6 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 307 | 6 | doc.Accept(writer); | 308 | | | 309 | 6 | return !buckets.empty() && buffer.GetSize() > 0; | 310 | 6 | } |
Unexecuted instantiation: _ZN5doris17histogram_to_jsonIhEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE _ZN5doris17histogram_to_jsonIaEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 246 | 4 | const DataTypePtr& data_type) { | 247 | 4 | rapidjson::Document doc; | 248 | 4 | doc.SetObject(); | 249 | 4 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 250 | | | 251 | 4 | int num_buckets = cast_set<int>(buckets.size()); | 252 | 4 | doc.AddMember("num_buckets", num_buckets, allocator); | 253 | | | 254 | 4 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 255 | 4 | bucket_arr.Reserve(num_buckets, allocator); | 256 | | | 257 | 4 | std::stringstream ss1; | 258 | 4 | std::stringstream ss2; | 259 | | | 260 | 4 | rapidjson::Value lower_val; | 261 | 4 | rapidjson::Value upper_val; | 262 | | | 263 | | // Convert bucket's lower and upper to 2 columns | 264 | 4 | MutableColumnPtr lower_column = data_type->create_column(); | 265 | 4 | MutableColumnPtr upper_column = data_type->create_column(); | 266 | 10 | for (const auto& bucket : buckets) { | 267 | | // String type is different, it has to pass in length | 268 | | // if it is string type , directly use string value | 269 | 10 | if constexpr (!std::is_same_v<T, std::string>) { | 270 | 10 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 271 | 10 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 272 | 10 | } | 273 | 10 | } | 274 | 4 | size_t row_num = 0; | 275 | | | 276 | 4 | auto format_options = DataTypeSerDe::get_default_format_options(); | 277 | 4 | auto time_zone = cctz::utc_time_zone(); | 278 | 4 | format_options.timezone = &time_zone; | 279 | | | 280 | 10 | for (const auto& bucket : buckets) { | 281 | | if constexpr (std::is_same_v<T, 
std::string>) { | 282 | | lower_val.SetString(bucket.lower.data(), | 283 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 284 | | upper_val.SetString(bucket.upper.data(), | 285 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 286 | 10 | } else { | 287 | 10 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 288 | 10 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 289 | 10 | ++row_num; | 290 | 10 | lower_val.SetString(lower_str.data(), | 291 | 10 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 292 | 10 | upper_val.SetString(upper_str.data(), | 293 | 10 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 294 | 10 | } | 295 | 10 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 296 | 10 | bucket_json.AddMember("lower", lower_val, allocator); | 297 | 10 | bucket_json.AddMember("upper", upper_val, allocator); | 298 | 10 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 299 | 10 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 300 | 10 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 301 | | | 302 | 10 | bucket_arr.PushBack(bucket_json, allocator); | 303 | 10 | } | 304 | | | 305 | 4 | doc.AddMember("buckets", bucket_arr, allocator); | 306 | 4 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 307 | 4 | doc.Accept(writer); | 308 | | | 309 | 4 | return !buckets.empty() && buffer.GetSize() > 0; | 310 | 4 | } |
_ZN5doris17histogram_to_jsonIsEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 246 | 4 | const DataTypePtr& data_type) { | 247 | 4 | rapidjson::Document doc; | 248 | 4 | doc.SetObject(); | 249 | 4 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 250 | | | 251 | 4 | int num_buckets = cast_set<int>(buckets.size()); | 252 | 4 | doc.AddMember("num_buckets", num_buckets, allocator); | 253 | | | 254 | 4 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 255 | 4 | bucket_arr.Reserve(num_buckets, allocator); | 256 | | | 257 | 4 | std::stringstream ss1; | 258 | 4 | std::stringstream ss2; | 259 | | | 260 | 4 | rapidjson::Value lower_val; | 261 | 4 | rapidjson::Value upper_val; | 262 | | | 263 | | // Convert bucket's lower and upper to 2 columns | 264 | 4 | MutableColumnPtr lower_column = data_type->create_column(); | 265 | 4 | MutableColumnPtr upper_column = data_type->create_column(); | 266 | 10 | for (const auto& bucket : buckets) { | 267 | | // String type is different, it has to pass in length | 268 | | // if it is string type , directly use string value | 269 | 10 | if constexpr (!std::is_same_v<T, std::string>) { | 270 | 10 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 271 | 10 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 272 | 10 | } | 273 | 10 | } | 274 | 4 | size_t row_num = 0; | 275 | | | 276 | 4 | auto format_options = DataTypeSerDe::get_default_format_options(); | 277 | 4 | auto time_zone = cctz::utc_time_zone(); | 278 | 4 | format_options.timezone = &time_zone; | 279 | | | 280 | 10 | for (const auto& bucket : buckets) { | 281 | | if constexpr (std::is_same_v<T, std::string>) { | 282 | | lower_val.SetString(bucket.lower.data(), | 283 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 284 | | upper_val.SetString(bucket.upper.data(), | 
285 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 286 | 10 | } else { | 287 | 10 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 288 | 10 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 289 | 10 | ++row_num; | 290 | 10 | lower_val.SetString(lower_str.data(), | 291 | 10 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 292 | 10 | upper_val.SetString(upper_str.data(), | 293 | 10 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 294 | 10 | } | 295 | 10 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 296 | 10 | bucket_json.AddMember("lower", lower_val, allocator); | 297 | 10 | bucket_json.AddMember("upper", upper_val, allocator); | 298 | 10 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 299 | 10 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 300 | 10 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 301 | | | 302 | 10 | bucket_arr.PushBack(bucket_json, allocator); | 303 | 10 | } | 304 | | | 305 | 4 | doc.AddMember("buckets", bucket_arr, allocator); | 306 | 4 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 307 | 4 | doc.Accept(writer); | 308 | | | 309 | 4 | return !buckets.empty() && buffer.GetSize() > 0; | 310 | 4 | } |
_ZN5doris17histogram_to_jsonIlEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 246 | 4 | const DataTypePtr& data_type) { | 247 | 4 | rapidjson::Document doc; | 248 | 4 | doc.SetObject(); | 249 | 4 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 250 | | | 251 | 4 | int num_buckets = cast_set<int>(buckets.size()); | 252 | 4 | doc.AddMember("num_buckets", num_buckets, allocator); | 253 | | | 254 | 4 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 255 | 4 | bucket_arr.Reserve(num_buckets, allocator); | 256 | | | 257 | 4 | std::stringstream ss1; | 258 | 4 | std::stringstream ss2; | 259 | | | 260 | 4 | rapidjson::Value lower_val; | 261 | 4 | rapidjson::Value upper_val; | 262 | | | 263 | | // Convert bucket's lower and upper to 2 columns | 264 | 4 | MutableColumnPtr lower_column = data_type->create_column(); | 265 | 4 | MutableColumnPtr upper_column = data_type->create_column(); | 266 | 10 | for (const auto& bucket : buckets) { | 267 | | // String type is different, it has to pass in length | 268 | | // if it is string type , directly use string value | 269 | 10 | if constexpr (!std::is_same_v<T, std::string>) { | 270 | 10 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 271 | 10 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 272 | 10 | } | 273 | 10 | } | 274 | 4 | size_t row_num = 0; | 275 | | | 276 | 4 | auto format_options = DataTypeSerDe::get_default_format_options(); | 277 | 4 | auto time_zone = cctz::utc_time_zone(); | 278 | 4 | format_options.timezone = &time_zone; | 279 | | | 280 | 10 | for (const auto& bucket : buckets) { | 281 | | if constexpr (std::is_same_v<T, std::string>) { | 282 | | lower_val.SetString(bucket.lower.data(), | 283 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 284 | | upper_val.SetString(bucket.upper.data(), | 
285 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 286 | 10 | } else { | 287 | 10 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 288 | 10 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 289 | 10 | ++row_num; | 290 | 10 | lower_val.SetString(lower_str.data(), | 291 | 10 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 292 | 10 | upper_val.SetString(upper_str.data(), | 293 | 10 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 294 | 10 | } | 295 | 10 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 296 | 10 | bucket_json.AddMember("lower", lower_val, allocator); | 297 | 10 | bucket_json.AddMember("upper", upper_val, allocator); | 298 | 10 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 299 | 10 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 300 | 10 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 301 | | | 302 | 10 | bucket_arr.PushBack(bucket_json, allocator); | 303 | 10 | } | 304 | | | 305 | 4 | doc.AddMember("buckets", bucket_arr, allocator); | 306 | 4 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 307 | 4 | doc.Accept(writer); | 308 | | | 309 | 4 | return !buckets.empty() && buffer.GetSize() > 0; | 310 | 4 | } |
_ZN5doris17histogram_to_jsonInEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 246 | 4 | const DataTypePtr& data_type) { | 247 | 4 | rapidjson::Document doc; | 248 | 4 | doc.SetObject(); | 249 | 4 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 250 | | | 251 | 4 | int num_buckets = cast_set<int>(buckets.size()); | 252 | 4 | doc.AddMember("num_buckets", num_buckets, allocator); | 253 | | | 254 | 4 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 255 | 4 | bucket_arr.Reserve(num_buckets, allocator); | 256 | | | 257 | 4 | std::stringstream ss1; | 258 | 4 | std::stringstream ss2; | 259 | | | 260 | 4 | rapidjson::Value lower_val; | 261 | 4 | rapidjson::Value upper_val; | 262 | | | 263 | | // Convert bucket's lower and upper to 2 columns | 264 | 4 | MutableColumnPtr lower_column = data_type->create_column(); | 265 | 4 | MutableColumnPtr upper_column = data_type->create_column(); | 266 | 10 | for (const auto& bucket : buckets) { | 267 | | // String type is different, it has to pass in length | 268 | | // if it is string type , directly use string value | 269 | 10 | if constexpr (!std::is_same_v<T, std::string>) { | 270 | 10 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 271 | 10 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 272 | 10 | } | 273 | 10 | } | 274 | 4 | size_t row_num = 0; | 275 | | | 276 | 4 | auto format_options = DataTypeSerDe::get_default_format_options(); | 277 | 4 | auto time_zone = cctz::utc_time_zone(); | 278 | 4 | format_options.timezone = &time_zone; | 279 | | | 280 | 10 | for (const auto& bucket : buckets) { | 281 | | if constexpr (std::is_same_v<T, std::string>) { | 282 | | lower_val.SetString(bucket.lower.data(), | 283 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 284 | | upper_val.SetString(bucket.upper.data(), | 
285 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 286 | 10 | } else { | 287 | 10 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 288 | 10 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 289 | 10 | ++row_num; | 290 | 10 | lower_val.SetString(lower_str.data(), | 291 | 10 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 292 | 10 | upper_val.SetString(upper_str.data(), | 293 | 10 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 294 | 10 | } | 295 | 10 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 296 | 10 | bucket_json.AddMember("lower", lower_val, allocator); | 297 | 10 | bucket_json.AddMember("upper", upper_val, allocator); | 298 | 10 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 299 | 10 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 300 | 10 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 301 | | | 302 | 10 | bucket_arr.PushBack(bucket_json, allocator); | 303 | 10 | } | 304 | | | 305 | 4 | doc.AddMember("buckets", bucket_arr, allocator); | 306 | 4 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 307 | 4 | doc.Accept(writer); | 308 | | | 309 | 4 | return !buckets.empty() && buffer.GetSize() > 0; | 310 | 4 | } |
_ZN5doris17histogram_to_jsonIfEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 246 | 4 | const DataTypePtr& data_type) { | 247 | 4 | rapidjson::Document doc; | 248 | 4 | doc.SetObject(); | 249 | 4 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 250 | | | 251 | 4 | int num_buckets = cast_set<int>(buckets.size()); | 252 | 4 | doc.AddMember("num_buckets", num_buckets, allocator); | 253 | | | 254 | 4 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 255 | 4 | bucket_arr.Reserve(num_buckets, allocator); | 256 | | | 257 | 4 | std::stringstream ss1; | 258 | 4 | std::stringstream ss2; | 259 | | | 260 | 4 | rapidjson::Value lower_val; | 261 | 4 | rapidjson::Value upper_val; | 262 | | | 263 | | // Convert bucket's lower and upper to 2 columns | 264 | 4 | MutableColumnPtr lower_column = data_type->create_column(); | 265 | 4 | MutableColumnPtr upper_column = data_type->create_column(); | 266 | 10 | for (const auto& bucket : buckets) { | 267 | | // String type is different, it has to pass in length | 268 | | // if it is string type , directly use string value | 269 | 10 | if constexpr (!std::is_same_v<T, std::string>) { | 270 | 10 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 271 | 10 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 272 | 10 | } | 273 | 10 | } | 274 | 4 | size_t row_num = 0; | 275 | | | 276 | 4 | auto format_options = DataTypeSerDe::get_default_format_options(); | 277 | 4 | auto time_zone = cctz::utc_time_zone(); | 278 | 4 | format_options.timezone = &time_zone; | 279 | | | 280 | 10 | for (const auto& bucket : buckets) { | 281 | | if constexpr (std::is_same_v<T, std::string>) { | 282 | | lower_val.SetString(bucket.lower.data(), | 283 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 284 | | upper_val.SetString(bucket.upper.data(), | 
285 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 286 | 10 | } else { | 287 | 10 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 288 | 10 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 289 | 10 | ++row_num; | 290 | 10 | lower_val.SetString(lower_str.data(), | 291 | 10 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 292 | 10 | upper_val.SetString(upper_str.data(), | 293 | 10 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 294 | 10 | } | 295 | 10 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 296 | 10 | bucket_json.AddMember("lower", lower_val, allocator); | 297 | 10 | bucket_json.AddMember("upper", upper_val, allocator); | 298 | 10 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 299 | 10 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 300 | 10 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 301 | | | 302 | 10 | bucket_arr.PushBack(bucket_json, allocator); | 303 | 10 | } | 304 | | | 305 | 4 | doc.AddMember("buckets", bucket_arr, allocator); | 306 | 4 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 307 | 4 | doc.Accept(writer); | 308 | | | 309 | 4 | return !buckets.empty() && buffer.GetSize() > 0; | 310 | 4 | } |
_ZN5doris17histogram_to_jsonIdEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 246 | 4 | const DataTypePtr& data_type) { | 247 | 4 | rapidjson::Document doc; | 248 | 4 | doc.SetObject(); | 249 | 4 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 250 | | | 251 | 4 | int num_buckets = cast_set<int>(buckets.size()); | 252 | 4 | doc.AddMember("num_buckets", num_buckets, allocator); | 253 | | | 254 | 4 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 255 | 4 | bucket_arr.Reserve(num_buckets, allocator); | 256 | | | 257 | 4 | std::stringstream ss1; | 258 | 4 | std::stringstream ss2; | 259 | | | 260 | 4 | rapidjson::Value lower_val; | 261 | 4 | rapidjson::Value upper_val; | 262 | | | 263 | | // Convert bucket's lower and upper to 2 columns | 264 | 4 | MutableColumnPtr lower_column = data_type->create_column(); | 265 | 4 | MutableColumnPtr upper_column = data_type->create_column(); | 266 | 10 | for (const auto& bucket : buckets) { | 267 | | // String type is different, it has to pass in length | 268 | | // if it is string type , directly use string value | 269 | 10 | if constexpr (!std::is_same_v<T, std::string>) { | 270 | 10 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 271 | 10 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 272 | 10 | } | 273 | 10 | } | 274 | 4 | size_t row_num = 0; | 275 | | | 276 | 4 | auto format_options = DataTypeSerDe::get_default_format_options(); | 277 | 4 | auto time_zone = cctz::utc_time_zone(); | 278 | 4 | format_options.timezone = &time_zone; | 279 | | | 280 | 10 | for (const auto& bucket : buckets) { | 281 | | if constexpr (std::is_same_v<T, std::string>) { | 282 | | lower_val.SetString(bucket.lower.data(), | 283 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 284 | | upper_val.SetString(bucket.upper.data(), | 
285 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 286 | 10 | } else { | 287 | 10 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 288 | 10 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 289 | 10 | ++row_num; | 290 | 10 | lower_val.SetString(lower_str.data(), | 291 | 10 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 292 | 10 | upper_val.SetString(upper_str.data(), | 293 | 10 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 294 | 10 | } | 295 | 10 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 296 | 10 | bucket_json.AddMember("lower", lower_val, allocator); | 297 | 10 | bucket_json.AddMember("upper", upper_val, allocator); | 298 | 10 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 299 | 10 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 300 | 10 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 301 | | | 302 | 10 | bucket_arr.PushBack(bucket_json, allocator); | 303 | 10 | } | 304 | | | 305 | 4 | doc.AddMember("buckets", bucket_arr, allocator); | 306 | 4 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 307 | 4 | doc.Accept(writer); | 308 | | | 309 | 4 | return !buckets.empty() && buffer.GetSize() > 0; | 310 | 4 | } |
Unexecuted instantiation: _ZN5doris17histogram_to_jsonINS_7DecimalIiEEEEbRN9rapidjson19GenericStringBufferINS3_4UTF8IcEENS3_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISD_EERKSt10shared_ptrIKNS_9IDataTypeEE Unexecuted instantiation: _ZN5doris17histogram_to_jsonINS_7DecimalIlEEEEbRN9rapidjson19GenericStringBufferINS3_4UTF8IcEENS3_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISD_EERKSt10shared_ptrIKNS_9IDataTypeEE Unexecuted instantiation: _ZN5doris17histogram_to_jsonINS_12Decimal128V3EEEbRN9rapidjson19GenericStringBufferINS2_4UTF8IcEENS2_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISC_EERKSt10shared_ptrIKNS_9IDataTypeEE Unexecuted instantiation: _ZN5doris17histogram_to_jsonINS_7DecimalIN4wide7integerILm256EiEEEEEEbRN9rapidjson19GenericStringBufferINS6_4UTF8IcEENS6_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISG_EERKSt10shared_ptrIKNS_9IDataTypeEE _ZN5doris17histogram_to_jsonINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEbRN9rapidjson19GenericStringBufferINS7_4UTF8IcEENS7_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISH_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 246 | 4 | const DataTypePtr& data_type) { | 247 | 4 | rapidjson::Document doc; | 248 | 4 | doc.SetObject(); | 249 | 4 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 250 | | | 251 | 4 | int num_buckets = cast_set<int>(buckets.size()); | 252 | 4 | doc.AddMember("num_buckets", num_buckets, allocator); | 253 | | | 254 | 4 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 255 | 4 | bucket_arr.Reserve(num_buckets, allocator); | 256 | | | 257 | 4 | std::stringstream ss1; | 258 | 4 | std::stringstream ss2; | 259 | | | 260 | 4 | rapidjson::Value lower_val; | 261 | 4 | rapidjson::Value upper_val; | 262 | | | 263 | | // Convert bucket's lower and upper to 2 columns | 264 | 4 | MutableColumnPtr lower_column = data_type->create_column(); | 265 | 4 | MutableColumnPtr upper_column = data_type->create_column(); | 266 | 10 | for (const auto& bucket : buckets) { 
| 267 | | // String type is different, it has to pass in length | 268 | | // if it is string type , directly use string value | 269 | | if constexpr (!std::is_same_v<T, std::string>) { | 270 | | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 271 | | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 272 | | } | 273 | 10 | } | 274 | 4 | size_t row_num = 0; | 275 | | | 276 | 4 | auto format_options = DataTypeSerDe::get_default_format_options(); | 277 | 4 | auto time_zone = cctz::utc_time_zone(); | 278 | 4 | format_options.timezone = &time_zone; | 279 | | | 280 | 10 | for (const auto& bucket : buckets) { | 281 | 10 | if constexpr (std::is_same_v<T, std::string>) { | 282 | 10 | lower_val.SetString(bucket.lower.data(), | 283 | 10 | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 284 | 10 | upper_val.SetString(bucket.upper.data(), | 285 | 10 | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 286 | | } else { | 287 | | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 288 | | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 289 | | ++row_num; | 290 | | lower_val.SetString(lower_str.data(), | 291 | | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 292 | | upper_val.SetString(upper_str.data(), | 293 | | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 294 | | } | 295 | 10 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 296 | 10 | bucket_json.AddMember("lower", lower_val, allocator); | 297 | 10 | bucket_json.AddMember("upper", upper_val, allocator); | 298 | 10 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 299 | 10 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 300 | 10 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 301 | | | 302 | 10 | 
bucket_arr.PushBack(bucket_json, allocator); | 303 | 10 | } | 304 | | | 305 | 4 | doc.AddMember("buckets", bucket_arr, allocator); | 306 | 4 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 307 | 4 | doc.Accept(writer); | 308 | | | 309 | 4 | return !buckets.empty() && buffer.GetSize() > 0; | 310 | 4 | } |
_ZN5doris17histogram_to_jsonINS_11DateV2ValueINS_15DateV2ValueTypeEEEEEbRN9rapidjson19GenericStringBufferINS4_4UTF8IcEENS4_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISE_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 246 | 2 | const DataTypePtr& data_type) { | 247 | 2 | rapidjson::Document doc; | 248 | 2 | doc.SetObject(); | 249 | 2 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 250 | | | 251 | 2 | int num_buckets = cast_set<int>(buckets.size()); | 252 | 2 | doc.AddMember("num_buckets", num_buckets, allocator); | 253 | | | 254 | 2 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 255 | 2 | bucket_arr.Reserve(num_buckets, allocator); | 256 | | | 257 | 2 | std::stringstream ss1; | 258 | 2 | std::stringstream ss2; | 259 | | | 260 | 2 | rapidjson::Value lower_val; | 261 | 2 | rapidjson::Value upper_val; | 262 | | | 263 | | // Convert bucket's lower and upper to 2 columns | 264 | 2 | MutableColumnPtr lower_column = data_type->create_column(); | 265 | 2 | MutableColumnPtr upper_column = data_type->create_column(); | 266 | 10 | for (const auto& bucket : buckets) { | 267 | | // String type is different, it has to pass in length | 268 | | // if it is string type , directly use string value | 269 | 10 | if constexpr (!std::is_same_v<T, std::string>) { | 270 | 10 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 271 | 10 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 272 | 10 | } | 273 | 10 | } | 274 | 2 | size_t row_num = 0; | 275 | | | 276 | 2 | auto format_options = DataTypeSerDe::get_default_format_options(); | 277 | 2 | auto time_zone = cctz::utc_time_zone(); | 278 | 2 | format_options.timezone = &time_zone; | 279 | | | 280 | 10 | for (const auto& bucket : buckets) { | 281 | | if constexpr (std::is_same_v<T, std::string>) { | 282 | | lower_val.SetString(bucket.lower.data(), | 283 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 284 | | 
upper_val.SetString(bucket.upper.data(), | 285 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 286 | 10 | } else { | 287 | 10 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 288 | 10 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 289 | 10 | ++row_num; | 290 | 10 | lower_val.SetString(lower_str.data(), | 291 | 10 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 292 | 10 | upper_val.SetString(upper_str.data(), | 293 | 10 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 294 | 10 | } | 295 | 10 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 296 | 10 | bucket_json.AddMember("lower", lower_val, allocator); | 297 | 10 | bucket_json.AddMember("upper", upper_val, allocator); | 298 | 10 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 299 | 10 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 300 | 10 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 301 | | | 302 | 10 | bucket_arr.PushBack(bucket_json, allocator); | 303 | 10 | } | 304 | | | 305 | 2 | doc.AddMember("buckets", bucket_arr, allocator); | 306 | 2 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 307 | 2 | doc.Accept(writer); | 308 | | | 309 | 2 | return !buckets.empty() && buffer.GetSize() > 0; | 310 | 2 | } |
_ZN5doris17histogram_to_jsonINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEEEEbRN9rapidjson19GenericStringBufferINS4_4UTF8IcEENS4_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISE_EERKSt10shared_ptrIKNS_9IDataTypeEE Line | Count | Source | 246 | 2 | const DataTypePtr& data_type) { | 247 | 2 | rapidjson::Document doc; | 248 | 2 | doc.SetObject(); | 249 | 2 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); | 250 | | | 251 | 2 | int num_buckets = cast_set<int>(buckets.size()); | 252 | 2 | doc.AddMember("num_buckets", num_buckets, allocator); | 253 | | | 254 | 2 | rapidjson::Value bucket_arr(rapidjson::kArrayType); | 255 | 2 | bucket_arr.Reserve(num_buckets, allocator); | 256 | | | 257 | 2 | std::stringstream ss1; | 258 | 2 | std::stringstream ss2; | 259 | | | 260 | 2 | rapidjson::Value lower_val; | 261 | 2 | rapidjson::Value upper_val; | 262 | | | 263 | | // Convert bucket's lower and upper to 2 columns | 264 | 2 | MutableColumnPtr lower_column = data_type->create_column(); | 265 | 2 | MutableColumnPtr upper_column = data_type->create_column(); | 266 | 10 | for (const auto& bucket : buckets) { | 267 | | // String type is different, it has to pass in length | 268 | | // if it is string type , directly use string value | 269 | 10 | if constexpr (!std::is_same_v<T, std::string>) { | 270 | 10 | lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0); | 271 | 10 | upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0); | 272 | 10 | } | 273 | 10 | } | 274 | 2 | size_t row_num = 0; | 275 | | | 276 | 2 | auto format_options = DataTypeSerDe::get_default_format_options(); | 277 | 2 | auto time_zone = cctz::utc_time_zone(); | 278 | 2 | format_options.timezone = &time_zone; | 279 | | | 280 | 10 | for (const auto& bucket : buckets) { | 281 | | if constexpr (std::is_same_v<T, std::string>) { | 282 | | lower_val.SetString(bucket.lower.data(), | 283 | | static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator); | 284 | | 
upper_val.SetString(bucket.upper.data(), | 285 | | static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator); | 286 | 10 | } else { | 287 | 10 | std::string lower_str = data_type->to_string(*lower_column, row_num, format_options); | 288 | 10 | std::string upper_str = data_type->to_string(*upper_column, row_num, format_options); | 289 | 10 | ++row_num; | 290 | 10 | lower_val.SetString(lower_str.data(), | 291 | 10 | static_cast<rapidjson::SizeType>(lower_str.size()), allocator); | 292 | 10 | upper_val.SetString(upper_str.data(), | 293 | 10 | static_cast<rapidjson::SizeType>(upper_str.size()), allocator); | 294 | 10 | } | 295 | 10 | rapidjson::Value bucket_json(rapidjson::kObjectType); | 296 | 10 | bucket_json.AddMember("lower", lower_val, allocator); | 297 | 10 | bucket_json.AddMember("upper", upper_val, allocator); | 298 | 10 | bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator); | 299 | 10 | bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator); | 300 | 10 | bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator); | 301 | | | 302 | 10 | bucket_arr.PushBack(bucket_json, allocator); | 303 | 10 | } | 304 | | | 305 | 2 | doc.AddMember("buckets", bucket_arr, allocator); | 306 | 2 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | 307 | 2 | doc.Accept(writer); | 308 | | | 309 | 2 | return !buckets.empty() && buffer.GetSize() > 0; | 310 | 2 | } |
|
311 | | #include "common/compile_check_end.h" |
312 | | } // namespace doris |