Coverage Report

Created: 2026-04-16 17:24

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exec/common/histogram_helpers.hpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <rapidjson/document.h>
21
#include <rapidjson/prettywriter.h>
22
#include <rapidjson/stringbuffer.h>
23
24
#include <boost/dynamic_bitset.hpp>
25
26
#include "common/cast_set.h"
27
#include "core/data_type/data_type_decimal.h"
28
29
namespace doris {
30
template <typename T>
31
struct Bucket {
32
public:
33
    Bucket() = default;
34
    Bucket(T lower, T upper, size_t ndv, size_t count, size_t pre_sum)
35
116
            : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {}
Unexecuted instantiation: _ZN5doris6BucketINS_7DecimalIiEEEC2ES2_S2_mmm
Unexecuted instantiation: _ZN5doris6BucketINS_7DecimalIlEEEC2ES2_S2_mmm
Unexecuted instantiation: _ZN5doris6BucketINS_12Decimal128V3EEC2ES1_S1_mmm
Unexecuted instantiation: _ZN5doris6BucketINS_7DecimalIN4wide7integerILm256EiEEEEEC2ES5_S5_mmm
_ZN5doris6BucketINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEC2ES6_S6_mmm
Line
Count
Source
35
10
            : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {}
_ZN5doris6BucketINS_11DateV2ValueINS_15DateV2ValueTypeEEEEC2ES3_S3_mmm
Line
Count
Source
35
10
            : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {}
_ZN5doris6BucketINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEEEC2ES3_S3_mmm
Line
Count
Source
35
10
            : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {}
_ZN5doris6BucketIiEC2Eiimmm
Line
Count
Source
35
26
            : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {}
Unexecuted instantiation: _ZN5doris6BucketIhEC2Ehhmmm
_ZN5doris6BucketIaEC2Eaammm
Line
Count
Source
35
10
            : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {}
_ZN5doris6BucketIsEC2Essmmm
Line
Count
Source
35
10
            : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {}
_ZN5doris6BucketIlEC2Ellmmm
Line
Count
Source
35
10
            : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {}
_ZN5doris6BucketInEC2Ennmmm
Line
Count
Source
35
10
            : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {}
_ZN5doris6BucketIfEC2Effmmm
Line
Count
Source
35
10
            : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {}
_ZN5doris6BucketIdEC2Eddmmm
Line
Count
Source
35
10
            : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {}
36
37
    T lower;
38
    T upper;
39
    size_t ndv;
40
    size_t count;
41
    size_t pre_sum;
42
};
43
44
/**
45
 * Checks if it is possible to assign the provided value_map to the given
46
 * number of buckets such that no bucket has a size larger than max_bucket_size.
47
 *
48
 * @param value_map A mapping of values to their counts.
49
 * @param max_bucket_size The maximum size that any bucket is allowed to have.
50
 * @param num_buckets The number of buckets that we want to assign values to.
51
 *
52
 * @return true if the values can be assigned to the buckets, false otherwise.
53
 */
54
template <typename T>
55
bool can_assign_into_buckets(const std::map<T, size_t>& value_map, const size_t max_bucket_size,
56
188
                             const size_t num_buckets) {
57
188
    if (value_map.empty()) {
58
1
        return false;
59
187
    };
60
61
187
    size_t used_buckets = 1;
62
187
    size_t current_bucket_size = 0;
63
64
28.9k
    for (const auto& [value, count] : value_map) {
65
28.9k
        current_bucket_size += count;
66
67
        // If adding the current value to the current bucket would exceed max_bucket_size,
68
        // then we start a new bucket.
69
28.9k
        if (current_bucket_size > max_bucket_size) {
70
701
            ++used_buckets;
71
701
            current_bucket_size = count;
72
701
        }
73
74
        // If we have used more buckets than num_buckets, we cannot assign the values to buckets.
75
28.9k
        if (used_buckets > num_buckets) {
76
95
            return false;
77
95
        }
78
28.9k
    }
79
80
92
    return true;
81
187
}
_ZN5doris23can_assign_into_bucketsIiEEbRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEmm
Line
Count
Source
56
65
                             const size_t num_buckets) {
57
65
    if (value_map.empty()) {
58
1
        return false;
59
64
    };
60
61
64
    size_t used_buckets = 1;
62
64
    size_t current_bucket_size = 0;
63
64
1.37k
    for (const auto& [value, count] : value_map) {
65
1.37k
        current_bucket_size += count;
66
67
        // If adding the current value to the current bucket would exceed max_bucket_size,
68
        // then we start a new bucket.
69
1.37k
        if (current_bucket_size > max_bucket_size) {
70
161
            ++used_buckets;
71
161
            current_bucket_size = count;
72
161
        }
73
74
        // If we have used more buckets than num_buckets, we cannot assign the values to buckets.
75
1.37k
        if (used_buckets > num_buckets) {
76
29
            return false;
77
29
        }
78
1.37k
    }
79
80
35
    return true;
81
64
}
Unexecuted instantiation: _ZN5doris23can_assign_into_bucketsIhEEbRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEmm
_ZN5doris23can_assign_into_bucketsIaEEbRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEmm
Line
Count
Source
56
13
                             const size_t num_buckets) {
57
13
    if (value_map.empty()) {
58
0
        return false;
59
13
    };
60
61
13
    size_t used_buckets = 1;
62
13
    size_t current_bucket_size = 0;
63
64
1.19k
    for (const auto& [value, count] : value_map) {
65
1.19k
        current_bucket_size += count;
66
67
        // If adding the current value to the current bucket would exceed max_bucket_size,
68
        // then we start a new bucket.
69
1.19k
        if (current_bucket_size > max_bucket_size) {
70
57
            ++used_buckets;
71
57
            current_bucket_size = count;
72
57
        }
73
74
        // If we have used more buckets than num_buckets, we cannot assign the values to buckets.
75
1.19k
        if (used_buckets > num_buckets) {
76
7
            return false;
77
7
        }
78
1.19k
    }
79
80
6
    return true;
81
13
}
_ZN5doris23can_assign_into_bucketsIsEEbRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEmm
Line
Count
Source
56
13
                             const size_t num_buckets) {
57
13
    if (value_map.empty()) {
58
0
        return false;
59
13
    };
60
61
13
    size_t used_buckets = 1;
62
13
    size_t current_bucket_size = 0;
63
64
1.19k
    for (const auto& [value, count] : value_map) {
65
1.19k
        current_bucket_size += count;
66
67
        // If adding the current value to the current bucket would exceed max_bucket_size,
68
        // then we start a new bucket.
69
1.19k
        if (current_bucket_size > max_bucket_size) {
70
57
            ++used_buckets;
71
57
            current_bucket_size = count;
72
57
        }
73
74
        // If we have used more buckets than num_buckets, we cannot assign the values to buckets.
75
1.19k
        if (used_buckets > num_buckets) {
76
7
            return false;
77
7
        }
78
1.19k
    }
79
80
6
    return true;
81
13
}
_ZN5doris23can_assign_into_bucketsIlEEbRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEmm
Line
Count
Source
56
13
                             const size_t num_buckets) {
57
13
    if (value_map.empty()) {
58
0
        return false;
59
13
    };
60
61
13
    size_t used_buckets = 1;
62
13
    size_t current_bucket_size = 0;
63
64
1.19k
    for (const auto& [value, count] : value_map) {
65
1.19k
        current_bucket_size += count;
66
67
        // If adding the current value to the current bucket would exceed max_bucket_size,
68
        // then we start a new bucket.
69
1.19k
        if (current_bucket_size > max_bucket_size) {
70
57
            ++used_buckets;
71
57
            current_bucket_size = count;
72
57
        }
73
74
        // If we have used more buckets than num_buckets, we cannot assign the values to buckets.
75
1.19k
        if (used_buckets > num_buckets) {
76
7
            return false;
77
7
        }
78
1.19k
    }
79
80
6
    return true;
81
13
}
_ZN5doris23can_assign_into_bucketsInEEbRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEmm
Line
Count
Source
56
13
                             const size_t num_buckets) {
57
13
    if (value_map.empty()) {
58
0
        return false;
59
13
    };
60
61
13
    size_t used_buckets = 1;
62
13
    size_t current_bucket_size = 0;
63
64
1.19k
    for (const auto& [value, count] : value_map) {
65
1.19k
        current_bucket_size += count;
66
67
        // If adding the current value to the current bucket would exceed max_bucket_size,
68
        // then we start a new bucket.
69
1.19k
        if (current_bucket_size > max_bucket_size) {
70
57
            ++used_buckets;
71
57
            current_bucket_size = count;
72
57
        }
73
74
        // If we have used more buckets than num_buckets, we cannot assign the values to buckets.
75
1.19k
        if (used_buckets > num_buckets) {
76
7
            return false;
77
7
        }
78
1.19k
    }
79
80
6
    return true;
81
13
}
_ZN5doris23can_assign_into_bucketsIfEEbRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEmm
Line
Count
Source
56
13
                             const size_t num_buckets) {
57
13
    if (value_map.empty()) {
58
0
        return false;
59
13
    };
60
61
13
    size_t used_buckets = 1;
62
13
    size_t current_bucket_size = 0;
63
64
1.19k
    for (const auto& [value, count] : value_map) {
65
1.19k
        current_bucket_size += count;
66
67
        // If adding the current value to the current bucket would exceed max_bucket_size,
68
        // then we start a new bucket.
69
1.19k
        if (current_bucket_size > max_bucket_size) {
70
57
            ++used_buckets;
71
57
            current_bucket_size = count;
72
57
        }
73
74
        // If we have used more buckets than num_buckets, we cannot assign the values to buckets.
75
1.19k
        if (used_buckets > num_buckets) {
76
7
            return false;
77
7
        }
78
1.19k
    }
79
80
6
    return true;
81
13
}
_ZN5doris23can_assign_into_bucketsIdEEbRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEmm
Line
Count
Source
56
13
                             const size_t num_buckets) {
57
13
    if (value_map.empty()) {
58
0
        return false;
59
13
    };
60
61
13
    size_t used_buckets = 1;
62
13
    size_t current_bucket_size = 0;
63
64
1.19k
    for (const auto& [value, count] : value_map) {
65
1.19k
        current_bucket_size += count;
66
67
        // If adding the current value to the current bucket would exceed max_bucket_size,
68
        // then we start a new bucket.
69
1.19k
        if (current_bucket_size > max_bucket_size) {
70
57
            ++used_buckets;
71
57
            current_bucket_size = count;
72
57
        }
73
74
        // If we have used more buckets than num_buckets, we cannot assign the values to buckets.
75
1.19k
        if (used_buckets > num_buckets) {
76
7
            return false;
77
7
        }
78
1.19k
    }
79
80
6
    return true;
81
13
}
Unexecuted instantiation: _ZN5doris23can_assign_into_bucketsINS_7DecimalIiEEEEbRKSt3mapIT_mSt4lessIS4_ESaISt4pairIKS4_mEEEmm
Unexecuted instantiation: _ZN5doris23can_assign_into_bucketsINS_7DecimalIlEEEEbRKSt3mapIT_mSt4lessIS4_ESaISt4pairIKS4_mEEEmm
Unexecuted instantiation: _ZN5doris23can_assign_into_bucketsINS_12Decimal128V3EEEbRKSt3mapIT_mSt4lessIS3_ESaISt4pairIKS3_mEEEmm
Unexecuted instantiation: _ZN5doris23can_assign_into_bucketsINS_7DecimalIN4wide7integerILm256EiEEEEEEbRKSt3mapIT_mSt4lessIS7_ESaISt4pairIKS7_mEEEmm
_ZN5doris23can_assign_into_bucketsINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEbRKSt3mapIT_mSt4lessIS8_ESaISt4pairIKS8_mEEEmm
Line
Count
Source
56
19
                             const size_t num_buckets) {
57
19
    if (value_map.empty()) {
58
0
        return false;
59
19
    };
60
61
19
    size_t used_buckets = 1;
62
19
    size_t current_bucket_size = 0;
63
64
18.0k
    for (const auto& [value, count] : value_map) {
65
18.0k
        current_bucket_size += count;
66
67
        // If adding the current value to the current bucket would exceed max_bucket_size,
68
        // then we start a new bucket.
69
18.0k
        if (current_bucket_size > max_bucket_size) {
70
84
            ++used_buckets;
71
84
            current_bucket_size = count;
72
84
        }
73
74
        // If we have used more buckets than num_buckets, we cannot assign the values to buckets.
75
18.0k
        if (used_buckets > num_buckets) {
76
10
            return false;
77
10
        }
78
18.0k
    }
79
80
9
    return true;
81
19
}
_ZN5doris23can_assign_into_bucketsINS_11DateV2ValueINS_15DateV2ValueTypeEEEEEbRKSt3mapIT_mSt4lessIS5_ESaISt4pairIKS5_mEEEmm
Line
Count
Source
56
13
                             const size_t num_buckets) {
57
13
    if (value_map.empty()) {
58
0
        return false;
59
13
    };
60
61
13
    size_t used_buckets = 1;
62
13
    size_t current_bucket_size = 0;
63
64
1.19k
    for (const auto& [value, count] : value_map) {
65
1.19k
        current_bucket_size += count;
66
67
        // If adding the current value to the current bucket would exceed max_bucket_size,
68
        // then we start a new bucket.
69
1.19k
        if (current_bucket_size > max_bucket_size) {
70
57
            ++used_buckets;
71
57
            current_bucket_size = count;
72
57
        }
73
74
        // If we have used more buckets than num_buckets, we cannot assign the values to buckets.
75
1.19k
        if (used_buckets > num_buckets) {
76
7
            return false;
77
7
        }
78
1.19k
    }
79
80
6
    return true;
81
13
}
_ZN5doris23can_assign_into_bucketsINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEEEEbRKSt3mapIT_mSt4lessIS5_ESaISt4pairIKS5_mEEEmm
Line
Count
Source
56
13
                             const size_t num_buckets) {
57
13
    if (value_map.empty()) {
58
0
        return false;
59
13
    };
60
61
13
    size_t used_buckets = 1;
62
13
    size_t current_bucket_size = 0;
63
64
1.19k
    for (const auto& [value, count] : value_map) {
65
1.19k
        current_bucket_size += count;
66
67
        // If adding the current value to the current bucket would exceed max_bucket_size,
68
        // then we start a new bucket.
69
1.19k
        if (current_bucket_size > max_bucket_size) {
70
57
            ++used_buckets;
71
57
            current_bucket_size = count;
72
57
        }
73
74
        // If we have used more buckets than num_buckets, we cannot assign the values to buckets.
75
1.19k
        if (used_buckets > num_buckets) {
76
7
            return false;
77
7
        }
78
1.19k
    }
79
80
6
    return true;
81
13
}
82
83
/**
84
 * Calculates the maximum number of values that can fit into each bucket given a set of values
85
 * and the desired number of buckets.
86
 *
87
 * @tparam T the type of the values in the value map
88
 * @param value_map the map of values and their counts
89
 * @param num_buckets the desired number of buckets
90
 * @return the maximum number of values that can fit into each bucket
91
 */
92
template <typename T>
93
32
size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) {
94
    // Ensure that the value map is not empty
95
32
    assert(!value_map.empty());
96
97
    // Calculate the total number of values in the map using std::accumulate()
98
32
    size_t total_values = 0;
99
3.83k
    for (const auto& [value, count] : value_map) {
100
3.83k
        total_values += count;
101
3.83k
    }
102
103
    // If there is only one bucket, then all values will be assigned to that bucket
104
32
    if (num_buckets == 1) {
105
3
        return total_values;
106
3
    }
107
108
    // To calculate the maximum value count in each bucket, we first calculate a conservative upper
109
    // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed
110
    // the actual maximum value count, but it does not underestimate it. The subsequent binary search
111
    // algorithm will approach the actual maximum value count.
112
29
    size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1;
113
114
    // Initialize the lower bound to 0
115
29
    size_t lower_bucket_values = 0;
116
117
    // Perform a binary search to find the maximum number of values that can fit into each bucket
118
29
    int search_step = 0;
119
29
    const int max_search_steps =
120
29
            10; // Limit the number of search steps to avoid excessive iteration
121
122
203
    while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) {
123
        // Calculate the midpoint of the upper and lower bounds
124
174
        const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2;
125
126
        // Check if the given number of values can be assigned to the desired number of buckets
127
174
        if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) {
128
            // If it can, then set the upper bound to the midpoint
129
85
            upper_bucket_values = bucket_values;
130
89
        } else {
131
            // If it can't, then set the lower bound to the midpoint
132
89
            lower_bucket_values = bucket_values;
133
89
        }
134
        // Increment the search step counter
135
174
        ++search_step;
136
174
    }
137
138
29
    return upper_bucket_values;
139
32
}
_ZN5doris27calculate_bucket_max_valuesIiEEmRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEm
Line
Count
Source
93
14
size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) {
94
    // Ensure that the value map is not empty
95
14
    assert(!value_map.empty());
96
97
    // Calculate the total number of values in the map using std::accumulate()
98
14
    size_t total_values = 0;
99
233
    for (const auto& [value, count] : value_map) {
100
233
        total_values += count;
101
233
    }
102
103
    // If there is only one bucket, then all values will be assigned to that bucket
104
14
    if (num_buckets == 1) {
105
3
        return total_values;
106
3
    }
107
108
    // To calculate the maximum value count in each bucket, we first calculate a conservative upper
109
    // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed
110
    // the actual maximum value count, but it does not underestimate it. The subsequent binary search
111
    // algorithm will approach the actual maximum value count.
112
11
    size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1;
113
114
    // Initialize the lower bound to 0
115
11
    size_t lower_bucket_values = 0;
116
117
    // Perform a binary search to find the maximum number of values that can fit into each bucket
118
11
    int search_step = 0;
119
11
    const int max_search_steps =
120
11
            10; // Limit the number of search steps to avoid excessive iteration
121
122
62
    while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) {
123
        // Calculate the midpoint of the upper and lower bounds
124
51
        const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2;
125
126
        // Check if the given number of values can be assigned to the desired number of buckets
127
51
        if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) {
128
            // If it can, then set the upper bound to the midpoint
129
28
            upper_bucket_values = bucket_values;
130
28
        } else {
131
            // If it can't, then set the lower bound to the midpoint
132
23
            lower_bucket_values = bucket_values;
133
23
        }
134
        // Increment the search step counter
135
51
        ++search_step;
136
51
    }
137
138
11
    return upper_bucket_values;
139
14
}
Unexecuted instantiation: _ZN5doris27calculate_bucket_max_valuesIhEEmRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEm
_ZN5doris27calculate_bucket_max_valuesIaEEmRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEm
Line
Count
Source
93
2
size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) {
94
    // Ensure that the value map is not empty
95
2
    assert(!value_map.empty());
96
97
    // Calculate the total number of values in the map using std::accumulate()
98
2
    size_t total_values = 0;
99
200
    for (const auto& [value, count] : value_map) {
100
200
        total_values += count;
101
200
    }
102
103
    // If there is only one bucket, then all values will be assigned to that bucket
104
2
    if (num_buckets == 1) {
105
0
        return total_values;
106
0
    }
107
108
    // To calculate the maximum value count in each bucket, we first calculate a conservative upper
109
    // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed
110
    // the actual maximum value count, but it does not underestimate it. The subsequent binary search
111
    // algorithm will approach the actual maximum value count.
112
2
    size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1;
113
114
    // Initialize the lower bound to 0
115
2
    size_t lower_bucket_values = 0;
116
117
    // Perform a binary search to find the maximum number of values that can fit into each bucket
118
2
    int search_step = 0;
119
2
    const int max_search_steps =
120
2
            10; // Limit the number of search steps to avoid excessive iteration
121
122
15
    while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) {
123
        // Calculate the midpoint of the upper and lower bounds
124
13
        const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2;
125
126
        // Check if the given number of values can be assigned to the desired number of buckets
127
13
        if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) {
128
            // If it can, then set the upper bound to the midpoint
129
6
            upper_bucket_values = bucket_values;
130
7
        } else {
131
            // If it can't, then set the lower bound to the midpoint
132
7
            lower_bucket_values = bucket_values;
133
7
        }
134
        // Increment the search step counter
135
13
        ++search_step;
136
13
    }
137
138
2
    return upper_bucket_values;
139
2
}
_ZN5doris27calculate_bucket_max_valuesIsEEmRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEm
Line
Count
Source
93
2
size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) {
94
    // Ensure that the value map is not empty
95
2
    assert(!value_map.empty());
96
97
    // Calculate the total number of values in the map using std::accumulate()
98
2
    size_t total_values = 0;
99
200
    for (const auto& [value, count] : value_map) {
100
200
        total_values += count;
101
200
    }
102
103
    // If there is only one bucket, then all values will be assigned to that bucket
104
2
    if (num_buckets == 1) {
105
0
        return total_values;
106
0
    }
107
108
    // To calculate the maximum value count in each bucket, we first calculate a conservative upper
109
    // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed
110
    // the actual maximum value count, but it does not underestimate it. The subsequent binary search
111
    // algorithm will approach the actual maximum value count.
112
2
    size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1;
113
114
    // Initialize the lower bound to 0
115
2
    size_t lower_bucket_values = 0;
116
117
    // Perform a binary search to find the maximum number of values that can fit into each bucket
118
2
    int search_step = 0;
119
2
    const int max_search_steps =
120
2
            10; // Limit the number of search steps to avoid excessive iteration
121
122
15
    while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) {
123
        // Calculate the midpoint of the upper and lower bounds
124
13
        const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2;
125
126
        // Check if the given number of values can be assigned to the desired number of buckets
127
13
        if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) {
128
            // If it can, then set the upper bound to the midpoint
129
6
            upper_bucket_values = bucket_values;
130
7
        } else {
131
            // If it can't, then set the lower bound to the midpoint
132
7
            lower_bucket_values = bucket_values;
133
7
        }
134
        // Increment the search step counter
135
13
        ++search_step;
136
13
    }
137
138
2
    return upper_bucket_values;
139
2
}
_ZN5doris27calculate_bucket_max_valuesIlEEmRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEm
Line
Count
Source
93
2
size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) {
94
    // Ensure that the value map is not empty
95
2
    assert(!value_map.empty());
96
97
    // Calculate the total number of values in the map using std::accumulate()
98
2
    size_t total_values = 0;
99
200
    for (const auto& [value, count] : value_map) {
100
200
        total_values += count;
101
200
    }
102
103
    // If there is only one bucket, then all values will be assigned to that bucket
104
2
    if (num_buckets == 1) {
105
0
        return total_values;
106
0
    }
107
108
    // To calculate the maximum value count in each bucket, we first calculate a conservative upper
109
    // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed
110
    // the actual maximum value count, but it does not underestimate it. The subsequent binary search
111
    // algorithm will approach the actual maximum value count.
112
2
    size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1;
113
114
    // Initialize the lower bound to 0
115
2
    size_t lower_bucket_values = 0;
116
117
    // Perform a binary search to find the maximum number of values that can fit into each bucket
118
2
    int search_step = 0;
119
2
    const int max_search_steps =
120
2
            10; // Limit the number of search steps to avoid excessive iteration
121
122
15
    while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) {
123
        // Calculate the midpoint of the upper and lower bounds
124
13
        const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2;
125
126
        // Check if the given number of values can be assigned to the desired number of buckets
127
13
        if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) {
128
            // If it can, then set the upper bound to the midpoint
129
6
            upper_bucket_values = bucket_values;
130
7
        } else {
131
            // If it can't, then set the lower bound to the midpoint
132
7
            lower_bucket_values = bucket_values;
133
7
        }
134
        // Increment the search step counter
135
13
        ++search_step;
136
13
    }
137
138
2
    return upper_bucket_values;
139
2
}
_ZN5doris27calculate_bucket_max_valuesInEEmRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEm
Line
Count
Source
93
2
size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) {
94
    // Ensure that the value map is not empty
95
2
    assert(!value_map.empty());
96
97
    // Calculate the total number of values in the map using std::accumulate()
98
2
    size_t total_values = 0;
99
200
    for (const auto& [value, count] : value_map) {
100
200
        total_values += count;
101
200
    }
102
103
    // If there is only one bucket, then all values will be assigned to that bucket
104
2
    if (num_buckets == 1) {
105
0
        return total_values;
106
0
    }
107
108
    // To calculate the maximum value count in each bucket, we first calculate a conservative upper
109
    // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed
110
    // the actual maximum value count, but it does not underestimate it. The subsequent binary search
111
    // algorithm will approach the actual maximum value count.
112
2
    size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1;
113
114
    // Initialize the lower bound to 0
115
2
    size_t lower_bucket_values = 0;
116
117
    // Perform a binary search to find the maximum number of values that can fit into each bucket
118
2
    int search_step = 0;
119
2
    const int max_search_steps =
120
2
            10; // Limit the number of search steps to avoid excessive iteration
121
122
15
    while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) {
123
        // Calculate the midpoint of the upper and lower bounds
124
13
        const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2;
125
126
        // Check if the given number of values can be assigned to the desired number of buckets
127
13
        if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) {
128
            // If it can, then set the upper bound to the midpoint
129
6
            upper_bucket_values = bucket_values;
130
7
        } else {
131
            // If it can't, then set the lower bound to the midpoint
132
7
            lower_bucket_values = bucket_values;
133
7
        }
134
        // Increment the search step counter
135
13
        ++search_step;
136
13
    }
137
138
2
    return upper_bucket_values;
139
2
}
_ZN5doris27calculate_bucket_max_valuesIfEEmRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEm
Line
Count
Source
93
2
size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) {
94
    // Ensure that the value map is not empty
95
2
    assert(!value_map.empty());
96
97
    // Calculate the total number of values in the map using std::accumulate()
98
2
    size_t total_values = 0;
99
200
    for (const auto& [value, count] : value_map) {
100
200
        total_values += count;
101
200
    }
102
103
    // If there is only one bucket, then all values will be assigned to that bucket
104
2
    if (num_buckets == 1) {
105
0
        return total_values;
106
0
    }
107
108
    // To calculate the maximum value count in each bucket, we first calculate a conservative upper
109
    // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed
110
    // the actual maximum value count, but it does not underestimate it. The subsequent binary search
111
    // algorithm will approach the actual maximum value count.
112
2
    size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1;
113
114
    // Initialize the lower bound to 0
115
2
    size_t lower_bucket_values = 0;
116
117
    // Perform a binary search to find the maximum number of values that can fit into each bucket
118
2
    int search_step = 0;
119
2
    const int max_search_steps =
120
2
            10; // Limit the number of search steps to avoid excessive iteration
121
122
15
    while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) {
123
        // Calculate the midpoint of the upper and lower bounds
124
13
        const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2;
125
126
        // Check if the given number of values can be assigned to the desired number of buckets
127
13
        if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) {
128
            // If it can, then set the upper bound to the midpoint
129
6
            upper_bucket_values = bucket_values;
130
7
        } else {
131
            // If it can't, then set the lower bound to the midpoint
132
7
            lower_bucket_values = bucket_values;
133
7
        }
134
        // Increment the search step counter
135
13
        ++search_step;
136
13
    }
137
138
2
    return upper_bucket_values;
139
2
}
_ZN5doris27calculate_bucket_max_valuesIdEEmRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEm
Line
Count
Source
93
2
size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) {
94
    // Ensure that the value map is not empty
95
2
    assert(!value_map.empty());
96
97
    // Calculate the total number of values in the map using std::accumulate()
98
2
    size_t total_values = 0;
99
200
    for (const auto& [value, count] : value_map) {
100
200
        total_values += count;
101
200
    }
102
103
    // If there is only one bucket, then all values will be assigned to that bucket
104
2
    if (num_buckets == 1) {
105
0
        return total_values;
106
0
    }
107
108
    // To calculate the maximum value count in each bucket, we first calculate a conservative upper
109
    // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed
110
    // the actual maximum value count, but it does not underestimate it. The subsequent binary search
111
    // algorithm will approach the actual maximum value count.
112
2
    size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1;
113
114
    // Initialize the lower bound to 0
115
2
    size_t lower_bucket_values = 0;
116
117
    // Perform a binary search to find the maximum number of values that can fit into each bucket
118
2
    int search_step = 0;
119
2
    const int max_search_steps =
120
2
            10; // Limit the number of search steps to avoid excessive iteration
121
122
15
    while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) {
123
        // Calculate the midpoint of the upper and lower bounds
124
13
        const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2;
125
126
        // Check if the given number of values can be assigned to the desired number of buckets
127
13
        if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) {
128
            // If it can, then set the upper bound to the midpoint
129
6
            upper_bucket_values = bucket_values;
130
7
        } else {
131
            // If it can't, then set the lower bound to the midpoint
132
7
            lower_bucket_values = bucket_values;
133
7
        }
134
        // Increment the search step counter
135
13
        ++search_step;
136
13
    }
137
138
2
    return upper_bucket_values;
139
2
}
Unexecuted instantiation: _ZN5doris27calculate_bucket_max_valuesINS_7DecimalIiEEEEmRKSt3mapIT_mSt4lessIS4_ESaISt4pairIKS4_mEEEm
Unexecuted instantiation: _ZN5doris27calculate_bucket_max_valuesINS_7DecimalIlEEEEmRKSt3mapIT_mSt4lessIS4_ESaISt4pairIKS4_mEEEm
Unexecuted instantiation: _ZN5doris27calculate_bucket_max_valuesINS_12Decimal128V3EEEmRKSt3mapIT_mSt4lessIS3_ESaISt4pairIKS3_mEEEm
Unexecuted instantiation: _ZN5doris27calculate_bucket_max_valuesINS_7DecimalIN4wide7integerILm256EiEEEEEEmRKSt3mapIT_mSt4lessIS7_ESaISt4pairIKS7_mEEEm
_ZN5doris27calculate_bucket_max_valuesINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEmRKSt3mapIT_mSt4lessIS8_ESaISt4pairIKS8_mEEEm
Line
Count
Source
93
2
size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) {
94
    // Ensure that the value map is not empty
95
2
    assert(!value_map.empty());
96
97
    // Calculate the total number of values in the map using std::accumulate()
98
2
    size_t total_values = 0;
99
2.00k
    for (const auto& [value, count] : value_map) {
100
2.00k
        total_values += count;
101
2.00k
    }
102
103
    // If there is only one bucket, then all values will be assigned to that bucket
104
2
    if (num_buckets == 1) {
105
0
        return total_values;
106
0
    }
107
108
    // To calculate the maximum value count in each bucket, we first calculate a conservative upper
109
    // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed
110
    // the actual maximum value count, but it does not underestimate it. The subsequent binary search
111
    // algorithm will approach the actual maximum value count.
112
2
    size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1;
113
114
    // Initialize the lower bound to 0
115
2
    size_t lower_bucket_values = 0;
116
117
    // Perform a binary search to find the maximum number of values that can fit into each bucket
118
2
    int search_step = 0;
119
2
    const int max_search_steps =
120
2
            10; // Limit the number of search steps to avoid excessive iteration
121
122
21
    while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) {
123
        // Calculate the midpoint of the upper and lower bounds
124
19
        const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2;
125
126
        // Check if the given number of values can be assigned to the desired number of buckets
127
19
        if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) {
128
            // If it can, then set the upper bound to the midpoint
129
9
            upper_bucket_values = bucket_values;
130
10
        } else {
131
            // If it can't, then set the lower bound to the midpoint
132
10
            lower_bucket_values = bucket_values;
133
10
        }
134
        // Increment the search step counter
135
19
        ++search_step;
136
19
    }
137
138
2
    return upper_bucket_values;
139
2
}
_ZN5doris27calculate_bucket_max_valuesINS_11DateV2ValueINS_15DateV2ValueTypeEEEEEmRKSt3mapIT_mSt4lessIS5_ESaISt4pairIKS5_mEEEm
Line
Count
Source
93
2
size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) {
94
    // Ensure that the value map is not empty
95
2
    assert(!value_map.empty());
96
97
    // Calculate the total number of values in the map using std::accumulate()
98
2
    size_t total_values = 0;
99
200
    for (const auto& [value, count] : value_map) {
100
200
        total_values += count;
101
200
    }
102
103
    // If there is only one bucket, then all values will be assigned to that bucket
104
2
    if (num_buckets == 1) {
105
0
        return total_values;
106
0
    }
107
108
    // To calculate the maximum value count in each bucket, we first calculate a conservative upper
109
    // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed
110
    // the actual maximum value count, but it does not underestimate it. The subsequent binary search
111
    // algorithm will approach the actual maximum value count.
112
2
    size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1;
113
114
    // Initialize the lower bound to 0
115
2
    size_t lower_bucket_values = 0;
116
117
    // Perform a binary search to find the maximum number of values that can fit into each bucket
118
2
    int search_step = 0;
119
2
    const int max_search_steps =
120
2
            10; // Limit the number of search steps to avoid excessive iteration
121
122
15
    while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) {
123
        // Calculate the midpoint of the upper and lower bounds
124
13
        const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2;
125
126
        // Check if the given number of values can be assigned to the desired number of buckets
127
13
        if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) {
128
            // If it can, then set the upper bound to the midpoint
129
6
            upper_bucket_values = bucket_values;
130
7
        } else {
131
            // If it can't, then set the lower bound to the midpoint
132
7
            lower_bucket_values = bucket_values;
133
7
        }
134
        // Increment the search step counter
135
13
        ++search_step;
136
13
    }
137
138
2
    return upper_bucket_values;
139
2
}
_ZN5doris27calculate_bucket_max_valuesINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEEEEmRKSt3mapIT_mSt4lessIS5_ESaISt4pairIKS5_mEEEm
Line
Count
Source
93
2
size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) {
94
    // Ensure that the value map is not empty
95
2
    assert(!value_map.empty());
96
97
    // Calculate the total number of values in the map using std::accumulate()
98
2
    size_t total_values = 0;
99
200
    for (const auto& [value, count] : value_map) {
100
200
        total_values += count;
101
200
    }
102
103
    // If there is only one bucket, then all values will be assigned to that bucket
104
2
    if (num_buckets == 1) {
105
0
        return total_values;
106
0
    }
107
108
    // To calculate the maximum value count in each bucket, we first calculate a conservative upper
109
    // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed
110
    // the actual maximum value count, but it does not underestimate it. The subsequent binary search
111
    // algorithm will approach the actual maximum value count.
112
2
    size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1;
113
114
    // Initialize the lower bound to 0
115
2
    size_t lower_bucket_values = 0;
116
117
    // Perform a binary search to find the maximum number of values that can fit into each bucket
118
2
    int search_step = 0;
119
2
    const int max_search_steps =
120
2
            10; // Limit the number of search steps to avoid excessive iteration
121
122
15
    while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) {
123
        // Calculate the midpoint of the upper and lower bounds
124
13
        const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2;
125
126
        // Check if the given number of values can be assigned to the desired number of buckets
127
13
        if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) {
128
            // If it can, then set the upper bound to the midpoint
129
6
            upper_bucket_values = bucket_values;
130
7
        } else {
131
            // If it can't, then set the lower bound to the midpoint
132
7
            lower_bucket_values = bucket_values;
133
7
        }
134
        // Increment the search step counter
135
13
        ++search_step;
136
13
    }
137
138
2
    return upper_bucket_values;
139
2
}
140
141
/**
142
 * Greedy equi-height histogram construction algorithm, inspired by the MySQL
143
 * equi_height implementation(https://dev.mysql.com/doc/dev/mysql-server/latest/equi__height_8h.html).
144
 *
145
 * Given an ordered collection of [value, count] pairs and a maximum bucket
146
 * size, construct a histogram by inserting values into a bucket while keeping
147
 * track of its size. If the insertion of a value into a non-empty bucket
148
 * causes the bucket to exceed the maximum size, create a new empty bucket and
149
 * continue.
150
 *
151
 * The algorithm guarantees a selectivity estimation error of at most ~2 *
152
 * #values / #buckets, often less. Values with a higher relative frequency are
153
 * guaranteed to be placed in singleton buckets.
154
 *
155
 * The minimum composite bucket size is used to minimize the worst case
156
 * selectivity estimation error. In general, the algorithm will adapt to the
157
 * data distribution to minimize the size of composite buckets. The heavy values
158
 * can be placed in singleton buckets and the remaining values will be evenly
159
 * spread across the remaining buckets, leading to a lower composite bucket size.
160
 *
161
 * Note: The term "value" refers to an entry in a column and the actual value
162
 * of an entry. The ordered_map is an ordered collection of [distinct value,
163
 * value count] pairs. For example, a Value_map<String> could contain the pairs ["a", 1], ["b", 2]
164
 * to represent one "a" value and two "b" values.
165
 *
166
 * @param buckets A vector of empty buckets that will be populated with data.
167
 * @param ordered_map An ordered map of distinct values and their counts.
168
 * @param max_num_buckets The maximum number of buckets that can be used.
169
 *
170
 * @return True if the buckets were successfully built, false otherwise.
171
 */
172
template <typename T>
173
bool build_histogram(std::vector<Bucket<T>>& buckets, const std::map<T, size_t>& ordered_map,
174
43
                     const size_t max_num_buckets) {
175
    // If the input map is empty, there is nothing to build.
176
43
    if (ordered_map.empty()) {
177
17
        return false;
178
17
    }
179
180
    // Calculate the maximum number of values that can be assigned to each bucket.
181
26
    auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets);
182
183
    // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional
184
    // allocations when inserting buckets.
185
26
    buckets.clear();
186
26
    buckets.reserve(max_num_buckets);
187
188
    // Initialize bucket variables.
189
26
    size_t distinct_values_count = 0;
190
26
    size_t values_count = 0;
191
26
    size_t cumulative_values = 0;
192
193
    // Record how many values still need to be assigned.
194
26
    auto remaining_distinct_values = ordered_map.size();
195
196
26
    auto it = ordered_map.begin();
197
198
    // Lower value of the current bucket.
199
26
    const T* lower_value = &it->first;
200
201
    // Iterate over the ordered map of distinct values and their counts.
202
3.84k
    for (; it != ordered_map.end(); ++it) {
203
3.82k
        const auto count = it->second;
204
3.82k
        const auto current_value = it->first;
205
206
        // Update the bucket counts and track the number of distinct values assigned.
207
3.82k
        distinct_values_count++;
208
3.82k
        remaining_distinct_values--;
209
3.82k
        values_count += count;
210
3.82k
        cumulative_values += count;
211
212
        // Check whether the current value should be added to the current bucket.
213
3.82k
        auto next = std::next(it);
214
3.82k
        size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1;
215
216
3.82k
        if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets &&
217
3.82k
            values_count + next->second <= bucket_max_values) {
218
            // If the current value is the last in the input map and there are more remaining
219
            // distinct values than empty buckets and adding the value does not cause the bucket
220
            // to exceed its max size, skip adding the value to the current bucket.
221
3.70k
            continue;
222
3.70k
        }
223
224
        // Finalize the current bucket and add it to our collection of buckets.
225
114
        auto pre_sum = cumulative_values - values_count;
226
227
114
        Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count,
228
114
                             pre_sum);
229
114
        buckets.push_back(new_bucket);
230
231
        // Reset variables for the next bucket.
232
114
        if (next != ordered_map.end()) {
233
88
            lower_value = &next->first;
234
88
        }
235
114
        values_count = 0;
236
114
        distinct_values_count = 0;
237
114
    }
238
239
26
    return true;
240
43
}
_ZN5doris15build_histogramIiEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm
Line
Count
Source
174
11
                     const size_t max_num_buckets) {
175
    // If the input map is empty, there is nothing to build.
176
11
    if (ordered_map.empty()) {
177
3
        return false;
178
3
    }
179
180
    // Calculate the maximum number of values that can be assigned to each bucket.
181
8
    auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets);
182
183
    // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional
184
    // allocations when inserting buckets.
185
8
    buckets.clear();
186
8
    buckets.reserve(max_num_buckets);
187
188
    // Initialize bucket variables.
189
8
    size_t distinct_values_count = 0;
190
8
    size_t values_count = 0;
191
8
    size_t cumulative_values = 0;
192
193
    // Record how many values still need to be assigned.
194
8
    auto remaining_distinct_values = ordered_map.size();
195
196
8
    auto it = ordered_map.begin();
197
198
    // Lower value of the current bucket.
199
8
    const T* lower_value = &it->first;
200
201
    // Iterate over the ordered map of distinct values and their counts.
202
229
    for (; it != ordered_map.end(); ++it) {
203
221
        const auto count = it->second;
204
221
        const auto current_value = it->first;
205
206
        // Update the bucket counts and track the number of distinct values assigned.
207
221
        distinct_values_count++;
208
221
        remaining_distinct_values--;
209
221
        values_count += count;
210
221
        cumulative_values += count;
211
212
        // Check whether the current value should be added to the current bucket.
213
221
        auto next = std::next(it);
214
221
        size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1;
215
216
221
        if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets &&
217
221
            values_count + next->second <= bucket_max_values) {
218
            // If the current value is the last in the input map and there are more remaining
219
            // distinct values than empty buckets and adding the value does not cause the bucket
220
            // to exceed its max size, skip adding the value to the current bucket.
221
197
            continue;
222
197
        }
223
224
        // Finalize the current bucket and add it to our collection of buckets.
225
24
        auto pre_sum = cumulative_values - values_count;
226
227
24
        Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count,
228
24
                             pre_sum);
229
24
        buckets.push_back(new_bucket);
230
231
        // Reset variables for the next bucket.
232
24
        if (next != ordered_map.end()) {
233
16
            lower_value = &next->first;
234
16
        }
235
24
        values_count = 0;
236
24
        distinct_values_count = 0;
237
24
    }
238
239
8
    return true;
240
11
}
Unexecuted instantiation: _ZN5doris15build_histogramIhEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm
_ZN5doris15build_histogramIaEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm
Line
Count
Source
174
4
                     const size_t max_num_buckets) {
175
    // If the input map is empty, there is nothing to build.
176
4
    if (ordered_map.empty()) {
177
2
        return false;
178
2
    }
179
180
    // Calculate the maximum number of values that can be assigned to each bucket.
181
2
    auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets);
182
183
    // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional
184
    // allocations when inserting buckets.
185
2
    buckets.clear();
186
2
    buckets.reserve(max_num_buckets);
187
188
    // Initialize bucket variables.
189
2
    size_t distinct_values_count = 0;
190
2
    size_t values_count = 0;
191
2
    size_t cumulative_values = 0;
192
193
    // Record how many values still need to be assigned.
194
2
    auto remaining_distinct_values = ordered_map.size();
195
196
2
    auto it = ordered_map.begin();
197
198
    // Lower value of the current bucket.
199
2
    const T* lower_value = &it->first;
200
201
    // Iterate over the ordered map of distinct values and their counts.
202
202
    for (; it != ordered_map.end(); ++it) {
203
200
        const auto count = it->second;
204
200
        const auto current_value = it->first;
205
206
        // Update the bucket counts and track the number of distinct values assigned.
207
200
        distinct_values_count++;
208
200
        remaining_distinct_values--;
209
200
        values_count += count;
210
200
        cumulative_values += count;
211
212
        // Check whether the current value should be added to the current bucket.
213
200
        auto next = std::next(it);
214
200
        size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1;
215
216
200
        if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets &&
217
200
            values_count + next->second <= bucket_max_values) {
218
            // If the current value is the last in the input map and there are more remaining
219
            // distinct values than empty buckets and adding the value does not cause the bucket
220
            // to exceed its max size, skip adding the value to the current bucket.
221
190
            continue;
222
190
        }
223
224
        // Finalize the current bucket and add it to our collection of buckets.
225
10
        auto pre_sum = cumulative_values - values_count;
226
227
10
        Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count,
228
10
                             pre_sum);
229
10
        buckets.push_back(new_bucket);
230
231
        // Reset variables for the next bucket.
232
10
        if (next != ordered_map.end()) {
233
8
            lower_value = &next->first;
234
8
        }
235
10
        values_count = 0;
236
10
        distinct_values_count = 0;
237
10
    }
238
239
2
    return true;
240
4
}
_ZN5doris15build_histogramIsEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm
Line
Count
Source
174
4
                     const size_t max_num_buckets) {
175
    // If the input map is empty, there is nothing to build.
176
4
    if (ordered_map.empty()) {
177
2
        return false;
178
2
    }
179
180
    // Calculate the maximum number of values that can be assigned to each bucket.
181
2
    auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets);
182
183
    // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional
184
    // allocations when inserting buckets.
185
2
    buckets.clear();
186
2
    buckets.reserve(max_num_buckets);
187
188
    // Initialize bucket variables.
189
2
    size_t distinct_values_count = 0;
190
2
    size_t values_count = 0;
191
2
    size_t cumulative_values = 0;
192
193
    // Record how many values still need to be assigned.
194
2
    auto remaining_distinct_values = ordered_map.size();
195
196
2
    auto it = ordered_map.begin();
197
198
    // Lower value of the current bucket.
199
2
    const T* lower_value = &it->first;
200
201
    // Iterate over the ordered map of distinct values and their counts.
202
202
    for (; it != ordered_map.end(); ++it) {
203
200
        const auto count = it->second;
204
200
        const auto current_value = it->first;
205
206
        // Update the bucket counts and track the number of distinct values assigned.
207
200
        distinct_values_count++;
208
200
        remaining_distinct_values--;
209
200
        values_count += count;
210
200
        cumulative_values += count;
211
212
        // Check whether the current value should be added to the current bucket.
213
200
        auto next = std::next(it);
214
200
        size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1;
215
216
200
        if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets &&
217
200
            values_count + next->second <= bucket_max_values) {
218
            // If the current value is the last in the input map and there are more remaining
219
            // distinct values than empty buckets and adding the value does not cause the bucket
220
            // to exceed its max size, skip adding the value to the current bucket.
221
190
            continue;
222
190
        }
223
224
        // Finalize the current bucket and add it to our collection of buckets.
225
10
        auto pre_sum = cumulative_values - values_count;
226
227
10
        Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count,
228
10
                             pre_sum);
229
10
        buckets.push_back(new_bucket);
230
231
        // Reset variables for the next bucket.
232
10
        if (next != ordered_map.end()) {
233
8
            lower_value = &next->first;
234
8
        }
235
10
        values_count = 0;
236
10
        distinct_values_count = 0;
237
10
    }
238
239
2
    return true;
240
4
}
_ZN5doris15build_histogramIlEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm
Line
Count
Source
174
4
                     const size_t max_num_buckets) {
175
    // If the input map is empty, there is nothing to build.
176
4
    if (ordered_map.empty()) {
177
2
        return false;
178
2
    }
179
180
    // Calculate the maximum number of values that can be assigned to each bucket.
181
2
    auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets);
182
183
    // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional
184
    // allocations when inserting buckets.
185
2
    buckets.clear();
186
2
    buckets.reserve(max_num_buckets);
187
188
    // Initialize bucket variables.
189
2
    size_t distinct_values_count = 0;
190
2
    size_t values_count = 0;
191
2
    size_t cumulative_values = 0;
192
193
    // Record how many values still need to be assigned.
194
2
    auto remaining_distinct_values = ordered_map.size();
195
196
2
    auto it = ordered_map.begin();
197
198
    // Lower value of the current bucket.
199
2
    const T* lower_value = &it->first;
200
201
    // Iterate over the ordered map of distinct values and their counts.
202
202
    for (; it != ordered_map.end(); ++it) {
203
200
        const auto count = it->second;
204
200
        const auto current_value = it->first;
205
206
        // Update the bucket counts and track the number of distinct values assigned.
207
200
        distinct_values_count++;
208
200
        remaining_distinct_values--;
209
200
        values_count += count;
210
200
        cumulative_values += count;
211
212
        // Check whether the current value should be added to the current bucket.
213
200
        auto next = std::next(it);
214
200
        size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1;
215
216
200
        if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets &&
217
200
            values_count + next->second <= bucket_max_values) {
218
            // If the current value is the last in the input map and there are more remaining
219
            // distinct values than empty buckets and adding the value does not cause the bucket
220
            // to exceed its max size, skip adding the value to the current bucket.
221
190
            continue;
222
190
        }
223
224
        // Finalize the current bucket and add it to our collection of buckets.
225
10
        auto pre_sum = cumulative_values - values_count;
226
227
10
        Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count,
228
10
                             pre_sum);
229
10
        buckets.push_back(new_bucket);
230
231
        // Reset variables for the next bucket.
232
10
        if (next != ordered_map.end()) {
233
8
            lower_value = &next->first;
234
8
        }
235
10
        values_count = 0;
236
10
        distinct_values_count = 0;
237
10
    }
238
239
2
    return true;
240
4
}
_ZN5doris15build_histogramInEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm
Line
Count
Source
174
4
                     const size_t max_num_buckets) {
175
    // If the input map is empty, there is nothing to build.
176
4
    if (ordered_map.empty()) {
177
2
        return false;
178
2
    }
179
180
    // Calculate the maximum number of values that can be assigned to each bucket.
181
2
    auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets);
182
183
    // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional
184
    // allocations when inserting buckets.
185
2
    buckets.clear();
186
2
    buckets.reserve(max_num_buckets);
187
188
    // Initialize bucket variables.
189
2
    size_t distinct_values_count = 0;
190
2
    size_t values_count = 0;
191
2
    size_t cumulative_values = 0;
192
193
    // Record how many values still need to be assigned.
194
2
    auto remaining_distinct_values = ordered_map.size();
195
196
2
    auto it = ordered_map.begin();
197
198
    // Lower value of the current bucket.
199
2
    const T* lower_value = &it->first;
200
201
    // Iterate over the ordered map of distinct values and their counts.
202
202
    for (; it != ordered_map.end(); ++it) {
203
200
        const auto count = it->second;
204
200
        const auto current_value = it->first;
205
206
        // Update the bucket counts and track the number of distinct values assigned.
207
200
        distinct_values_count++;
208
200
        remaining_distinct_values--;
209
200
        values_count += count;
210
200
        cumulative_values += count;
211
212
        // Check whether the current value should be added to the current bucket.
213
200
        auto next = std::next(it);
214
200
        size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1;
215
216
200
        if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets &&
217
200
            values_count + next->second <= bucket_max_values) {
218
            // If the current value is the last in the input map and there are more remaining
219
            // distinct values than empty buckets and adding the value does not cause the bucket
220
            // to exceed its max size, skip adding the value to the current bucket.
221
190
            continue;
222
190
        }
223
224
        // Finalize the current bucket and add it to our collection of buckets.
225
10
        auto pre_sum = cumulative_values - values_count;
226
227
10
        Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count,
228
10
                             pre_sum);
229
10
        buckets.push_back(new_bucket);
230
231
        // Reset variables for the next bucket.
232
10
        if (next != ordered_map.end()) {
233
8
            lower_value = &next->first;
234
8
        }
235
10
        values_count = 0;
236
10
        distinct_values_count = 0;
237
10
    }
238
239
2
    return true;
240
4
}
_ZN5doris15build_histogramIfEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm
Line
Count
Source
174
4
                     const size_t max_num_buckets) {
175
    // If the input map is empty, there is nothing to build.
176
4
    if (ordered_map.empty()) {
177
2
        return false;
178
2
    }
179
180
    // Calculate the maximum number of values that can be assigned to each bucket.
181
2
    auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets);
182
183
    // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional
184
    // allocations when inserting buckets.
185
2
    buckets.clear();
186
2
    buckets.reserve(max_num_buckets);
187
188
    // Initialize bucket variables.
189
2
    size_t distinct_values_count = 0;
190
2
    size_t values_count = 0;
191
2
    size_t cumulative_values = 0;
192
193
    // Record how many values still need to be assigned.
194
2
    auto remaining_distinct_values = ordered_map.size();
195
196
2
    auto it = ordered_map.begin();
197
198
    // Lower value of the current bucket.
199
2
    const T* lower_value = &it->first;
200
201
    // Iterate over the ordered map of distinct values and their counts.
202
202
    for (; it != ordered_map.end(); ++it) {
203
200
        const auto count = it->second;
204
200
        const auto current_value = it->first;
205
206
        // Update the bucket counts and track the number of distinct values assigned.
207
200
        distinct_values_count++;
208
200
        remaining_distinct_values--;
209
200
        values_count += count;
210
200
        cumulative_values += count;
211
212
        // Check whether the current value should be added to the current bucket.
213
200
        auto next = std::next(it);
214
200
        size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1;
215
216
200
        if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets &&
217
200
            values_count + next->second <= bucket_max_values) {
218
            // If the current value is the last in the input map and there are more remaining
219
            // distinct values than empty buckets and adding the value does not cause the bucket
220
            // to exceed its max size, skip adding the value to the current bucket.
221
190
            continue;
222
190
        }
223
224
        // Finalize the current bucket and add it to our collection of buckets.
225
10
        auto pre_sum = cumulative_values - values_count;
226
227
10
        Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count,
228
10
                             pre_sum);
229
10
        buckets.push_back(new_bucket);
230
231
        // Reset variables for the next bucket.
232
10
        if (next != ordered_map.end()) {
233
8
            lower_value = &next->first;
234
8
        }
235
10
        values_count = 0;
236
10
        distinct_values_count = 0;
237
10
    }
238
239
2
    return true;
240
4
}
_ZN5doris15build_histogramIdEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm
Line
Count
Source
174
4
                     const size_t max_num_buckets) {
175
    // If the input map is empty, there is nothing to build.
176
4
    if (ordered_map.empty()) {
177
2
        return false;
178
2
    }
179
180
    // Calculate the maximum number of values that can be assigned to each bucket.
181
2
    auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets);
182
183
    // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional
184
    // allocations when inserting buckets.
185
2
    buckets.clear();
186
2
    buckets.reserve(max_num_buckets);
187
188
    // Initialize bucket variables.
189
2
    size_t distinct_values_count = 0;
190
2
    size_t values_count = 0;
191
2
    size_t cumulative_values = 0;
192
193
    // Record how many values still need to be assigned.
194
2
    auto remaining_distinct_values = ordered_map.size();
195
196
2
    auto it = ordered_map.begin();
197
198
    // Lower value of the current bucket.
199
2
    const T* lower_value = &it->first;
200
201
    // Iterate over the ordered map of distinct values and their counts.
202
202
    for (; it != ordered_map.end(); ++it) {
203
200
        const auto count = it->second;
204
200
        const auto current_value = it->first;
205
206
        // Update the bucket counts and track the number of distinct values assigned.
207
200
        distinct_values_count++;
208
200
        remaining_distinct_values--;
209
200
        values_count += count;
210
200
        cumulative_values += count;
211
212
        // Check whether the current value should be added to the current bucket.
213
200
        auto next = std::next(it);
214
200
        size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1;
215
216
200
        if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets &&
217
200
            values_count + next->second <= bucket_max_values) {
218
            // If the current value is the last in the input map and there are more remaining
219
            // distinct values than empty buckets and adding the value does not cause the bucket
220
            // to exceed its max size, skip adding the value to the current bucket.
221
190
            continue;
222
190
        }
223
224
        // Finalize the current bucket and add it to our collection of buckets.
225
10
        auto pre_sum = cumulative_values - values_count;
226
227
10
        Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count,
228
10
                             pre_sum);
229
10
        buckets.push_back(new_bucket);
230
231
        // Reset variables for the next bucket.
232
10
        if (next != ordered_map.end()) {
233
8
            lower_value = &next->first;
234
8
        }
235
10
        values_count = 0;
236
10
        distinct_values_count = 0;
237
10
    }
238
239
2
    return true;
240
4
}
Unexecuted instantiation: _ZN5doris15build_histogramINS_7DecimalIiEEEEbRSt6vectorINS_6BucketIT_EESaIS6_EERKSt3mapIS5_mSt4lessIS5_ESaISt4pairIKS5_mEEEm
Unexecuted instantiation: _ZN5doris15build_histogramINS_7DecimalIlEEEEbRSt6vectorINS_6BucketIT_EESaIS6_EERKSt3mapIS5_mSt4lessIS5_ESaISt4pairIKS5_mEEEm
Unexecuted instantiation: _ZN5doris15build_histogramINS_12Decimal128V3EEEbRSt6vectorINS_6BucketIT_EESaIS5_EERKSt3mapIS4_mSt4lessIS4_ESaISt4pairIKS4_mEEEm
Unexecuted instantiation: _ZN5doris15build_histogramINS_7DecimalIN4wide7integerILm256EiEEEEEEbRSt6vectorINS_6BucketIT_EESaIS9_EERKSt3mapIS8_mSt4lessIS8_ESaISt4pairIKS8_mEEEm
_ZN5doris15build_histogramINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEbRSt6vectorINS_6BucketIT_EESaISA_EERKSt3mapIS9_mSt4lessIS9_ESaISt4pairIKS9_mEEEm
Line
Count
Source
174
4
                     const size_t max_num_buckets) {
175
    // If the input map is empty, there is nothing to build.
176
4
    if (ordered_map.empty()) {
177
2
        return false;
178
2
    }
179
180
    // Calculate the maximum number of values that can be assigned to each bucket.
181
2
    auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets);
182
183
    // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional
184
    // allocations when inserting buckets.
185
2
    buckets.clear();
186
2
    buckets.reserve(max_num_buckets);
187
188
    // Initialize bucket variables.
189
2
    size_t distinct_values_count = 0;
190
2
    size_t values_count = 0;
191
2
    size_t cumulative_values = 0;
192
193
    // Record how many values still need to be assigned.
194
2
    auto remaining_distinct_values = ordered_map.size();
195
196
2
    auto it = ordered_map.begin();
197
198
    // Lower value of the current bucket.
199
2
    const T* lower_value = &it->first;
200
201
    // Iterate over the ordered map of distinct values and their counts.
202
2.00k
    for (; it != ordered_map.end(); ++it) {
203
2.00k
        const auto count = it->second;
204
2.00k
        const auto current_value = it->first;
205
206
        // Update the bucket counts and track the number of distinct values assigned.
207
2.00k
        distinct_values_count++;
208
2.00k
        remaining_distinct_values--;
209
2.00k
        values_count += count;
210
2.00k
        cumulative_values += count;
211
212
        // Check whether the current value should be added to the current bucket.
213
2.00k
        auto next = std::next(it);
214
2.00k
        size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1;
215
216
2.00k
        if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets &&
217
2.00k
            values_count + next->second <= bucket_max_values) {
218
            // If the current value is the last in the input map and there are more remaining
219
            // distinct values than empty buckets and adding the value does not cause the bucket
220
            // to exceed its max size, skip adding the value to the current bucket.
221
1.99k
            continue;
222
1.99k
        }
223
224
        // Finalize the current bucket and add it to our collection of buckets.
225
10
        auto pre_sum = cumulative_values - values_count;
226
227
10
        Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count,
228
10
                             pre_sum);
229
10
        buckets.push_back(new_bucket);
230
231
        // Reset variables for the next bucket.
232
10
        if (next != ordered_map.end()) {
233
8
            lower_value = &next->first;
234
8
        }
235
10
        values_count = 0;
236
10
        distinct_values_count = 0;
237
10
    }
238
239
2
    return true;
240
4
}
_ZN5doris15build_histogramINS_11DateV2ValueINS_15DateV2ValueTypeEEEEEbRSt6vectorINS_6BucketIT_EESaIS7_EERKSt3mapIS6_mSt4lessIS6_ESaISt4pairIKS6_mEEEm
Line
Count
Source
174
2
                     const size_t max_num_buckets) {
175
    // If the input map is empty, there is nothing to build.
176
2
    if (ordered_map.empty()) {
177
0
        return false;
178
0
    }
179
180
    // Calculate the maximum number of values that can be assigned to each bucket.
181
2
    auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets);
182
183
    // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional
184
    // allocations when inserting buckets.
185
2
    buckets.clear();
186
2
    buckets.reserve(max_num_buckets);
187
188
    // Initialize bucket variables.
189
2
    size_t distinct_values_count = 0;
190
2
    size_t values_count = 0;
191
2
    size_t cumulative_values = 0;
192
193
    // Record how many values still need to be assigned.
194
2
    auto remaining_distinct_values = ordered_map.size();
195
196
2
    auto it = ordered_map.begin();
197
198
    // Lower value of the current bucket.
199
2
    const T* lower_value = &it->first;
200
201
    // Iterate over the ordered map of distinct values and their counts.
202
202
    for (; it != ordered_map.end(); ++it) {
203
200
        const auto count = it->second;
204
200
        const auto current_value = it->first;
205
206
        // Update the bucket counts and track the number of distinct values assigned.
207
200
        distinct_values_count++;
208
200
        remaining_distinct_values--;
209
200
        values_count += count;
210
200
        cumulative_values += count;
211
212
        // Check whether the current value should be added to the current bucket.
213
200
        auto next = std::next(it);
214
200
        size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1;
215
216
200
        if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets &&
217
200
            values_count + next->second <= bucket_max_values) {
218
            // If the current value is the last in the input map and there are more remaining
219
            // distinct values than empty buckets and adding the value does not cause the bucket
220
            // to exceed its max size, skip adding the value to the current bucket.
221
190
            continue;
222
190
        }
223
224
        // Finalize the current bucket and add it to our collection of buckets.
225
10
        auto pre_sum = cumulative_values - values_count;
226
227
10
        Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count,
228
10
                             pre_sum);
229
10
        buckets.push_back(new_bucket);
230
231
        // Reset variables for the next bucket.
232
10
        if (next != ordered_map.end()) {
233
8
            lower_value = &next->first;
234
8
        }
235
10
        values_count = 0;
236
10
        distinct_values_count = 0;
237
10
    }
238
239
2
    return true;
240
2
}
_ZN5doris15build_histogramINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEEEEbRSt6vectorINS_6BucketIT_EESaIS7_EERKSt3mapIS6_mSt4lessIS6_ESaISt4pairIKS6_mEEEm
Line
Count
Source
174
2
                     const size_t max_num_buckets) {
175
    // If the input map is empty, there is nothing to build.
176
2
    if (ordered_map.empty()) {
177
0
        return false;
178
0
    }
179
180
    // Calculate the maximum number of values that can be assigned to each bucket.
181
2
    auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets);
182
183
    // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional
184
    // allocations when inserting buckets.
185
2
    buckets.clear();
186
2
    buckets.reserve(max_num_buckets);
187
188
    // Initialize bucket variables.
189
2
    size_t distinct_values_count = 0;
190
2
    size_t values_count = 0;
191
2
    size_t cumulative_values = 0;
192
193
    // Record how many values still need to be assigned.
194
2
    auto remaining_distinct_values = ordered_map.size();
195
196
2
    auto it = ordered_map.begin();
197
198
    // Lower value of the current bucket.
199
2
    const T* lower_value = &it->first;
200
201
    // Iterate over the ordered map of distinct values and their counts.
202
202
    for (; it != ordered_map.end(); ++it) {
203
200
        const auto count = it->second;
204
200
        const auto current_value = it->first;
205
206
        // Update the bucket counts and track the number of distinct values assigned.
207
200
        distinct_values_count++;
208
200
        remaining_distinct_values--;
209
200
        values_count += count;
210
200
        cumulative_values += count;
211
212
        // Check whether the current value should be added to the current bucket.
213
200
        auto next = std::next(it);
214
200
        size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1;
215
216
200
        if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets &&
217
200
            values_count + next->second <= bucket_max_values) {
218
            // If the current value is the last in the input map and there are more remaining
219
            // distinct values than empty buckets and adding the value does not cause the bucket
220
            // to exceed its max size, skip adding the value to the current bucket.
221
190
            continue;
222
190
        }
223
224
        // Finalize the current bucket and add it to our collection of buckets.
225
10
        auto pre_sum = cumulative_values - values_count;
226
227
10
        Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count,
228
10
                             pre_sum);
229
10
        buckets.push_back(new_bucket);
230
231
        // Reset variables for the next bucket.
232
10
        if (next != ordered_map.end()) {
233
8
            lower_value = &next->first;
234
8
        }
235
10
        values_count = 0;
236
10
        distinct_values_count = 0;
237
10
    }
238
239
2
    return true;
240
2
}
241
242
template <typename T>
243
bool histogram_to_json(rapidjson::StringBuffer& buffer, const std::vector<Bucket<T>>& buckets,
244
38
                       const DataTypePtr& data_type) {
245
38
    rapidjson::Document doc;
246
38
    doc.SetObject();
247
38
    rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
248
249
38
    int num_buckets = cast_set<int>(buckets.size());
250
38
    doc.AddMember("num_buckets", num_buckets, allocator);
251
252
38
    rapidjson::Value bucket_arr(rapidjson::kArrayType);
253
38
    bucket_arr.Reserve(num_buckets, allocator);
254
255
38
    std::stringstream ss1;
256
38
    std::stringstream ss2;
257
258
38
    rapidjson::Value lower_val;
259
38
    rapidjson::Value upper_val;
260
261
    // Convert bucket's lower and upper to 2 columns
262
38
    MutableColumnPtr lower_column = data_type->create_column();
263
38
    MutableColumnPtr upper_column = data_type->create_column();
264
102
    for (const auto& bucket : buckets) {
265
        // String type is different, it has to pass in length
266
        // if it is string type , directly use string value
267
102
        if constexpr (!std::is_same_v<T, std::string>) {
268
92
            lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0);
269
92
            upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0);
270
92
        }
271
102
    }
272
38
    size_t row_num = 0;
273
274
38
    auto format_options = DataTypeSerDe::get_default_format_options();
275
38
    auto time_zone = cctz::utc_time_zone();
276
38
    format_options.timezone = &time_zone;
277
278
102
    for (const auto& bucket : buckets) {
279
102
        if constexpr (std::is_same_v<T, std::string>) {
280
10
            lower_val.SetString(bucket.lower.data(),
281
10
                                static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator);
282
10
            upper_val.SetString(bucket.upper.data(),
283
10
                                static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator);
284
92
        } else {
285
92
            std::string lower_str = data_type->to_string(*lower_column, row_num, format_options);
286
92
            std::string upper_str = data_type->to_string(*upper_column, row_num, format_options);
287
92
            ++row_num;
288
92
            lower_val.SetString(lower_str.data(),
289
92
                                static_cast<rapidjson::SizeType>(lower_str.size()), allocator);
290
92
            upper_val.SetString(upper_str.data(),
291
92
                                static_cast<rapidjson::SizeType>(upper_str.size()), allocator);
292
92
        }
293
102
        rapidjson::Value bucket_json(rapidjson::kObjectType);
294
102
        bucket_json.AddMember("lower", lower_val, allocator);
295
102
        bucket_json.AddMember("upper", upper_val, allocator);
296
102
        bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator);
297
102
        bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator);
298
102
        bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator);
299
300
102
        bucket_arr.PushBack(bucket_json, allocator);
301
102
    }
302
303
38
    doc.AddMember("buckets", bucket_arr, allocator);
304
38
    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
305
38
    doc.Accept(writer);
306
307
38
    return !buckets.empty() && buffer.GetSize() > 0;
308
38
}
_ZN5doris17histogram_to_jsonIiEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE
Line
Count
Source
244
6
                       const DataTypePtr& data_type) {
245
6
    rapidjson::Document doc;
246
6
    doc.SetObject();
247
6
    rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
248
249
6
    int num_buckets = cast_set<int>(buckets.size());
250
6
    doc.AddMember("num_buckets", num_buckets, allocator);
251
252
6
    rapidjson::Value bucket_arr(rapidjson::kArrayType);
253
6
    bucket_arr.Reserve(num_buckets, allocator);
254
255
6
    std::stringstream ss1;
256
6
    std::stringstream ss2;
257
258
6
    rapidjson::Value lower_val;
259
6
    rapidjson::Value upper_val;
260
261
    // Convert bucket's lower and upper to 2 columns
262
6
    MutableColumnPtr lower_column = data_type->create_column();
263
6
    MutableColumnPtr upper_column = data_type->create_column();
264
12
    for (const auto& bucket : buckets) {
265
        // String type is different, it has to pass in length
266
        // if it is string type , directly use string value
267
12
        if constexpr (!std::is_same_v<T, std::string>) {
268
12
            lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0);
269
12
            upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0);
270
12
        }
271
12
    }
272
6
    size_t row_num = 0;
273
274
6
    auto format_options = DataTypeSerDe::get_default_format_options();
275
6
    auto time_zone = cctz::utc_time_zone();
276
6
    format_options.timezone = &time_zone;
277
278
12
    for (const auto& bucket : buckets) {
279
        if constexpr (std::is_same_v<T, std::string>) {
280
            lower_val.SetString(bucket.lower.data(),
281
                                static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator);
282
            upper_val.SetString(bucket.upper.data(),
283
                                static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator);
284
12
        } else {
285
12
            std::string lower_str = data_type->to_string(*lower_column, row_num, format_options);
286
12
            std::string upper_str = data_type->to_string(*upper_column, row_num, format_options);
287
12
            ++row_num;
288
12
            lower_val.SetString(lower_str.data(),
289
12
                                static_cast<rapidjson::SizeType>(lower_str.size()), allocator);
290
12
            upper_val.SetString(upper_str.data(),
291
12
                                static_cast<rapidjson::SizeType>(upper_str.size()), allocator);
292
12
        }
293
12
        rapidjson::Value bucket_json(rapidjson::kObjectType);
294
12
        bucket_json.AddMember("lower", lower_val, allocator);
295
12
        bucket_json.AddMember("upper", upper_val, allocator);
296
12
        bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator);
297
12
        bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator);
298
12
        bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator);
299
300
12
        bucket_arr.PushBack(bucket_json, allocator);
301
12
    }
302
303
6
    doc.AddMember("buckets", bucket_arr, allocator);
304
6
    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
305
6
    doc.Accept(writer);
306
307
6
    return !buckets.empty() && buffer.GetSize() > 0;
308
6
}
Unexecuted instantiation: _ZN5doris17histogram_to_jsonIhEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE
_ZN5doris17histogram_to_jsonIaEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE
Line
Count
Source
244
4
                       const DataTypePtr& data_type) {
245
4
    rapidjson::Document doc;
246
4
    doc.SetObject();
247
4
    rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
248
249
4
    int num_buckets = cast_set<int>(buckets.size());
250
4
    doc.AddMember("num_buckets", num_buckets, allocator);
251
252
4
    rapidjson::Value bucket_arr(rapidjson::kArrayType);
253
4
    bucket_arr.Reserve(num_buckets, allocator);
254
255
4
    std::stringstream ss1;
256
4
    std::stringstream ss2;
257
258
4
    rapidjson::Value lower_val;
259
4
    rapidjson::Value upper_val;
260
261
    // Convert bucket's lower and upper to 2 columns
262
4
    MutableColumnPtr lower_column = data_type->create_column();
263
4
    MutableColumnPtr upper_column = data_type->create_column();
264
10
    for (const auto& bucket : buckets) {
265
        // String type is different, it has to pass in length
266
        // if it is string type , directly use string value
267
10
        if constexpr (!std::is_same_v<T, std::string>) {
268
10
            lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0);
269
10
            upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0);
270
10
        }
271
10
    }
272
4
    size_t row_num = 0;
273
274
4
    auto format_options = DataTypeSerDe::get_default_format_options();
275
4
    auto time_zone = cctz::utc_time_zone();
276
4
    format_options.timezone = &time_zone;
277
278
10
    for (const auto& bucket : buckets) {
279
        if constexpr (std::is_same_v<T, std::string>) {
280
            lower_val.SetString(bucket.lower.data(),
281
                                static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator);
282
            upper_val.SetString(bucket.upper.data(),
283
                                static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator);
284
10
        } else {
285
10
            std::string lower_str = data_type->to_string(*lower_column, row_num, format_options);
286
10
            std::string upper_str = data_type->to_string(*upper_column, row_num, format_options);
287
10
            ++row_num;
288
10
            lower_val.SetString(lower_str.data(),
289
10
                                static_cast<rapidjson::SizeType>(lower_str.size()), allocator);
290
10
            upper_val.SetString(upper_str.data(),
291
10
                                static_cast<rapidjson::SizeType>(upper_str.size()), allocator);
292
10
        }
293
10
        rapidjson::Value bucket_json(rapidjson::kObjectType);
294
10
        bucket_json.AddMember("lower", lower_val, allocator);
295
10
        bucket_json.AddMember("upper", upper_val, allocator);
296
10
        bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator);
297
10
        bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator);
298
10
        bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator);
299
300
10
        bucket_arr.PushBack(bucket_json, allocator);
301
10
    }
302
303
4
    doc.AddMember("buckets", bucket_arr, allocator);
304
4
    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
305
4
    doc.Accept(writer);
306
307
4
    return !buckets.empty() && buffer.GetSize() > 0;
308
4
}
_ZN5doris17histogram_to_jsonIsEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE
Line
Count
Source
244
4
                       const DataTypePtr& data_type) {
245
4
    rapidjson::Document doc;
246
4
    doc.SetObject();
247
4
    rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
248
249
4
    int num_buckets = cast_set<int>(buckets.size());
250
4
    doc.AddMember("num_buckets", num_buckets, allocator);
251
252
4
    rapidjson::Value bucket_arr(rapidjson::kArrayType);
253
4
    bucket_arr.Reserve(num_buckets, allocator);
254
255
4
    std::stringstream ss1;
256
4
    std::stringstream ss2;
257
258
4
    rapidjson::Value lower_val;
259
4
    rapidjson::Value upper_val;
260
261
    // Convert bucket's lower and upper to 2 columns
262
4
    MutableColumnPtr lower_column = data_type->create_column();
263
4
    MutableColumnPtr upper_column = data_type->create_column();
264
10
    for (const auto& bucket : buckets) {
265
        // String type is different, it has to pass in length
266
        // if it is string type , directly use string value
267
10
        if constexpr (!std::is_same_v<T, std::string>) {
268
10
            lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0);
269
10
            upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0);
270
10
        }
271
10
    }
272
4
    size_t row_num = 0;
273
274
4
    auto format_options = DataTypeSerDe::get_default_format_options();
275
4
    auto time_zone = cctz::utc_time_zone();
276
4
    format_options.timezone = &time_zone;
277
278
10
    for (const auto& bucket : buckets) {
279
        if constexpr (std::is_same_v<T, std::string>) {
280
            lower_val.SetString(bucket.lower.data(),
281
                                static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator);
282
            upper_val.SetString(bucket.upper.data(),
283
                                static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator);
284
10
        } else {
285
10
            std::string lower_str = data_type->to_string(*lower_column, row_num, format_options);
286
10
            std::string upper_str = data_type->to_string(*upper_column, row_num, format_options);
287
10
            ++row_num;
288
10
            lower_val.SetString(lower_str.data(),
289
10
                                static_cast<rapidjson::SizeType>(lower_str.size()), allocator);
290
10
            upper_val.SetString(upper_str.data(),
291
10
                                static_cast<rapidjson::SizeType>(upper_str.size()), allocator);
292
10
        }
293
10
        rapidjson::Value bucket_json(rapidjson::kObjectType);
294
10
        bucket_json.AddMember("lower", lower_val, allocator);
295
10
        bucket_json.AddMember("upper", upper_val, allocator);
296
10
        bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator);
297
10
        bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator);
298
10
        bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator);
299
300
10
        bucket_arr.PushBack(bucket_json, allocator);
301
10
    }
302
303
4
    doc.AddMember("buckets", bucket_arr, allocator);
304
4
    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
305
4
    doc.Accept(writer);
306
307
4
    return !buckets.empty() && buffer.GetSize() > 0;
308
4
}
_ZN5doris17histogram_to_jsonIlEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE
Line
Count
Source
244
4
                       const DataTypePtr& data_type) {
245
4
    rapidjson::Document doc;
246
4
    doc.SetObject();
247
4
    rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
248
249
4
    int num_buckets = cast_set<int>(buckets.size());
250
4
    doc.AddMember("num_buckets", num_buckets, allocator);
251
252
4
    rapidjson::Value bucket_arr(rapidjson::kArrayType);
253
4
    bucket_arr.Reserve(num_buckets, allocator);
254
255
4
    std::stringstream ss1;
256
4
    std::stringstream ss2;
257
258
4
    rapidjson::Value lower_val;
259
4
    rapidjson::Value upper_val;
260
261
    // Convert bucket's lower and upper to 2 columns
262
4
    MutableColumnPtr lower_column = data_type->create_column();
263
4
    MutableColumnPtr upper_column = data_type->create_column();
264
10
    for (const auto& bucket : buckets) {
265
        // String type is different, it has to pass in length
266
        // if it is string type , directly use string value
267
10
        if constexpr (!std::is_same_v<T, std::string>) {
268
10
            lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0);
269
10
            upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0);
270
10
        }
271
10
    }
272
4
    size_t row_num = 0;
273
274
4
    auto format_options = DataTypeSerDe::get_default_format_options();
275
4
    auto time_zone = cctz::utc_time_zone();
276
4
    format_options.timezone = &time_zone;
277
278
10
    for (const auto& bucket : buckets) {
279
        if constexpr (std::is_same_v<T, std::string>) {
280
            lower_val.SetString(bucket.lower.data(),
281
                                static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator);
282
            upper_val.SetString(bucket.upper.data(),
283
                                static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator);
284
10
        } else {
285
10
            std::string lower_str = data_type->to_string(*lower_column, row_num, format_options);
286
10
            std::string upper_str = data_type->to_string(*upper_column, row_num, format_options);
287
10
            ++row_num;
288
10
            lower_val.SetString(lower_str.data(),
289
10
                                static_cast<rapidjson::SizeType>(lower_str.size()), allocator);
290
10
            upper_val.SetString(upper_str.data(),
291
10
                                static_cast<rapidjson::SizeType>(upper_str.size()), allocator);
292
10
        }
293
10
        rapidjson::Value bucket_json(rapidjson::kObjectType);
294
10
        bucket_json.AddMember("lower", lower_val, allocator);
295
10
        bucket_json.AddMember("upper", upper_val, allocator);
296
10
        bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator);
297
10
        bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator);
298
10
        bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator);
299
300
10
        bucket_arr.PushBack(bucket_json, allocator);
301
10
    }
302
303
4
    doc.AddMember("buckets", bucket_arr, allocator);
304
4
    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
305
4
    doc.Accept(writer);
306
307
4
    return !buckets.empty() && buffer.GetSize() > 0;
308
4
}
_ZN5doris17histogram_to_jsonInEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE
Line
Count
Source
244
4
                       const DataTypePtr& data_type) {
245
4
    rapidjson::Document doc;
246
4
    doc.SetObject();
247
4
    rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
248
249
4
    int num_buckets = cast_set<int>(buckets.size());
250
4
    doc.AddMember("num_buckets", num_buckets, allocator);
251
252
4
    rapidjson::Value bucket_arr(rapidjson::kArrayType);
253
4
    bucket_arr.Reserve(num_buckets, allocator);
254
255
4
    std::stringstream ss1;
256
4
    std::stringstream ss2;
257
258
4
    rapidjson::Value lower_val;
259
4
    rapidjson::Value upper_val;
260
261
    // Convert bucket's lower and upper to 2 columns
262
4
    MutableColumnPtr lower_column = data_type->create_column();
263
4
    MutableColumnPtr upper_column = data_type->create_column();
264
10
    for (const auto& bucket : buckets) {
265
        // String type is different, it has to pass in length
266
        // if it is string type , directly use string value
267
10
        if constexpr (!std::is_same_v<T, std::string>) {
268
10
            lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0);
269
10
            upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0);
270
10
        }
271
10
    }
272
4
    size_t row_num = 0;
273
274
4
    auto format_options = DataTypeSerDe::get_default_format_options();
275
4
    auto time_zone = cctz::utc_time_zone();
276
4
    format_options.timezone = &time_zone;
277
278
10
    for (const auto& bucket : buckets) {
279
        if constexpr (std::is_same_v<T, std::string>) {
280
            lower_val.SetString(bucket.lower.data(),
281
                                static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator);
282
            upper_val.SetString(bucket.upper.data(),
283
                                static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator);
284
10
        } else {
285
10
            std::string lower_str = data_type->to_string(*lower_column, row_num, format_options);
286
10
            std::string upper_str = data_type->to_string(*upper_column, row_num, format_options);
287
10
            ++row_num;
288
10
            lower_val.SetString(lower_str.data(),
289
10
                                static_cast<rapidjson::SizeType>(lower_str.size()), allocator);
290
10
            upper_val.SetString(upper_str.data(),
291
10
                                static_cast<rapidjson::SizeType>(upper_str.size()), allocator);
292
10
        }
293
10
        rapidjson::Value bucket_json(rapidjson::kObjectType);
294
10
        bucket_json.AddMember("lower", lower_val, allocator);
295
10
        bucket_json.AddMember("upper", upper_val, allocator);
296
10
        bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator);
297
10
        bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator);
298
10
        bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator);
299
300
10
        bucket_arr.PushBack(bucket_json, allocator);
301
10
    }
302
303
4
    doc.AddMember("buckets", bucket_arr, allocator);
304
4
    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
305
4
    doc.Accept(writer);
306
307
4
    return !buckets.empty() && buffer.GetSize() > 0;
308
4
}
_ZN5doris17histogram_to_jsonIfEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE
Line
Count
Source
244
4
                       const DataTypePtr& data_type) {
245
4
    rapidjson::Document doc;
246
4
    doc.SetObject();
247
4
    rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
248
249
4
    int num_buckets = cast_set<int>(buckets.size());
250
4
    doc.AddMember("num_buckets", num_buckets, allocator);
251
252
4
    rapidjson::Value bucket_arr(rapidjson::kArrayType);
253
4
    bucket_arr.Reserve(num_buckets, allocator);
254
255
4
    std::stringstream ss1;
256
4
    std::stringstream ss2;
257
258
4
    rapidjson::Value lower_val;
259
4
    rapidjson::Value upper_val;
260
261
    // Convert bucket's lower and upper to 2 columns
262
4
    MutableColumnPtr lower_column = data_type->create_column();
263
4
    MutableColumnPtr upper_column = data_type->create_column();
264
10
    for (const auto& bucket : buckets) {
265
        // String type is different, it has to pass in length
266
        // if it is string type , directly use string value
267
10
        if constexpr (!std::is_same_v<T, std::string>) {
268
10
            lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0);
269
10
            upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0);
270
10
        }
271
10
    }
272
4
    size_t row_num = 0;
273
274
4
    auto format_options = DataTypeSerDe::get_default_format_options();
275
4
    auto time_zone = cctz::utc_time_zone();
276
4
    format_options.timezone = &time_zone;
277
278
10
    for (const auto& bucket : buckets) {
279
        if constexpr (std::is_same_v<T, std::string>) {
280
            lower_val.SetString(bucket.lower.data(),
281
                                static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator);
282
            upper_val.SetString(bucket.upper.data(),
283
                                static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator);
284
10
        } else {
285
10
            std::string lower_str = data_type->to_string(*lower_column, row_num, format_options);
286
10
            std::string upper_str = data_type->to_string(*upper_column, row_num, format_options);
287
10
            ++row_num;
288
10
            lower_val.SetString(lower_str.data(),
289
10
                                static_cast<rapidjson::SizeType>(lower_str.size()), allocator);
290
10
            upper_val.SetString(upper_str.data(),
291
10
                                static_cast<rapidjson::SizeType>(upper_str.size()), allocator);
292
10
        }
293
10
        rapidjson::Value bucket_json(rapidjson::kObjectType);
294
10
        bucket_json.AddMember("lower", lower_val, allocator);
295
10
        bucket_json.AddMember("upper", upper_val, allocator);
296
10
        bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator);
297
10
        bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator);
298
10
        bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator);
299
300
10
        bucket_arr.PushBack(bucket_json, allocator);
301
10
    }
302
303
4
    doc.AddMember("buckets", bucket_arr, allocator);
304
4
    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
305
4
    doc.Accept(writer);
306
307
4
    return !buckets.empty() && buffer.GetSize() > 0;
308
4
}
_ZN5doris17histogram_to_jsonIdEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE
Line
Count
Source
244
4
                       const DataTypePtr& data_type) {
245
4
    rapidjson::Document doc;
246
4
    doc.SetObject();
247
4
    rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
248
249
4
    int num_buckets = cast_set<int>(buckets.size());
250
4
    doc.AddMember("num_buckets", num_buckets, allocator);
251
252
4
    rapidjson::Value bucket_arr(rapidjson::kArrayType);
253
4
    bucket_arr.Reserve(num_buckets, allocator);
254
255
4
    std::stringstream ss1;
256
4
    std::stringstream ss2;
257
258
4
    rapidjson::Value lower_val;
259
4
    rapidjson::Value upper_val;
260
261
    // Convert bucket's lower and upper to 2 columns
262
4
    MutableColumnPtr lower_column = data_type->create_column();
263
4
    MutableColumnPtr upper_column = data_type->create_column();
264
10
    for (const auto& bucket : buckets) {
265
        // String type is different, it has to pass in length
266
        // if it is string type , directly use string value
267
10
        if constexpr (!std::is_same_v<T, std::string>) {
268
10
            lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0);
269
10
            upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0);
270
10
        }
271
10
    }
272
4
    size_t row_num = 0;
273
274
4
    auto format_options = DataTypeSerDe::get_default_format_options();
275
4
    auto time_zone = cctz::utc_time_zone();
276
4
    format_options.timezone = &time_zone;
277
278
10
    for (const auto& bucket : buckets) {
279
        if constexpr (std::is_same_v<T, std::string>) {
280
            lower_val.SetString(bucket.lower.data(),
281
                                static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator);
282
            upper_val.SetString(bucket.upper.data(),
283
                                static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator);
284
10
        } else {
285
10
            std::string lower_str = data_type->to_string(*lower_column, row_num, format_options);
286
10
            std::string upper_str = data_type->to_string(*upper_column, row_num, format_options);
287
10
            ++row_num;
288
10
            lower_val.SetString(lower_str.data(),
289
10
                                static_cast<rapidjson::SizeType>(lower_str.size()), allocator);
290
10
            upper_val.SetString(upper_str.data(),
291
10
                                static_cast<rapidjson::SizeType>(upper_str.size()), allocator);
292
10
        }
293
10
        rapidjson::Value bucket_json(rapidjson::kObjectType);
294
10
        bucket_json.AddMember("lower", lower_val, allocator);
295
10
        bucket_json.AddMember("upper", upper_val, allocator);
296
10
        bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator);
297
10
        bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator);
298
10
        bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator);
299
300
10
        bucket_arr.PushBack(bucket_json, allocator);
301
10
    }
302
303
4
    doc.AddMember("buckets", bucket_arr, allocator);
304
4
    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
305
4
    doc.Accept(writer);
306
307
4
    return !buckets.empty() && buffer.GetSize() > 0;
308
4
}
Unexecuted instantiation: _ZN5doris17histogram_to_jsonINS_7DecimalIiEEEEbRN9rapidjson19GenericStringBufferINS3_4UTF8IcEENS3_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISD_EERKSt10shared_ptrIKNS_9IDataTypeEE
Unexecuted instantiation: _ZN5doris17histogram_to_jsonINS_7DecimalIlEEEEbRN9rapidjson19GenericStringBufferINS3_4UTF8IcEENS3_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISD_EERKSt10shared_ptrIKNS_9IDataTypeEE
Unexecuted instantiation: _ZN5doris17histogram_to_jsonINS_12Decimal128V3EEEbRN9rapidjson19GenericStringBufferINS2_4UTF8IcEENS2_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISC_EERKSt10shared_ptrIKNS_9IDataTypeEE
Unexecuted instantiation: _ZN5doris17histogram_to_jsonINS_7DecimalIN4wide7integerILm256EiEEEEEEbRN9rapidjson19GenericStringBufferINS6_4UTF8IcEENS6_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISG_EERKSt10shared_ptrIKNS_9IDataTypeEE
_ZN5doris17histogram_to_jsonINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEbRN9rapidjson19GenericStringBufferINS7_4UTF8IcEENS7_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISH_EERKSt10shared_ptrIKNS_9IDataTypeEE
Line
Count
Source
244
4
                       const DataTypePtr& data_type) {
245
4
    rapidjson::Document doc;
246
4
    doc.SetObject();
247
4
    rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
248
249
4
    int num_buckets = cast_set<int>(buckets.size());
250
4
    doc.AddMember("num_buckets", num_buckets, allocator);
251
252
4
    rapidjson::Value bucket_arr(rapidjson::kArrayType);
253
4
    bucket_arr.Reserve(num_buckets, allocator);
254
255
4
    std::stringstream ss1;
256
4
    std::stringstream ss2;
257
258
4
    rapidjson::Value lower_val;
259
4
    rapidjson::Value upper_val;
260
261
    // Convert bucket's lower and upper to 2 columns
262
4
    MutableColumnPtr lower_column = data_type->create_column();
263
4
    MutableColumnPtr upper_column = data_type->create_column();
264
10
    for (const auto& bucket : buckets) {
265
        // String type is different, it has to pass in length
266
        // if it is string type , directly use string value
267
        if constexpr (!std::is_same_v<T, std::string>) {
268
            lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0);
269
            upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0);
270
        }
271
10
    }
272
4
    size_t row_num = 0;
273
274
4
    auto format_options = DataTypeSerDe::get_default_format_options();
275
4
    auto time_zone = cctz::utc_time_zone();
276
4
    format_options.timezone = &time_zone;
277
278
10
    for (const auto& bucket : buckets) {
279
10
        if constexpr (std::is_same_v<T, std::string>) {
280
10
            lower_val.SetString(bucket.lower.data(),
281
10
                                static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator);
282
10
            upper_val.SetString(bucket.upper.data(),
283
10
                                static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator);
284
        } else {
285
            std::string lower_str = data_type->to_string(*lower_column, row_num, format_options);
286
            std::string upper_str = data_type->to_string(*upper_column, row_num, format_options);
287
            ++row_num;
288
            lower_val.SetString(lower_str.data(),
289
                                static_cast<rapidjson::SizeType>(lower_str.size()), allocator);
290
            upper_val.SetString(upper_str.data(),
291
                                static_cast<rapidjson::SizeType>(upper_str.size()), allocator);
292
        }
293
10
        rapidjson::Value bucket_json(rapidjson::kObjectType);
294
10
        bucket_json.AddMember("lower", lower_val, allocator);
295
10
        bucket_json.AddMember("upper", upper_val, allocator);
296
10
        bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator);
297
10
        bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator);
298
10
        bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator);
299
300
10
        bucket_arr.PushBack(bucket_json, allocator);
301
10
    }
302
303
4
    doc.AddMember("buckets", bucket_arr, allocator);
304
4
    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
305
4
    doc.Accept(writer);
306
307
4
    return !buckets.empty() && buffer.GetSize() > 0;
308
4
}
_ZN5doris17histogram_to_jsonINS_11DateV2ValueINS_15DateV2ValueTypeEEEEEbRN9rapidjson19GenericStringBufferINS4_4UTF8IcEENS4_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISE_EERKSt10shared_ptrIKNS_9IDataTypeEE
Line
Count
Source
244
2
                       const DataTypePtr& data_type) {
245
2
    rapidjson::Document doc;
246
2
    doc.SetObject();
247
2
    rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
248
249
2
    int num_buckets = cast_set<int>(buckets.size());
250
2
    doc.AddMember("num_buckets", num_buckets, allocator);
251
252
2
    rapidjson::Value bucket_arr(rapidjson::kArrayType);
253
2
    bucket_arr.Reserve(num_buckets, allocator);
254
255
2
    std::stringstream ss1;
256
2
    std::stringstream ss2;
257
258
2
    rapidjson::Value lower_val;
259
2
    rapidjson::Value upper_val;
260
261
    // Convert bucket's lower and upper to 2 columns
262
2
    MutableColumnPtr lower_column = data_type->create_column();
263
2
    MutableColumnPtr upper_column = data_type->create_column();
264
10
    for (const auto& bucket : buckets) {
265
        // String type is different, it has to pass in length
266
        // if it is string type , directly use string value
267
10
        if constexpr (!std::is_same_v<T, std::string>) {
268
10
            lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0);
269
10
            upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0);
270
10
        }
271
10
    }
272
2
    size_t row_num = 0;
273
274
2
    auto format_options = DataTypeSerDe::get_default_format_options();
275
2
    auto time_zone = cctz::utc_time_zone();
276
2
    format_options.timezone = &time_zone;
277
278
10
    for (const auto& bucket : buckets) {
279
        if constexpr (std::is_same_v<T, std::string>) {
280
            lower_val.SetString(bucket.lower.data(),
281
                                static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator);
282
            upper_val.SetString(bucket.upper.data(),
283
                                static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator);
284
10
        } else {
285
10
            std::string lower_str = data_type->to_string(*lower_column, row_num, format_options);
286
10
            std::string upper_str = data_type->to_string(*upper_column, row_num, format_options);
287
10
            ++row_num;
288
10
            lower_val.SetString(lower_str.data(),
289
10
                                static_cast<rapidjson::SizeType>(lower_str.size()), allocator);
290
10
            upper_val.SetString(upper_str.data(),
291
10
                                static_cast<rapidjson::SizeType>(upper_str.size()), allocator);
292
10
        }
293
10
        rapidjson::Value bucket_json(rapidjson::kObjectType);
294
10
        bucket_json.AddMember("lower", lower_val, allocator);
295
10
        bucket_json.AddMember("upper", upper_val, allocator);
296
10
        bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator);
297
10
        bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator);
298
10
        bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator);
299
300
10
        bucket_arr.PushBack(bucket_json, allocator);
301
10
    }
302
303
2
    doc.AddMember("buckets", bucket_arr, allocator);
304
2
    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
305
2
    doc.Accept(writer);
306
307
2
    return !buckets.empty() && buffer.GetSize() > 0;
308
2
}
_ZN5doris17histogram_to_jsonINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEEEEbRN9rapidjson19GenericStringBufferINS4_4UTF8IcEENS4_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISE_EERKSt10shared_ptrIKNS_9IDataTypeEE
Line
Count
Source
244
2
                       const DataTypePtr& data_type) {
245
2
    rapidjson::Document doc;
246
2
    doc.SetObject();
247
2
    rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
248
249
2
    int num_buckets = cast_set<int>(buckets.size());
250
2
    doc.AddMember("num_buckets", num_buckets, allocator);
251
252
2
    rapidjson::Value bucket_arr(rapidjson::kArrayType);
253
2
    bucket_arr.Reserve(num_buckets, allocator);
254
255
2
    std::stringstream ss1;
256
2
    std::stringstream ss2;
257
258
2
    rapidjson::Value lower_val;
259
2
    rapidjson::Value upper_val;
260
261
    // Convert bucket's lower and upper to 2 columns
262
2
    MutableColumnPtr lower_column = data_type->create_column();
263
2
    MutableColumnPtr upper_column = data_type->create_column();
264
10
    for (const auto& bucket : buckets) {
265
        // String type is different, it has to pass in length
266
        // if it is string type , directly use string value
267
10
        if constexpr (!std::is_same_v<T, std::string>) {
268
10
            lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0);
269
10
            upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0);
270
10
        }
271
10
    }
272
2
    size_t row_num = 0;
273
274
2
    auto format_options = DataTypeSerDe::get_default_format_options();
275
2
    auto time_zone = cctz::utc_time_zone();
276
2
    format_options.timezone = &time_zone;
277
278
10
    for (const auto& bucket : buckets) {
279
        if constexpr (std::is_same_v<T, std::string>) {
280
            lower_val.SetString(bucket.lower.data(),
281
                                static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator);
282
            upper_val.SetString(bucket.upper.data(),
283
                                static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator);
284
10
        } else {
285
10
            std::string lower_str = data_type->to_string(*lower_column, row_num, format_options);
286
10
            std::string upper_str = data_type->to_string(*upper_column, row_num, format_options);
287
10
            ++row_num;
288
10
            lower_val.SetString(lower_str.data(),
289
10
                                static_cast<rapidjson::SizeType>(lower_str.size()), allocator);
290
10
            upper_val.SetString(upper_str.data(),
291
10
                                static_cast<rapidjson::SizeType>(upper_str.size()), allocator);
292
10
        }
293
10
        rapidjson::Value bucket_json(rapidjson::kObjectType);
294
10
        bucket_json.AddMember("lower", lower_val, allocator);
295
10
        bucket_json.AddMember("upper", upper_val, allocator);
296
10
        bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator);
297
10
        bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator);
298
10
        bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator);
299
300
10
        bucket_arr.PushBack(bucket_json, allocator);
301
10
    }
302
303
2
    doc.AddMember("buckets", bucket_arr, allocator);
304
2
    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
305
2
    doc.Accept(writer);
306
307
2
    return !buckets.empty() && buffer.GetSize() > 0;
308
2
}
309
} // namespace  doris