Coverage Report

Created: 2026-03-19 18:23

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exec/common/histogram_helpers.hpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <rapidjson/document.h>
21
#include <rapidjson/prettywriter.h>
22
#include <rapidjson/stringbuffer.h>
23
24
#include <boost/dynamic_bitset.hpp>
25
26
#include "common/cast_set.h"
27
#include "core/data_type/data_type_decimal.h"
28
#include "util/io_helper.h"
29
30
namespace doris {
31
#include "common/compile_check_begin.h"
32
template <typename T>
33
struct Bucket {
34
public:
35
    Bucket() = default;
36
    Bucket(T lower, T upper, size_t ndv, size_t count, size_t pre_sum)
37
232
            : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {}
Unexecuted instantiation: _ZN5doris6BucketINS_7DecimalIiEEEC2ES2_S2_mmm
Unexecuted instantiation: _ZN5doris6BucketINS_7DecimalIlEEEC2ES2_S2_mmm
Unexecuted instantiation: _ZN5doris6BucketINS_12Decimal128V3EEC2ES1_S1_mmm
Unexecuted instantiation: _ZN5doris6BucketINS_7DecimalIN4wide7integerILm256EiEEEEEC2ES5_S5_mmm
_ZN5doris6BucketINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEC2ES6_S6_mmm
Line
Count
Source
37
20
            : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {}
_ZN5doris6BucketINS_11DateV2ValueINS_15DateV2ValueTypeEEEEC2ES3_S3_mmm
Line
Count
Source
37
20
            : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {}
_ZN5doris6BucketINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEEEC2ES3_S3_mmm
Line
Count
Source
37
20
            : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {}
_ZN5doris6BucketIiEC2Eiimmm
Line
Count
Source
37
52
            : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {}
Unexecuted instantiation: _ZN5doris6BucketIhEC2Ehhmmm
_ZN5doris6BucketIaEC2Eaammm
Line
Count
Source
37
20
            : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {}
_ZN5doris6BucketIsEC2Essmmm
Line
Count
Source
37
20
            : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {}
_ZN5doris6BucketIlEC2Ellmmm
Line
Count
Source
37
20
            : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {}
_ZN5doris6BucketInEC2Ennmmm
Line
Count
Source
37
20
            : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {}
_ZN5doris6BucketIfEC2Effmmm
Line
Count
Source
37
20
            : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {}
_ZN5doris6BucketIdEC2Eddmmm
Line
Count
Source
37
20
            : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {}
38
39
    T lower;
40
    T upper;
41
    size_t ndv;
42
    size_t count;
43
    size_t pre_sum;
44
};
45
46
/**
47
 * Checks if it is possible to assign the provided value_map to the given
48
 * number of buckets such that no bucket has a size larger than max_bucket_size.
49
 *
50
 * @param value_map A mapping of values to their counts.
51
 * @param max_bucket_size The maximum size that any bucket is allowed to have.
52
 * @param num_buckets The number of buckets that we want to assign values to.
53
 *
54
 * @return true if the values can be assigned to the buckets, false otherwise.
55
 */
56
template <typename T>
57
bool can_assign_into_buckets(const std::map<T, size_t>& value_map, const size_t max_bucket_size,
58
376
                             const size_t num_buckets) {
59
376
    if (value_map.empty()) {
60
2
        return false;
61
374
    };
62
63
374
    size_t used_buckets = 1;
64
374
    size_t current_bucket_size = 0;
65
66
57.9k
    for (const auto& [value, count] : value_map) {
67
57.9k
        current_bucket_size += count;
68
69
        // If adding the current value to the current bucket would exceed max_bucket_size,
70
        // then we start a new bucket.
71
57.9k
        if (current_bucket_size > max_bucket_size) {
72
1.40k
            ++used_buckets;
73
1.40k
            current_bucket_size = count;
74
1.40k
        }
75
76
        // If we have used more buckets than num_buckets, we cannot assign the values to buckets.
77
57.9k
        if (used_buckets > num_buckets) {
78
190
            return false;
79
190
        }
80
57.9k
    }
81
82
184
    return true;
83
374
}
_ZN5doris23can_assign_into_bucketsIiEEbRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEmm
Line
Count
Source
58
130
                             const size_t num_buckets) {
59
130
    if (value_map.empty()) {
60
2
        return false;
61
128
    };
62
63
128
    size_t used_buckets = 1;
64
128
    size_t current_bucket_size = 0;
65
66
2.75k
    for (const auto& [value, count] : value_map) {
67
2.75k
        current_bucket_size += count;
68
69
        // If adding the current value to the current bucket would exceed max_bucket_size,
70
        // then we start a new bucket.
71
2.75k
        if (current_bucket_size > max_bucket_size) {
72
322
            ++used_buckets;
73
322
            current_bucket_size = count;
74
322
        }
75
76
        // If we have used more buckets than num_buckets, we cannot assign the values to buckets.
77
2.75k
        if (used_buckets > num_buckets) {
78
58
            return false;
79
58
        }
80
2.75k
    }
81
82
70
    return true;
83
128
}
Unexecuted instantiation: _ZN5doris23can_assign_into_bucketsIhEEbRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEmm
_ZN5doris23can_assign_into_bucketsIaEEbRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEmm
Line
Count
Source
58
26
                             const size_t num_buckets) {
59
26
    if (value_map.empty()) {
60
0
        return false;
61
26
    };
62
63
26
    size_t used_buckets = 1;
64
26
    size_t current_bucket_size = 0;
65
66
2.38k
    for (const auto& [value, count] : value_map) {
67
2.38k
        current_bucket_size += count;
68
69
        // If adding the current value to the current bucket would exceed max_bucket_size,
70
        // then we start a new bucket.
71
2.38k
        if (current_bucket_size > max_bucket_size) {
72
114
            ++used_buckets;
73
114
            current_bucket_size = count;
74
114
        }
75
76
        // If we have used more buckets than num_buckets, we cannot assign the values to buckets.
77
2.38k
        if (used_buckets > num_buckets) {
78
14
            return false;
79
14
        }
80
2.38k
    }
81
82
12
    return true;
83
26
}
_ZN5doris23can_assign_into_bucketsIsEEbRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEmm
Line
Count
Source
58
26
                             const size_t num_buckets) {
59
26
    if (value_map.empty()) {
60
0
        return false;
61
26
    };
62
63
26
    size_t used_buckets = 1;
64
26
    size_t current_bucket_size = 0;
65
66
2.38k
    for (const auto& [value, count] : value_map) {
67
2.38k
        current_bucket_size += count;
68
69
        // If adding the current value to the current bucket would exceed max_bucket_size,
70
        // then we start a new bucket.
71
2.38k
        if (current_bucket_size > max_bucket_size) {
72
114
            ++used_buckets;
73
114
            current_bucket_size = count;
74
114
        }
75
76
        // If we have used more buckets than num_buckets, we cannot assign the values to buckets.
77
2.38k
        if (used_buckets > num_buckets) {
78
14
            return false;
79
14
        }
80
2.38k
    }
81
82
12
    return true;
83
26
}
_ZN5doris23can_assign_into_bucketsIlEEbRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEmm
Line
Count
Source
58
26
                             const size_t num_buckets) {
59
26
    if (value_map.empty()) {
60
0
        return false;
61
26
    };
62
63
26
    size_t used_buckets = 1;
64
26
    size_t current_bucket_size = 0;
65
66
2.38k
    for (const auto& [value, count] : value_map) {
67
2.38k
        current_bucket_size += count;
68
69
        // If adding the current value to the current bucket would exceed max_bucket_size,
70
        // then we start a new bucket.
71
2.38k
        if (current_bucket_size > max_bucket_size) {
72
114
            ++used_buckets;
73
114
            current_bucket_size = count;
74
114
        }
75
76
        // If we have used more buckets than num_buckets, we cannot assign the values to buckets.
77
2.38k
        if (used_buckets > num_buckets) {
78
14
            return false;
79
14
        }
80
2.38k
    }
81
82
12
    return true;
83
26
}
_ZN5doris23can_assign_into_bucketsInEEbRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEmm
Line
Count
Source
58
26
                             const size_t num_buckets) {
59
26
    if (value_map.empty()) {
60
0
        return false;
61
26
    };
62
63
26
    size_t used_buckets = 1;
64
26
    size_t current_bucket_size = 0;
65
66
2.38k
    for (const auto& [value, count] : value_map) {
67
2.38k
        current_bucket_size += count;
68
69
        // If adding the current value to the current bucket would exceed max_bucket_size,
70
        // then we start a new bucket.
71
2.38k
        if (current_bucket_size > max_bucket_size) {
72
114
            ++used_buckets;
73
114
            current_bucket_size = count;
74
114
        }
75
76
        // If we have used more buckets than num_buckets, we cannot assign the values to buckets.
77
2.38k
        if (used_buckets > num_buckets) {
78
14
            return false;
79
14
        }
80
2.38k
    }
81
82
12
    return true;
83
26
}
_ZN5doris23can_assign_into_bucketsIfEEbRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEmm
Line
Count
Source
58
26
                             const size_t num_buckets) {
59
26
    if (value_map.empty()) {
60
0
        return false;
61
26
    };
62
63
26
    size_t used_buckets = 1;
64
26
    size_t current_bucket_size = 0;
65
66
2.38k
    for (const auto& [value, count] : value_map) {
67
2.38k
        current_bucket_size += count;
68
69
        // If adding the current value to the current bucket would exceed max_bucket_size,
70
        // then we start a new bucket.
71
2.38k
        if (current_bucket_size > max_bucket_size) {
72
114
            ++used_buckets;
73
114
            current_bucket_size = count;
74
114
        }
75
76
        // If we have used more buckets than num_buckets, we cannot assign the values to buckets.
77
2.38k
        if (used_buckets > num_buckets) {
78
14
            return false;
79
14
        }
80
2.38k
    }
81
82
12
    return true;
83
26
}
_ZN5doris23can_assign_into_bucketsIdEEbRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEmm
Line
Count
Source
58
26
                             const size_t num_buckets) {
59
26
    if (value_map.empty()) {
60
0
        return false;
61
26
    };
62
63
26
    size_t used_buckets = 1;
64
26
    size_t current_bucket_size = 0;
65
66
2.38k
    for (const auto& [value, count] : value_map) {
67
2.38k
        current_bucket_size += count;
68
69
        // If adding the current value to the current bucket would exceed max_bucket_size,
70
        // then we start a new bucket.
71
2.38k
        if (current_bucket_size > max_bucket_size) {
72
114
            ++used_buckets;
73
114
            current_bucket_size = count;
74
114
        }
75
76
        // If we have used more buckets than num_buckets, we cannot assign the values to buckets.
77
2.38k
        if (used_buckets > num_buckets) {
78
14
            return false;
79
14
        }
80
2.38k
    }
81
82
12
    return true;
83
26
}
Unexecuted instantiation: _ZN5doris23can_assign_into_bucketsINS_7DecimalIiEEEEbRKSt3mapIT_mSt4lessIS4_ESaISt4pairIKS4_mEEEmm
Unexecuted instantiation: _ZN5doris23can_assign_into_bucketsINS_7DecimalIlEEEEbRKSt3mapIT_mSt4lessIS4_ESaISt4pairIKS4_mEEEmm
Unexecuted instantiation: _ZN5doris23can_assign_into_bucketsINS_12Decimal128V3EEEbRKSt3mapIT_mSt4lessIS3_ESaISt4pairIKS3_mEEEmm
Unexecuted instantiation: _ZN5doris23can_assign_into_bucketsINS_7DecimalIN4wide7integerILm256EiEEEEEEbRKSt3mapIT_mSt4lessIS7_ESaISt4pairIKS7_mEEEmm
_ZN5doris23can_assign_into_bucketsINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEbRKSt3mapIT_mSt4lessIS8_ESaISt4pairIKS8_mEEEmm
Line
Count
Source
58
38
                             const size_t num_buckets) {
59
38
    if (value_map.empty()) {
60
0
        return false;
61
38
    };
62
63
38
    size_t used_buckets = 1;
64
38
    size_t current_bucket_size = 0;
65
66
36.1k
    for (const auto& [value, count] : value_map) {
67
36.1k
        current_bucket_size += count;
68
69
        // If adding the current value to the current bucket would exceed max_bucket_size,
70
        // then we start a new bucket.
71
36.1k
        if (current_bucket_size > max_bucket_size) {
72
168
            ++used_buckets;
73
168
            current_bucket_size = count;
74
168
        }
75
76
        // If we have used more buckets than num_buckets, we cannot assign the values to buckets.
77
36.1k
        if (used_buckets > num_buckets) {
78
20
            return false;
79
20
        }
80
36.1k
    }
81
82
18
    return true;
83
38
}
_ZN5doris23can_assign_into_bucketsINS_11DateV2ValueINS_15DateV2ValueTypeEEEEEbRKSt3mapIT_mSt4lessIS5_ESaISt4pairIKS5_mEEEmm
Line
Count
Source
58
26
                             const size_t num_buckets) {
59
26
    if (value_map.empty()) {
60
0
        return false;
61
26
    };
62
63
26
    size_t used_buckets = 1;
64
26
    size_t current_bucket_size = 0;
65
66
2.38k
    for (const auto& [value, count] : value_map) {
67
2.38k
        current_bucket_size += count;
68
69
        // If adding the current value to the current bucket would exceed max_bucket_size,
70
        // then we start a new bucket.
71
2.38k
        if (current_bucket_size > max_bucket_size) {
72
114
            ++used_buckets;
73
114
            current_bucket_size = count;
74
114
        }
75
76
        // If we have used more buckets than num_buckets, we cannot assign the values to buckets.
77
2.38k
        if (used_buckets > num_buckets) {
78
14
            return false;
79
14
        }
80
2.38k
    }
81
82
12
    return true;
83
26
}
_ZN5doris23can_assign_into_bucketsINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEEEEbRKSt3mapIT_mSt4lessIS5_ESaISt4pairIKS5_mEEEmm
Line
Count
Source
58
26
                             const size_t num_buckets) {
59
26
    if (value_map.empty()) {
60
0
        return false;
61
26
    };
62
63
26
    size_t used_buckets = 1;
64
26
    size_t current_bucket_size = 0;
65
66
2.38k
    for (const auto& [value, count] : value_map) {
67
2.38k
        current_bucket_size += count;
68
69
        // If adding the current value to the current bucket would exceed max_bucket_size,
70
        // then we start a new bucket.
71
2.38k
        if (current_bucket_size > max_bucket_size) {
72
114
            ++used_buckets;
73
114
            current_bucket_size = count;
74
114
        }
75
76
        // If we have used more buckets than num_buckets, we cannot assign the values to buckets.
77
2.38k
        if (used_buckets > num_buckets) {
78
14
            return false;
79
14
        }
80
2.38k
    }
81
82
12
    return true;
83
26
}
84
85
/**
86
 * Calculates the maximum number of values that can fit into each bucket given a set of values
87
 * and the desired number of buckets.
88
 *
89
 * @tparam T the type of the values in the value map
90
 * @param value_map the map of values and their counts
91
 * @param num_buckets the desired number of buckets
92
 * @return the maximum number of values that can fit into each bucket
93
 */
94
template <typename T>
95
64
size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) {
96
    // Ensure that the value map is not empty
97
64
    assert(!value_map.empty());
98
99
    // Calculate the total number of values in the map using std::accumulate()
100
64
    size_t total_values = 0;
101
7.66k
    for (const auto& [value, count] : value_map) {
102
7.66k
        total_values += count;
103
7.66k
    }
104
105
    // If there is only one bucket, then all values will be assigned to that bucket
106
64
    if (num_buckets == 1) {
107
6
        return total_values;
108
6
    }
109
110
    // To calculate the maximum value count in each bucket, we first calculate a conservative upper
111
    // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed
112
    // the actual maximum value count, but it does not underestimate it. The subsequent binary search
113
    // algorithm will approach the actual maximum value count.
114
58
    size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1;
115
116
    // Initialize the lower bound to 0
117
58
    size_t lower_bucket_values = 0;
118
119
    // Perform a binary search to find the maximum number of values that can fit into each bucket
120
58
    int search_step = 0;
121
58
    const int max_search_steps =
122
58
            10; // Limit the number of search steps to avoid excessive iteration
123
124
406
    while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) {
125
        // Calculate the midpoint of the upper and lower bounds
126
348
        const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2;
127
128
        // Check if the given number of values can be assigned to the desired number of buckets
129
348
        if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) {
130
            // If it can, then set the upper bound to the midpoint
131
170
            upper_bucket_values = bucket_values;
132
178
        } else {
133
            // If it can't, then set the lower bound to the midpoint
134
178
            lower_bucket_values = bucket_values;
135
178
        }
136
        // Increment the search step counter
137
348
        ++search_step;
138
348
    }
139
140
58
    return upper_bucket_values;
141
64
}
_ZN5doris27calculate_bucket_max_valuesIiEEmRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEm
Line
Count
Source
95
28
size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) {
96
    // Ensure that the value map is not empty
97
28
    assert(!value_map.empty());
98
99
    // Calculate the total number of values in the map using std::accumulate()
100
28
    size_t total_values = 0;
101
466
    for (const auto& [value, count] : value_map) {
102
466
        total_values += count;
103
466
    }
104
105
    // If there is only one bucket, then all values will be assigned to that bucket
106
28
    if (num_buckets == 1) {
107
6
        return total_values;
108
6
    }
109
110
    // To calculate the maximum value count in each bucket, we first calculate a conservative upper
111
    // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed
112
    // the actual maximum value count, but it does not underestimate it. The subsequent binary search
113
    // algorithm will approach the actual maximum value count.
114
22
    size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1;
115
116
    // Initialize the lower bound to 0
117
22
    size_t lower_bucket_values = 0;
118
119
    // Perform a binary search to find the maximum number of values that can fit into each bucket
120
22
    int search_step = 0;
121
22
    const int max_search_steps =
122
22
            10; // Limit the number of search steps to avoid excessive iteration
123
124
124
    while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) {
125
        // Calculate the midpoint of the upper and lower bounds
126
102
        const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2;
127
128
        // Check if the given number of values can be assigned to the desired number of buckets
129
102
        if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) {
130
            // If it can, then set the upper bound to the midpoint
131
56
            upper_bucket_values = bucket_values;
132
56
        } else {
133
            // If it can't, then set the lower bound to the midpoint
134
46
            lower_bucket_values = bucket_values;
135
46
        }
136
        // Increment the search step counter
137
102
        ++search_step;
138
102
    }
139
140
22
    return upper_bucket_values;
141
28
}
Unexecuted instantiation: _ZN5doris27calculate_bucket_max_valuesIhEEmRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEm
_ZN5doris27calculate_bucket_max_valuesIaEEmRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEm
Line
Count
Source
95
4
size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) {
96
    // Ensure that the value map is not empty
97
4
    assert(!value_map.empty());
98
99
    // Calculate the total number of values in the map using std::accumulate()
100
4
    size_t total_values = 0;
101
400
    for (const auto& [value, count] : value_map) {
102
400
        total_values += count;
103
400
    }
104
105
    // If there is only one bucket, then all values will be assigned to that bucket
106
4
    if (num_buckets == 1) {
107
0
        return total_values;
108
0
    }
109
110
    // To calculate the maximum value count in each bucket, we first calculate a conservative upper
111
    // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed
112
    // the actual maximum value count, but it does not underestimate it. The subsequent binary search
113
    // algorithm will approach the actual maximum value count.
114
4
    size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1;
115
116
    // Initialize the lower bound to 0
117
4
    size_t lower_bucket_values = 0;
118
119
    // Perform a binary search to find the maximum number of values that can fit into each bucket
120
4
    int search_step = 0;
121
4
    const int max_search_steps =
122
4
            10; // Limit the number of search steps to avoid excessive iteration
123
124
30
    while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) {
125
        // Calculate the midpoint of the upper and lower bounds
126
26
        const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2;
127
128
        // Check if the given number of values can be assigned to the desired number of buckets
129
26
        if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) {
130
            // If it can, then set the upper bound to the midpoint
131
12
            upper_bucket_values = bucket_values;
132
14
        } else {
133
            // If it can't, then set the lower bound to the midpoint
134
14
            lower_bucket_values = bucket_values;
135
14
        }
136
        // Increment the search step counter
137
26
        ++search_step;
138
26
    }
139
140
4
    return upper_bucket_values;
141
4
}
_ZN5doris27calculate_bucket_max_valuesIsEEmRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEm
Line
Count
Source
95
4
size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) {
96
    // Ensure that the value map is not empty
97
4
    assert(!value_map.empty());
98
99
    // Calculate the total number of values in the map using std::accumulate()
100
4
    size_t total_values = 0;
101
400
    for (const auto& [value, count] : value_map) {
102
400
        total_values += count;
103
400
    }
104
105
    // If there is only one bucket, then all values will be assigned to that bucket
106
4
    if (num_buckets == 1) {
107
0
        return total_values;
108
0
    }
109
110
    // To calculate the maximum value count in each bucket, we first calculate a conservative upper
111
    // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed
112
    // the actual maximum value count, but it does not underestimate it. The subsequent binary search
113
    // algorithm will approach the actual maximum value count.
114
4
    size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1;
115
116
    // Initialize the lower bound to 0
117
4
    size_t lower_bucket_values = 0;
118
119
    // Perform a binary search to find the maximum number of values that can fit into each bucket
120
4
    int search_step = 0;
121
4
    const int max_search_steps =
122
4
            10; // Limit the number of search steps to avoid excessive iteration
123
124
30
    while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) {
125
        // Calculate the midpoint of the upper and lower bounds
126
26
        const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2;
127
128
        // Check if the given number of values can be assigned to the desired number of buckets
129
26
        if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) {
130
            // If it can, then set the upper bound to the midpoint
131
12
            upper_bucket_values = bucket_values;
132
14
        } else {
133
            // If it can't, then set the lower bound to the midpoint
134
14
            lower_bucket_values = bucket_values;
135
14
        }
136
        // Increment the search step counter
137
26
        ++search_step;
138
26
    }
139
140
4
    return upper_bucket_values;
141
4
}
_ZN5doris27calculate_bucket_max_valuesIlEEmRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEm
Line
Count
Source
95
4
size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) {
96
    // Ensure that the value map is not empty
97
4
    assert(!value_map.empty());
98
99
    // Calculate the total number of values in the map using std::accumulate()
100
4
    size_t total_values = 0;
101
400
    for (const auto& [value, count] : value_map) {
102
400
        total_values += count;
103
400
    }
104
105
    // If there is only one bucket, then all values will be assigned to that bucket
106
4
    if (num_buckets == 1) {
107
0
        return total_values;
108
0
    }
109
110
    // To calculate the maximum value count in each bucket, we first calculate a conservative upper
111
    // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed
112
    // the actual maximum value count, but it does not underestimate it. The subsequent binary search
113
    // algorithm will approach the actual maximum value count.
114
4
    size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1;
115
116
    // Initialize the lower bound to 0
117
4
    size_t lower_bucket_values = 0;
118
119
    // Perform a binary search to find the maximum number of values that can fit into each bucket
120
4
    int search_step = 0;
121
4
    const int max_search_steps =
122
4
            10; // Limit the number of search steps to avoid excessive iteration
123
124
30
    while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) {
125
        // Calculate the midpoint of the upper and lower bounds
126
26
        const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2;
127
128
        // Check if the given number of values can be assigned to the desired number of buckets
129
26
        if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) {
130
            // If it can, then set the upper bound to the midpoint
131
12
            upper_bucket_values = bucket_values;
132
14
        } else {
133
            // If it can't, then set the lower bound to the midpoint
134
14
            lower_bucket_values = bucket_values;
135
14
        }
136
        // Increment the search step counter
137
26
        ++search_step;
138
26
    }
139
140
4
    return upper_bucket_values;
141
4
}
_ZN5doris27calculate_bucket_max_valuesInEEmRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEm
Line
Count
Source
95
4
size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) {
96
    // Ensure that the value map is not empty
97
4
    assert(!value_map.empty());
98
99
    // Calculate the total number of values in the map using std::accumulate()
100
4
    size_t total_values = 0;
101
400
    for (const auto& [value, count] : value_map) {
102
400
        total_values += count;
103
400
    }
104
105
    // If there is only one bucket, then all values will be assigned to that bucket
106
4
    if (num_buckets == 1) {
107
0
        return total_values;
108
0
    }
109
110
    // To calculate the maximum value count in each bucket, we first calculate a conservative upper
111
    // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed
112
    // the actual maximum value count, but it does not underestimate it. The subsequent binary search
113
    // algorithm will approach the actual maximum value count.
114
4
    size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1;
115
116
    // Initialize the lower bound to 0
117
4
    size_t lower_bucket_values = 0;
118
119
    // Perform a binary search to find the maximum number of values that can fit into each bucket
120
4
    int search_step = 0;
121
4
    const int max_search_steps =
122
4
            10; // Limit the number of search steps to avoid excessive iteration
123
124
30
    while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) {
125
        // Calculate the midpoint of the upper and lower bounds
126
26
        const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2;
127
128
        // Check if the given number of values can be assigned to the desired number of buckets
129
26
        if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) {
130
            // If it can, then set the upper bound to the midpoint
131
12
            upper_bucket_values = bucket_values;
132
14
        } else {
133
            // If it can't, then set the lower bound to the midpoint
134
14
            lower_bucket_values = bucket_values;
135
14
        }
136
        // Increment the search step counter
137
26
        ++search_step;
138
26
    }
139
140
4
    return upper_bucket_values;
141
4
}
_ZN5doris27calculate_bucket_max_valuesIfEEmRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEm
Line
Count
Source
95
4
size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) {
96
    // Ensure that the value map is not empty
97
4
    assert(!value_map.empty());
98
99
    // Calculate the total number of values in the map using std::accumulate()
100
4
    size_t total_values = 0;
101
400
    for (const auto& [value, count] : value_map) {
102
400
        total_values += count;
103
400
    }
104
105
    // If there is only one bucket, then all values will be assigned to that bucket
106
4
    if (num_buckets == 1) {
107
0
        return total_values;
108
0
    }
109
110
    // To calculate the maximum value count in each bucket, we first calculate a conservative upper
111
    // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed
112
    // the actual maximum value count, but it does not underestimate it. The subsequent binary search
113
    // algorithm will approach the actual maximum value count.
114
4
    size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1;
115
116
    // Initialize the lower bound to 0
117
4
    size_t lower_bucket_values = 0;
118
119
    // Perform a binary search to find the maximum number of values that can fit into each bucket
120
4
    int search_step = 0;
121
4
    const int max_search_steps =
122
4
            10; // Limit the number of search steps to avoid excessive iteration
123
124
30
    while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) {
125
        // Calculate the midpoint of the upper and lower bounds
126
26
        const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2;
127
128
        // Check if the given number of values can be assigned to the desired number of buckets
129
26
        if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) {
130
            // If it can, then set the upper bound to the midpoint
131
12
            upper_bucket_values = bucket_values;
132
14
        } else {
133
            // If it can't, then set the lower bound to the midpoint
134
14
            lower_bucket_values = bucket_values;
135
14
        }
136
        // Increment the search step counter
137
26
        ++search_step;
138
26
    }
139
140
4
    return upper_bucket_values;
141
4
}
_ZN5doris27calculate_bucket_max_valuesIdEEmRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEm
Line
Count
Source
95
4
size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) {
96
    // Ensure that the value map is not empty
97
4
    assert(!value_map.empty());
98
99
    // Calculate the total number of values in the map using std::accumulate()
100
4
    size_t total_values = 0;
101
400
    for (const auto& [value, count] : value_map) {
102
400
        total_values += count;
103
400
    }
104
105
    // If there is only one bucket, then all values will be assigned to that bucket
106
4
    if (num_buckets == 1) {
107
0
        return total_values;
108
0
    }
109
110
    // To calculate the maximum value count in each bucket, we first calculate a conservative upper
111
    // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed
112
    // the actual maximum value count, but it does not underestimate it. The subsequent binary search
113
    // algorithm will approach the actual maximum value count.
114
4
    size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1;
115
116
    // Initialize the lower bound to 0
117
4
    size_t lower_bucket_values = 0;
118
119
    // Perform a binary search to find the maximum number of values that can fit into each bucket
120
4
    int search_step = 0;
121
4
    const int max_search_steps =
122
4
            10; // Limit the number of search steps to avoid excessive iteration
123
124
30
    while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) {
125
        // Calculate the midpoint of the upper and lower bounds
126
26
        const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2;
127
128
        // Check if the given number of values can be assigned to the desired number of buckets
129
26
        if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) {
130
            // If it can, then set the upper bound to the midpoint
131
12
            upper_bucket_values = bucket_values;
132
14
        } else {
133
            // If it can't, then set the lower bound to the midpoint
134
14
            lower_bucket_values = bucket_values;
135
14
        }
136
        // Increment the search step counter
137
26
        ++search_step;
138
26
    }
139
140
4
    return upper_bucket_values;
141
4
}
Unexecuted instantiation: _ZN5doris27calculate_bucket_max_valuesINS_7DecimalIiEEEEmRKSt3mapIT_mSt4lessIS4_ESaISt4pairIKS4_mEEEm
Unexecuted instantiation: _ZN5doris27calculate_bucket_max_valuesINS_7DecimalIlEEEEmRKSt3mapIT_mSt4lessIS4_ESaISt4pairIKS4_mEEEm
Unexecuted instantiation: _ZN5doris27calculate_bucket_max_valuesINS_12Decimal128V3EEEmRKSt3mapIT_mSt4lessIS3_ESaISt4pairIKS3_mEEEm
Unexecuted instantiation: _ZN5doris27calculate_bucket_max_valuesINS_7DecimalIN4wide7integerILm256EiEEEEEEmRKSt3mapIT_mSt4lessIS7_ESaISt4pairIKS7_mEEEm
_ZN5doris27calculate_bucket_max_valuesINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEmRKSt3mapIT_mSt4lessIS8_ESaISt4pairIKS8_mEEEm
Line
Count
Source
95
4
size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) {
96
    // Ensure that the value map is not empty
97
4
    assert(!value_map.empty());
98
99
    // Calculate the total number of values in the map using std::accumulate()
100
4
    size_t total_values = 0;
101
4.00k
    for (const auto& [value, count] : value_map) {
102
4.00k
        total_values += count;
103
4.00k
    }
104
105
    // If there is only one bucket, then all values will be assigned to that bucket
106
4
    if (num_buckets == 1) {
107
0
        return total_values;
108
0
    }
109
110
    // To calculate the maximum value count in each bucket, we first calculate a conservative upper
111
    // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed
112
    // the actual maximum value count, but it does not underestimate it. The subsequent binary search
113
    // algorithm will approach the actual maximum value count.
114
4
    size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1;
115
116
    // Initialize the lower bound to 0
117
4
    size_t lower_bucket_values = 0;
118
119
    // Perform a binary search to find the maximum number of values that can fit into each bucket
120
4
    int search_step = 0;
121
4
    const int max_search_steps =
122
4
            10; // Limit the number of search steps to avoid excessive iteration
123
124
42
    while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) {
125
        // Calculate the midpoint of the upper and lower bounds
126
38
        const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2;
127
128
        // Check if the given number of values can be assigned to the desired number of buckets
129
38
        if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) {
130
            // If it can, then set the upper bound to the midpoint
131
18
            upper_bucket_values = bucket_values;
132
20
        } else {
133
            // If it can't, then set the lower bound to the midpoint
134
20
            lower_bucket_values = bucket_values;
135
20
        }
136
        // Increment the search step counter
137
38
        ++search_step;
138
38
    }
139
140
4
    return upper_bucket_values;
141
4
}
_ZN5doris27calculate_bucket_max_valuesINS_11DateV2ValueINS_15DateV2ValueTypeEEEEEmRKSt3mapIT_mSt4lessIS5_ESaISt4pairIKS5_mEEEm
Line
Count
Source
95
4
size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) {
96
    // Ensure that the value map is not empty
97
4
    assert(!value_map.empty());
98
99
    // Calculate the total number of values in the map using std::accumulate()
100
4
    size_t total_values = 0;
101
400
    for (const auto& [value, count] : value_map) {
102
400
        total_values += count;
103
400
    }
104
105
    // If there is only one bucket, then all values will be assigned to that bucket
106
4
    if (num_buckets == 1) {
107
0
        return total_values;
108
0
    }
109
110
    // To calculate the maximum value count in each bucket, we first calculate a conservative upper
111
    // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed
112
    // the actual maximum value count, but it does not underestimate it. The subsequent binary search
113
    // algorithm will approach the actual maximum value count.
114
4
    size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1;
115
116
    // Initialize the lower bound to 0
117
4
    size_t lower_bucket_values = 0;
118
119
    // Perform a binary search to find the maximum number of values that can fit into each bucket
120
4
    int search_step = 0;
121
4
    const int max_search_steps =
122
4
            10; // Limit the number of search steps to avoid excessive iteration
123
124
30
    while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) {
125
        // Calculate the midpoint of the upper and lower bounds
126
26
        const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2;
127
128
        // Check if the given number of values can be assigned to the desired number of buckets
129
26
        if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) {
130
            // If it can, then set the upper bound to the midpoint
131
12
            upper_bucket_values = bucket_values;
132
14
        } else {
133
            // If it can't, then set the lower bound to the midpoint
134
14
            lower_bucket_values = bucket_values;
135
14
        }
136
        // Increment the search step counter
137
26
        ++search_step;
138
26
    }
139
140
4
    return upper_bucket_values;
141
4
}
_ZN5doris27calculate_bucket_max_valuesINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEEEEmRKSt3mapIT_mSt4lessIS5_ESaISt4pairIKS5_mEEEm
Line
Count
Source
95
4
size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) {
96
    // Ensure that the value map is not empty
97
4
    assert(!value_map.empty());
98
99
    // Calculate the total number of values in the map using std::accumulate()
100
4
    size_t total_values = 0;
101
400
    for (const auto& [value, count] : value_map) {
102
400
        total_values += count;
103
400
    }
104
105
    // If there is only one bucket, then all values will be assigned to that bucket
106
4
    if (num_buckets == 1) {
107
0
        return total_values;
108
0
    }
109
110
    // To calculate the maximum value count in each bucket, we first calculate a conservative upper
111
    // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed
112
    // the actual maximum value count, but it does not underestimate it. The subsequent binary search
113
    // algorithm will approach the actual maximum value count.
114
4
    size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1;
115
116
    // Initialize the lower bound to 0
117
4
    size_t lower_bucket_values = 0;
118
119
    // Perform a binary search to find the maximum number of values that can fit into each bucket
120
4
    int search_step = 0;
121
4
    const int max_search_steps =
122
4
            10; // Limit the number of search steps to avoid excessive iteration
123
124
30
    while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) {
125
        // Calculate the midpoint of the upper and lower bounds
126
26
        const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2;
127
128
        // Check if the given number of values can be assigned to the desired number of buckets
129
26
        if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) {
130
            // If it can, then set the upper bound to the midpoint
131
12
            upper_bucket_values = bucket_values;
132
14
        } else {
133
            // If it can't, then set the lower bound to the midpoint
134
14
            lower_bucket_values = bucket_values;
135
14
        }
136
        // Increment the search step counter
137
26
        ++search_step;
138
26
    }
139
140
4
    return upper_bucket_values;
141
4
}
142
143
/**
144
 * Greedy equi-height histogram construction algorithm, inspired by the MySQL
145
 * equi_height implementation(https://dev.mysql.com/doc/dev/mysql-server/latest/equi__height_8h.html).
146
 *
147
 * Given an ordered collection of [value, count] pairs and a maximum bucket
148
 * size, construct a histogram by inserting values into a bucket while keeping
149
 * track of its size. If the insertion of a value into a non-empty bucket
150
 * causes the bucket to exceed the maximum size, create a new empty bucket and
151
 * continue.
152
 *
153
 * The algorithm guarantees a selectivity estimation error of at most ~2 *
154
 * #values / #buckets, often less. Values with a higher relative frequency are
155
 * guaranteed to be placed in singleton buckets.
156
 *
157
 * The minimum composite bucket size is used to minimize the worst case
158
 * selectivity estimation error. In general, the algorithm will adapt to the
159
 * data distribution to minimize the size of composite buckets. The heavy values
160
 * can be placed in singleton buckets and the remaining values will be evenly
161
 * spread across the remaining buckets, leading to a lower composite bucket size.
162
 *
163
 * Note: The term "value" refers to an entry in a column and the actual value
164
 * of an entry. The ordered_map is an ordered collection of [distinct value,
165
 * value count] pairs. For example, a Value_map<String> could contain the pairs ["a", 1], ["b", 2]
166
 * to represent one "a" value and two "b" values.
167
 *
168
 * @param buckets A vector of empty buckets that will be populated with data.
169
 * @param ordered_map An ordered map of distinct values and their counts.
170
 * @param max_num_buckets The maximum number of buckets that can be used.
171
 *
172
 * @return True if the buckets were successfully built, false otherwise.
173
 */
174
template <typename T>
175
bool build_histogram(std::vector<Bucket<T>>& buckets, const std::map<T, size_t>& ordered_map,
176
86
                     const size_t max_num_buckets) {
177
    // If the input map is empty, there is nothing to build.
178
86
    if (ordered_map.empty()) {
179
34
        return false;
180
34
    }
181
182
    // Calculate the maximum number of values that can be assigned to each bucket.
183
52
    auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets);
184
185
    // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional
186
    // allocations when inserting buckets.
187
52
    buckets.clear();
188
52
    buckets.reserve(max_num_buckets);
189
190
    // Initialize bucket variables.
191
52
    size_t distinct_values_count = 0;
192
52
    size_t values_count = 0;
193
52
    size_t cumulative_values = 0;
194
195
    // Record how many values still need to be assigned.
196
52
    auto remaining_distinct_values = ordered_map.size();
197
198
52
    auto it = ordered_map.begin();
199
200
    // Lower value of the current bucket.
201
52
    const T* lower_value = &it->first;
202
203
    // Iterate over the ordered map of distinct values and their counts.
204
7.69k
    for (; it != ordered_map.end(); ++it) {
205
7.64k
        const auto count = it->second;
206
7.64k
        const auto current_value = it->first;
207
208
        // Update the bucket counts and track the number of distinct values assigned.
209
7.64k
        distinct_values_count++;
210
7.64k
        remaining_distinct_values--;
211
7.64k
        values_count += count;
212
7.64k
        cumulative_values += count;
213
214
        // Check whether the current value should be added to the current bucket.
215
7.64k
        auto next = std::next(it);
216
7.64k
        size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1;
217
218
7.64k
        if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets &&
219
7.64k
            values_count + next->second <= bucket_max_values) {
220
            // If the current value is the last in the input map and there are more remaining
221
            // distinct values than empty buckets and adding the value does not cause the bucket
222
            // to exceed its max size, skip adding the value to the current bucket.
223
7.41k
            continue;
224
7.41k
        }
225
226
        // Finalize the current bucket and add it to our collection of buckets.
227
228
        auto pre_sum = cumulative_values - values_count;
228
229
228
        Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count,
230
228
                             pre_sum);
231
228
        buckets.push_back(new_bucket);
232
233
        // Reset variables for the next bucket.
234
228
        if (next != ordered_map.end()) {
235
176
            lower_value = &next->first;
236
176
        }
237
228
        values_count = 0;
238
228
        distinct_values_count = 0;
239
228
    }
240
241
52
    return true;
242
86
}
_ZN5doris15build_histogramIiEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm
Line
Count
Source
176
22
                     const size_t max_num_buckets) {
177
    // If the input map is empty, there is nothing to build.
178
22
    if (ordered_map.empty()) {
179
6
        return false;
180
6
    }
181
182
    // Calculate the maximum number of values that can be assigned to each bucket.
183
16
    auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets);
184
185
    // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional
186
    // allocations when inserting buckets.
187
16
    buckets.clear();
188
16
    buckets.reserve(max_num_buckets);
189
190
    // Initialize bucket variables.
191
16
    size_t distinct_values_count = 0;
192
16
    size_t values_count = 0;
193
16
    size_t cumulative_values = 0;
194
195
    // Record how many values still need to be assigned.
196
16
    auto remaining_distinct_values = ordered_map.size();
197
198
16
    auto it = ordered_map.begin();
199
200
    // Lower value of the current bucket.
201
16
    const T* lower_value = &it->first;
202
203
    // Iterate over the ordered map of distinct values and their counts.
204
458
    for (; it != ordered_map.end(); ++it) {
205
442
        const auto count = it->second;
206
442
        const auto current_value = it->first;
207
208
        // Update the bucket counts and track the number of distinct values assigned.
209
442
        distinct_values_count++;
210
442
        remaining_distinct_values--;
211
442
        values_count += count;
212
442
        cumulative_values += count;
213
214
        // Check whether the current value should be added to the current bucket.
215
442
        auto next = std::next(it);
216
442
        size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1;
217
218
442
        if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets &&
219
442
            values_count + next->second <= bucket_max_values) {
220
            // If the current value is the last in the input map and there are more remaining
221
            // distinct values than empty buckets and adding the value does not cause the bucket
222
            // to exceed its max size, skip adding the value to the current bucket.
223
394
            continue;
224
394
        }
225
226
        // Finalize the current bucket and add it to our collection of buckets.
227
48
        auto pre_sum = cumulative_values - values_count;
228
229
48
        Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count,
230
48
                             pre_sum);
231
48
        buckets.push_back(new_bucket);
232
233
        // Reset variables for the next bucket.
234
48
        if (next != ordered_map.end()) {
235
32
            lower_value = &next->first;
236
32
        }
237
48
        values_count = 0;
238
48
        distinct_values_count = 0;
239
48
    }
240
241
16
    return true;
242
22
}
Unexecuted instantiation: _ZN5doris15build_histogramIhEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm
_ZN5doris15build_histogramIaEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm
Line
Count
Source
176
8
                     const size_t max_num_buckets) {
177
    // If the input map is empty, there is nothing to build.
178
8
    if (ordered_map.empty()) {
179
4
        return false;
180
4
    }
181
182
    // Calculate the maximum number of values that can be assigned to each bucket.
183
4
    auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets);
184
185
    // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional
186
    // allocations when inserting buckets.
187
4
    buckets.clear();
188
4
    buckets.reserve(max_num_buckets);
189
190
    // Initialize bucket variables.
191
4
    size_t distinct_values_count = 0;
192
4
    size_t values_count = 0;
193
4
    size_t cumulative_values = 0;
194
195
    // Record how many values still need to be assigned.
196
4
    auto remaining_distinct_values = ordered_map.size();
197
198
4
    auto it = ordered_map.begin();
199
200
    // Lower value of the current bucket.
201
4
    const T* lower_value = &it->first;
202
203
    // Iterate over the ordered map of distinct values and their counts.
204
404
    for (; it != ordered_map.end(); ++it) {
205
400
        const auto count = it->second;
206
400
        const auto current_value = it->first;
207
208
        // Update the bucket counts and track the number of distinct values assigned.
209
400
        distinct_values_count++;
210
400
        remaining_distinct_values--;
211
400
        values_count += count;
212
400
        cumulative_values += count;
213
214
        // Check whether the current value should be added to the current bucket.
215
400
        auto next = std::next(it);
216
400
        size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1;
217
218
400
        if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets &&
219
400
            values_count + next->second <= bucket_max_values) {
220
            // If the current value is the last in the input map and there are more remaining
221
            // distinct values than empty buckets and adding the value does not cause the bucket
222
            // to exceed its max size, skip adding the value to the current bucket.
223
380
            continue;
224
380
        }
225
226
        // Finalize the current bucket and add it to our collection of buckets.
227
20
        auto pre_sum = cumulative_values - values_count;
228
229
20
        Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count,
230
20
                             pre_sum);
231
20
        buckets.push_back(new_bucket);
232
233
        // Reset variables for the next bucket.
234
20
        if (next != ordered_map.end()) {
235
16
            lower_value = &next->first;
236
16
        }
237
20
        values_count = 0;
238
20
        distinct_values_count = 0;
239
20
    }
240
241
4
    return true;
242
8
}
_ZN5doris15build_histogramIsEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm
Line
Count
Source
176
8
                     const size_t max_num_buckets) {
177
    // If the input map is empty, there is nothing to build.
178
8
    if (ordered_map.empty()) {
179
4
        return false;
180
4
    }
181
182
    // Calculate the maximum number of values that can be assigned to each bucket.
183
4
    auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets);
184
185
    // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional
186
    // allocations when inserting buckets.
187
4
    buckets.clear();
188
4
    buckets.reserve(max_num_buckets);
189
190
    // Initialize bucket variables.
191
4
    size_t distinct_values_count = 0;
192
4
    size_t values_count = 0;
193
4
    size_t cumulative_values = 0;
194
195
    // Record how many values still need to be assigned.
196
4
    auto remaining_distinct_values = ordered_map.size();
197
198
4
    auto it = ordered_map.begin();
199
200
    // Lower value of the current bucket.
201
4
    const T* lower_value = &it->first;
202
203
    // Iterate over the ordered map of distinct values and their counts.
204
404
    for (; it != ordered_map.end(); ++it) {
205
400
        const auto count = it->second;
206
400
        const auto current_value = it->first;
207
208
        // Update the bucket counts and track the number of distinct values assigned.
209
400
        distinct_values_count++;
210
400
        remaining_distinct_values--;
211
400
        values_count += count;
212
400
        cumulative_values += count;
213
214
        // Check whether the current value should be added to the current bucket.
215
400
        auto next = std::next(it);
216
400
        size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1;
217
218
400
        if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets &&
219
400
            values_count + next->second <= bucket_max_values) {
220
            // If the current value is the last in the input map and there are more remaining
221
            // distinct values than empty buckets and adding the value does not cause the bucket
222
            // to exceed its max size, skip adding the value to the current bucket.
223
380
            continue;
224
380
        }
225
226
        // Finalize the current bucket and add it to our collection of buckets.
227
20
        auto pre_sum = cumulative_values - values_count;
228
229
20
        Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count,
230
20
                             pre_sum);
231
20
        buckets.push_back(new_bucket);
232
233
        // Reset variables for the next bucket.
234
20
        if (next != ordered_map.end()) {
235
16
            lower_value = &next->first;
236
16
        }
237
20
        values_count = 0;
238
20
        distinct_values_count = 0;
239
20
    }
240
241
4
    return true;
242
8
}
_ZN5doris15build_histogramIlEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm
Line
Count
Source
176
8
                     const size_t max_num_buckets) {
177
    // If the input map is empty, there is nothing to build.
178
8
    if (ordered_map.empty()) {
179
4
        return false;
180
4
    }
181
182
    // Calculate the maximum number of values that can be assigned to each bucket.
183
4
    auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets);
184
185
    // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional
186
    // allocations when inserting buckets.
187
4
    buckets.clear();
188
4
    buckets.reserve(max_num_buckets);
189
190
    // Initialize bucket variables.
191
4
    size_t distinct_values_count = 0;
192
4
    size_t values_count = 0;
193
4
    size_t cumulative_values = 0;
194
195
    // Record how many values still need to be assigned.
196
4
    auto remaining_distinct_values = ordered_map.size();
197
198
4
    auto it = ordered_map.begin();
199
200
    // Lower value of the current bucket.
201
4
    const T* lower_value = &it->first;
202
203
    // Iterate over the ordered map of distinct values and their counts.
204
404
    for (; it != ordered_map.end(); ++it) {
205
400
        const auto count = it->second;
206
400
        const auto current_value = it->first;
207
208
        // Update the bucket counts and track the number of distinct values assigned.
209
400
        distinct_values_count++;
210
400
        remaining_distinct_values--;
211
400
        values_count += count;
212
400
        cumulative_values += count;
213
214
        // Check whether the current value should be added to the current bucket.
215
400
        auto next = std::next(it);
216
400
        size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1;
217
218
400
        if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets &&
219
400
            values_count + next->second <= bucket_max_values) {
220
            // If the current value is the last in the input map and there are more remaining
221
            // distinct values than empty buckets and adding the value does not cause the bucket
222
            // to exceed its max size, skip adding the value to the current bucket.
223
380
            continue;
224
380
        }
225
226
        // Finalize the current bucket and add it to our collection of buckets.
227
20
        auto pre_sum = cumulative_values - values_count;
228
229
20
        Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count,
230
20
                             pre_sum);
231
20
        buckets.push_back(new_bucket);
232
233
        // Reset variables for the next bucket.
234
20
        if (next != ordered_map.end()) {
235
16
            lower_value = &next->first;
236
16
        }
237
20
        values_count = 0;
238
20
        distinct_values_count = 0;
239
20
    }
240
241
4
    return true;
242
8
}
_ZN5doris15build_histogramInEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm
Line
Count
Source
176
8
                     const size_t max_num_buckets) {
177
    // If the input map is empty, there is nothing to build.
178
8
    if (ordered_map.empty()) {
179
4
        return false;
180
4
    }
181
182
    // Calculate the maximum number of values that can be assigned to each bucket.
183
4
    auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets);
184
185
    // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional
186
    // allocations when inserting buckets.
187
4
    buckets.clear();
188
4
    buckets.reserve(max_num_buckets);
189
190
    // Initialize bucket variables.
191
4
    size_t distinct_values_count = 0;
192
4
    size_t values_count = 0;
193
4
    size_t cumulative_values = 0;
194
195
    // Record how many values still need to be assigned.
196
4
    auto remaining_distinct_values = ordered_map.size();
197
198
4
    auto it = ordered_map.begin();
199
200
    // Lower value of the current bucket.
201
4
    const T* lower_value = &it->first;
202
203
    // Iterate over the ordered map of distinct values and their counts.
204
404
    for (; it != ordered_map.end(); ++it) {
205
400
        const auto count = it->second;
206
400
        const auto current_value = it->first;
207
208
        // Update the bucket counts and track the number of distinct values assigned.
209
400
        distinct_values_count++;
210
400
        remaining_distinct_values--;
211
400
        values_count += count;
212
400
        cumulative_values += count;
213
214
        // Check whether the current value should be added to the current bucket.
215
400
        auto next = std::next(it);
216
400
        size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1;
217
218
400
        if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets &&
219
400
            values_count + next->second <= bucket_max_values) {
220
            // If the current value is the last in the input map and there are more remaining
221
            // distinct values than empty buckets and adding the value does not cause the bucket
222
            // to exceed its max size, skip adding the value to the current bucket.
223
380
            continue;
224
380
        }
225
226
        // Finalize the current bucket and add it to our collection of buckets.
227
20
        auto pre_sum = cumulative_values - values_count;
228
229
20
        Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count,
230
20
                             pre_sum);
231
20
        buckets.push_back(new_bucket);
232
233
        // Reset variables for the next bucket.
234
20
        if (next != ordered_map.end()) {
235
16
            lower_value = &next->first;
236
16
        }
237
20
        values_count = 0;
238
20
        distinct_values_count = 0;
239
20
    }
240
241
4
    return true;
242
8
}
_ZN5doris15build_histogramIfEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm
Line
Count
Source
176
8
                     const size_t max_num_buckets) {
177
    // If the input map is empty, there is nothing to build.
178
8
    if (ordered_map.empty()) {
179
4
        return false;
180
4
    }
181
182
    // Calculate the maximum number of values that can be assigned to each bucket.
183
4
    auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets);
184
185
    // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional
186
    // allocations when inserting buckets.
187
4
    buckets.clear();
188
4
    buckets.reserve(max_num_buckets);
189
190
    // Initialize bucket variables.
191
4
    size_t distinct_values_count = 0;
192
4
    size_t values_count = 0;
193
4
    size_t cumulative_values = 0;
194
195
    // Record how many values still need to be assigned.
196
4
    auto remaining_distinct_values = ordered_map.size();
197
198
4
    auto it = ordered_map.begin();
199
200
    // Lower value of the current bucket.
201
4
    const T* lower_value = &it->first;
202
203
    // Iterate over the ordered map of distinct values and their counts.
204
404
    for (; it != ordered_map.end(); ++it) {
205
400
        const auto count = it->second;
206
400
        const auto current_value = it->first;
207
208
        // Update the bucket counts and track the number of distinct values assigned.
209
400
        distinct_values_count++;
210
400
        remaining_distinct_values--;
211
400
        values_count += count;
212
400
        cumulative_values += count;
213
214
        // Check whether the current value should be added to the current bucket.
215
400
        auto next = std::next(it);
216
400
        size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1;
217
218
400
        if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets &&
219
400
            values_count + next->second <= bucket_max_values) {
220
            // If the current value is the last in the input map and there are more remaining
221
            // distinct values than empty buckets and adding the value does not cause the bucket
222
            // to exceed its max size, skip adding the value to the current bucket.
223
380
            continue;
224
380
        }
225
226
        // Finalize the current bucket and add it to our collection of buckets.
227
20
        auto pre_sum = cumulative_values - values_count;
228
229
20
        Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count,
230
20
                             pre_sum);
231
20
        buckets.push_back(new_bucket);
232
233
        // Reset variables for the next bucket.
234
20
        if (next != ordered_map.end()) {
235
16
            lower_value = &next->first;
236
16
        }
237
20
        values_count = 0;
238
20
        distinct_values_count = 0;
239
20
    }
240
241
4
    return true;
242
8
}
_ZN5doris15build_histogramIdEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm
Line
Count
Source
176
8
                     const size_t max_num_buckets) {
177
    // If the input map is empty, there is nothing to build.
178
8
    if (ordered_map.empty()) {
179
4
        return false;
180
4
    }
181
182
    // Calculate the maximum number of values that can be assigned to each bucket.
183
4
    auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets);
184
185
    // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional
186
    // allocations when inserting buckets.
187
4
    buckets.clear();
188
4
    buckets.reserve(max_num_buckets);
189
190
    // Initialize bucket variables.
191
4
    size_t distinct_values_count = 0;
192
4
    size_t values_count = 0;
193
4
    size_t cumulative_values = 0;
194
195
    // Record how many values still need to be assigned.
196
4
    auto remaining_distinct_values = ordered_map.size();
197
198
4
    auto it = ordered_map.begin();
199
200
    // Lower value of the current bucket.
201
4
    const T* lower_value = &it->first;
202
203
    // Iterate over the ordered map of distinct values and their counts.
204
404
    for (; it != ordered_map.end(); ++it) {
205
400
        const auto count = it->second;
206
400
        const auto current_value = it->first;
207
208
        // Update the bucket counts and track the number of distinct values assigned.
209
400
        distinct_values_count++;
210
400
        remaining_distinct_values--;
211
400
        values_count += count;
212
400
        cumulative_values += count;
213
214
        // Check whether the current value should be added to the current bucket.
215
400
        auto next = std::next(it);
216
400
        size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1;
217
218
400
        if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets &&
219
400
            values_count + next->second <= bucket_max_values) {
220
            // If the current value is the last in the input map and there are more remaining
221
            // distinct values than empty buckets and adding the value does not cause the bucket
222
            // to exceed its max size, skip adding the value to the current bucket.
223
380
            continue;
224
380
        }
225
226
        // Finalize the current bucket and add it to our collection of buckets.
227
20
        auto pre_sum = cumulative_values - values_count;
228
229
20
        Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count,
230
20
                             pre_sum);
231
20
        buckets.push_back(new_bucket);
232
233
        // Reset variables for the next bucket.
234
20
        if (next != ordered_map.end()) {
235
16
            lower_value = &next->first;
236
16
        }
237
20
        values_count = 0;
238
20
        distinct_values_count = 0;
239
20
    }
240
241
4
    return true;
242
8
}
Unexecuted instantiation: _ZN5doris15build_histogramINS_7DecimalIiEEEEbRSt6vectorINS_6BucketIT_EESaIS6_EERKSt3mapIS5_mSt4lessIS5_ESaISt4pairIKS5_mEEEm
Unexecuted instantiation: _ZN5doris15build_histogramINS_7DecimalIlEEEEbRSt6vectorINS_6BucketIT_EESaIS6_EERKSt3mapIS5_mSt4lessIS5_ESaISt4pairIKS5_mEEEm
Unexecuted instantiation: _ZN5doris15build_histogramINS_12Decimal128V3EEEbRSt6vectorINS_6BucketIT_EESaIS5_EERKSt3mapIS4_mSt4lessIS4_ESaISt4pairIKS4_mEEEm
Unexecuted instantiation: _ZN5doris15build_histogramINS_7DecimalIN4wide7integerILm256EiEEEEEEbRSt6vectorINS_6BucketIT_EESaIS9_EERKSt3mapIS8_mSt4lessIS8_ESaISt4pairIKS8_mEEEm
_ZN5doris15build_histogramINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEbRSt6vectorINS_6BucketIT_EESaISA_EERKSt3mapIS9_mSt4lessIS9_ESaISt4pairIKS9_mEEEm
Line
Count
Source
176
8
                     const size_t max_num_buckets) {
177
    // If the input map is empty, there is nothing to build.
178
8
    if (ordered_map.empty()) {
179
4
        return false;
180
4
    }
181
182
    // Calculate the maximum number of values that can be assigned to each bucket.
183
4
    auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets);
184
185
    // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional
186
    // allocations when inserting buckets.
187
4
    buckets.clear();
188
4
    buckets.reserve(max_num_buckets);
189
190
    // Initialize bucket variables.
191
4
    size_t distinct_values_count = 0;
192
4
    size_t values_count = 0;
193
4
    size_t cumulative_values = 0;
194
195
    // Record how many values still need to be assigned.
196
4
    auto remaining_distinct_values = ordered_map.size();
197
198
4
    auto it = ordered_map.begin();
199
200
    // Lower value of the current bucket.
201
4
    const T* lower_value = &it->first;
202
203
    // Iterate over the ordered map of distinct values and their counts.
204
4.00k
    for (; it != ordered_map.end(); ++it) {
205
4.00k
        const auto count = it->second;
206
4.00k
        const auto current_value = it->first;
207
208
        // Update the bucket counts and track the number of distinct values assigned.
209
4.00k
        distinct_values_count++;
210
4.00k
        remaining_distinct_values--;
211
4.00k
        values_count += count;
212
4.00k
        cumulative_values += count;
213
214
        // Check whether the current value should be added to the current bucket.
215
4.00k
        auto next = std::next(it);
216
4.00k
        size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1;
217
218
4.00k
        if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets &&
219
4.00k
            values_count + next->second <= bucket_max_values) {
220
            // If the current value is the last in the input map and there are more remaining
221
            // distinct values than empty buckets and adding the value does not cause the bucket
222
            // to exceed its max size, skip adding the value to the current bucket.
223
3.98k
            continue;
224
3.98k
        }
225
226
        // Finalize the current bucket and add it to our collection of buckets.
227
20
        auto pre_sum = cumulative_values - values_count;
228
229
20
        Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count,
230
20
                             pre_sum);
231
20
        buckets.push_back(new_bucket);
232
233
        // Reset variables for the next bucket.
234
20
        if (next != ordered_map.end()) {
235
16
            lower_value = &next->first;
236
16
        }
237
20
        values_count = 0;
238
20
        distinct_values_count = 0;
239
20
    }
240
241
4
    return true;
242
8
}
_ZN5doris15build_histogramINS_11DateV2ValueINS_15DateV2ValueTypeEEEEEbRSt6vectorINS_6BucketIT_EESaIS7_EERKSt3mapIS6_mSt4lessIS6_ESaISt4pairIKS6_mEEEm
Line
Count
Source
176
4
                     const size_t max_num_buckets) {
177
    // If the input map is empty, there is nothing to build.
178
4
    if (ordered_map.empty()) {
179
0
        return false;
180
0
    }
181
182
    // Calculate the maximum number of values that can be assigned to each bucket.
183
4
    auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets);
184
185
    // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional
186
    // allocations when inserting buckets.
187
4
    buckets.clear();
188
4
    buckets.reserve(max_num_buckets);
189
190
    // Initialize bucket variables.
191
4
    size_t distinct_values_count = 0;
192
4
    size_t values_count = 0;
193
4
    size_t cumulative_values = 0;
194
195
    // Record how many values still need to be assigned.
196
4
    auto remaining_distinct_values = ordered_map.size();
197
198
4
    auto it = ordered_map.begin();
199
200
    // Lower value of the current bucket.
201
4
    const T* lower_value = &it->first;
202
203
    // Iterate over the ordered map of distinct values and their counts.
204
404
    for (; it != ordered_map.end(); ++it) {
205
400
        const auto count = it->second;
206
400
        const auto current_value = it->first;
207
208
        // Update the bucket counts and track the number of distinct values assigned.
209
400
        distinct_values_count++;
210
400
        remaining_distinct_values--;
211
400
        values_count += count;
212
400
        cumulative_values += count;
213
214
        // Check whether the current value should be added to the current bucket.
215
400
        auto next = std::next(it);
216
400
        size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1;
217
218
400
        if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets &&
219
400
            values_count + next->second <= bucket_max_values) {
220
            // If the current value is the last in the input map and there are more remaining
221
            // distinct values than empty buckets and adding the value does not cause the bucket
222
            // to exceed its max size, skip adding the value to the current bucket.
223
380
            continue;
224
380
        }
225
226
        // Finalize the current bucket and add it to our collection of buckets.
227
20
        auto pre_sum = cumulative_values - values_count;
228
229
20
        Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count,
230
20
                             pre_sum);
231
20
        buckets.push_back(new_bucket);
232
233
        // Reset variables for the next bucket.
234
20
        if (next != ordered_map.end()) {
235
16
            lower_value = &next->first;
236
16
        }
237
20
        values_count = 0;
238
20
        distinct_values_count = 0;
239
20
    }
240
241
4
    return true;
242
4
}
_ZN5doris15build_histogramINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEEEEbRSt6vectorINS_6BucketIT_EESaIS7_EERKSt3mapIS6_mSt4lessIS6_ESaISt4pairIKS6_mEEEm
Line
Count
Source
176
4
                     const size_t max_num_buckets) {
177
    // If the input map is empty, there is nothing to build.
178
4
    if (ordered_map.empty()) {
179
0
        return false;
180
0
    }
181
182
    // Calculate the maximum number of values that can be assigned to each bucket.
183
4
    auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets);
184
185
    // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional
186
    // allocations when inserting buckets.
187
4
    buckets.clear();
188
4
    buckets.reserve(max_num_buckets);
189
190
    // Initialize bucket variables.
191
4
    size_t distinct_values_count = 0;
192
4
    size_t values_count = 0;
193
4
    size_t cumulative_values = 0;
194
195
    // Record how many values still need to be assigned.
196
4
    auto remaining_distinct_values = ordered_map.size();
197
198
4
    auto it = ordered_map.begin();
199
200
    // Lower value of the current bucket.
201
4
    const T* lower_value = &it->first;
202
203
    // Iterate over the ordered map of distinct values and their counts.
204
404
    for (; it != ordered_map.end(); ++it) {
205
400
        const auto count = it->second;
206
400
        const auto current_value = it->first;
207
208
        // Update the bucket counts and track the number of distinct values assigned.
209
400
        distinct_values_count++;
210
400
        remaining_distinct_values--;
211
400
        values_count += count;
212
400
        cumulative_values += count;
213
214
        // Check whether the current value should be added to the current bucket.
215
400
        auto next = std::next(it);
216
400
        size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1;
217
218
400
        if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets &&
219
400
            values_count + next->second <= bucket_max_values) {
220
            // If the current value is the last in the input map and there are more remaining
221
            // distinct values than empty buckets and adding the value does not cause the bucket
222
            // to exceed its max size, skip adding the value to the current bucket.
223
380
            continue;
224
380
        }
225
226
        // Finalize the current bucket and add it to our collection of buckets.
227
20
        auto pre_sum = cumulative_values - values_count;
228
229
20
        Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count,
230
20
                             pre_sum);
231
20
        buckets.push_back(new_bucket);
232
233
        // Reset variables for the next bucket.
234
20
        if (next != ordered_map.end()) {
235
16
            lower_value = &next->first;
236
16
        }
237
20
        values_count = 0;
238
20
        distinct_values_count = 0;
239
20
    }
240
241
4
    return true;
242
4
}
243
244
template <typename T>
245
bool histogram_to_json(rapidjson::StringBuffer& buffer, const std::vector<Bucket<T>>& buckets,
246
76
                       const DataTypePtr& data_type) {
247
76
    rapidjson::Document doc;
248
76
    doc.SetObject();
249
76
    rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
250
251
76
    int num_buckets = cast_set<int>(buckets.size());
252
76
    doc.AddMember("num_buckets", num_buckets, allocator);
253
254
76
    rapidjson::Value bucket_arr(rapidjson::kArrayType);
255
76
    bucket_arr.Reserve(num_buckets, allocator);
256
257
76
    std::stringstream ss1;
258
76
    std::stringstream ss2;
259
260
76
    rapidjson::Value lower_val;
261
76
    rapidjson::Value upper_val;
262
263
    // Convert bucket's lower and upper to 2 columns
264
76
    MutableColumnPtr lower_column = data_type->create_column();
265
76
    MutableColumnPtr upper_column = data_type->create_column();
266
204
    for (const auto& bucket : buckets) {
267
        // String type is different, it has to pass in length
268
        // if it is string type , directly use string value
269
204
        if constexpr (!std::is_same_v<T, std::string>) {
270
184
            lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0);
271
184
            upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0);
272
184
        }
273
204
    }
274
76
    size_t row_num = 0;
275
276
76
    auto format_options = DataTypeSerDe::get_default_format_options();
277
76
    auto time_zone = cctz::utc_time_zone();
278
76
    format_options.timezone = &time_zone;
279
280
204
    for (const auto& bucket : buckets) {
281
204
        if constexpr (std::is_same_v<T, std::string>) {
282
20
            lower_val.SetString(bucket.lower.data(),
283
20
                                static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator);
284
20
            upper_val.SetString(bucket.upper.data(),
285
20
                                static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator);
286
184
        } else {
287
184
            std::string lower_str = data_type->to_string(*lower_column, row_num, format_options);
288
184
            std::string upper_str = data_type->to_string(*upper_column, row_num, format_options);
289
184
            ++row_num;
290
184
            lower_val.SetString(lower_str.data(),
291
184
                                static_cast<rapidjson::SizeType>(lower_str.size()), allocator);
292
184
            upper_val.SetString(upper_str.data(),
293
184
                                static_cast<rapidjson::SizeType>(upper_str.size()), allocator);
294
184
        }
295
204
        rapidjson::Value bucket_json(rapidjson::kObjectType);
296
204
        bucket_json.AddMember("lower", lower_val, allocator);
297
204
        bucket_json.AddMember("upper", upper_val, allocator);
298
204
        bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator);
299
204
        bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator);
300
204
        bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator);
301
302
204
        bucket_arr.PushBack(bucket_json, allocator);
303
204
    }
304
305
76
    doc.AddMember("buckets", bucket_arr, allocator);
306
76
    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
307
76
    doc.Accept(writer);
308
309
76
    return !buckets.empty() && buffer.GetSize() > 0;
310
76
}
_ZN5doris17histogram_to_jsonIiEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE
Line
Count
Source
246
12
                       const DataTypePtr& data_type) {
247
12
    rapidjson::Document doc;
248
12
    doc.SetObject();
249
12
    rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
250
251
12
    int num_buckets = cast_set<int>(buckets.size());
252
12
    doc.AddMember("num_buckets", num_buckets, allocator);
253
254
12
    rapidjson::Value bucket_arr(rapidjson::kArrayType);
255
12
    bucket_arr.Reserve(num_buckets, allocator);
256
257
12
    std::stringstream ss1;
258
12
    std::stringstream ss2;
259
260
12
    rapidjson::Value lower_val;
261
12
    rapidjson::Value upper_val;
262
263
    // Convert bucket's lower and upper to 2 columns
264
12
    MutableColumnPtr lower_column = data_type->create_column();
265
12
    MutableColumnPtr upper_column = data_type->create_column();
266
24
    for (const auto& bucket : buckets) {
267
        // String type is different, it has to pass in length
268
        // if it is string type , directly use string value
269
24
        if constexpr (!std::is_same_v<T, std::string>) {
270
24
            lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0);
271
24
            upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0);
272
24
        }
273
24
    }
274
12
    size_t row_num = 0;
275
276
12
    auto format_options = DataTypeSerDe::get_default_format_options();
277
12
    auto time_zone = cctz::utc_time_zone();
278
12
    format_options.timezone = &time_zone;
279
280
24
    for (const auto& bucket : buckets) {
281
        if constexpr (std::is_same_v<T, std::string>) {
282
            lower_val.SetString(bucket.lower.data(),
283
                                static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator);
284
            upper_val.SetString(bucket.upper.data(),
285
                                static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator);
286
24
        } else {
287
24
            std::string lower_str = data_type->to_string(*lower_column, row_num, format_options);
288
24
            std::string upper_str = data_type->to_string(*upper_column, row_num, format_options);
289
24
            ++row_num;
290
24
            lower_val.SetString(lower_str.data(),
291
24
                                static_cast<rapidjson::SizeType>(lower_str.size()), allocator);
292
24
            upper_val.SetString(upper_str.data(),
293
24
                                static_cast<rapidjson::SizeType>(upper_str.size()), allocator);
294
24
        }
295
24
        rapidjson::Value bucket_json(rapidjson::kObjectType);
296
24
        bucket_json.AddMember("lower", lower_val, allocator);
297
24
        bucket_json.AddMember("upper", upper_val, allocator);
298
24
        bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator);
299
24
        bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator);
300
24
        bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator);
301
302
24
        bucket_arr.PushBack(bucket_json, allocator);
303
24
    }
304
305
12
    doc.AddMember("buckets", bucket_arr, allocator);
306
12
    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
307
12
    doc.Accept(writer);
308
309
12
    return !buckets.empty() && buffer.GetSize() > 0;
310
12
}
Unexecuted instantiation: _ZN5doris17histogram_to_jsonIhEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE
_ZN5doris17histogram_to_jsonIaEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE
Line
Count
Source
246
8
                       const DataTypePtr& data_type) {
247
8
    rapidjson::Document doc;
248
8
    doc.SetObject();
249
8
    rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
250
251
8
    int num_buckets = cast_set<int>(buckets.size());
252
8
    doc.AddMember("num_buckets", num_buckets, allocator);
253
254
8
    rapidjson::Value bucket_arr(rapidjson::kArrayType);
255
8
    bucket_arr.Reserve(num_buckets, allocator);
256
257
8
    std::stringstream ss1;
258
8
    std::stringstream ss2;
259
260
8
    rapidjson::Value lower_val;
261
8
    rapidjson::Value upper_val;
262
263
    // Convert bucket's lower and upper to 2 columns
264
8
    MutableColumnPtr lower_column = data_type->create_column();
265
8
    MutableColumnPtr upper_column = data_type->create_column();
266
20
    for (const auto& bucket : buckets) {
267
        // String type is different, it has to pass in length
268
        // if it is string type , directly use string value
269
20
        if constexpr (!std::is_same_v<T, std::string>) {
270
20
            lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0);
271
20
            upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0);
272
20
        }
273
20
    }
274
8
    size_t row_num = 0;
275
276
8
    auto format_options = DataTypeSerDe::get_default_format_options();
277
8
    auto time_zone = cctz::utc_time_zone();
278
8
    format_options.timezone = &time_zone;
279
280
20
    for (const auto& bucket : buckets) {
281
        if constexpr (std::is_same_v<T, std::string>) {
282
            lower_val.SetString(bucket.lower.data(),
283
                                static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator);
284
            upper_val.SetString(bucket.upper.data(),
285
                                static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator);
286
20
        } else {
287
20
            std::string lower_str = data_type->to_string(*lower_column, row_num, format_options);
288
20
            std::string upper_str = data_type->to_string(*upper_column, row_num, format_options);
289
20
            ++row_num;
290
20
            lower_val.SetString(lower_str.data(),
291
20
                                static_cast<rapidjson::SizeType>(lower_str.size()), allocator);
292
20
            upper_val.SetString(upper_str.data(),
293
20
                                static_cast<rapidjson::SizeType>(upper_str.size()), allocator);
294
20
        }
295
20
        rapidjson::Value bucket_json(rapidjson::kObjectType);
296
20
        bucket_json.AddMember("lower", lower_val, allocator);
297
20
        bucket_json.AddMember("upper", upper_val, allocator);
298
20
        bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator);
299
20
        bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator);
300
20
        bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator);
301
302
20
        bucket_arr.PushBack(bucket_json, allocator);
303
20
    }
304
305
8
    doc.AddMember("buckets", bucket_arr, allocator);
306
8
    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
307
8
    doc.Accept(writer);
308
309
8
    return !buckets.empty() && buffer.GetSize() > 0;
310
8
}
_ZN5doris17histogram_to_jsonIsEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE
Line
Count
Source
246
8
                       const DataTypePtr& data_type) {
247
8
    rapidjson::Document doc;
248
8
    doc.SetObject();
249
8
    rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
250
251
8
    int num_buckets = cast_set<int>(buckets.size());
252
8
    doc.AddMember("num_buckets", num_buckets, allocator);
253
254
8
    rapidjson::Value bucket_arr(rapidjson::kArrayType);
255
8
    bucket_arr.Reserve(num_buckets, allocator);
256
257
8
    std::stringstream ss1;
258
8
    std::stringstream ss2;
259
260
8
    rapidjson::Value lower_val;
261
8
    rapidjson::Value upper_val;
262
263
    // Convert bucket's lower and upper to 2 columns
264
8
    MutableColumnPtr lower_column = data_type->create_column();
265
8
    MutableColumnPtr upper_column = data_type->create_column();
266
20
    for (const auto& bucket : buckets) {
267
        // String type is different, it has to pass in length
268
        // if it is string type , directly use string value
269
20
        if constexpr (!std::is_same_v<T, std::string>) {
270
20
            lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0);
271
20
            upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0);
272
20
        }
273
20
    }
274
8
    size_t row_num = 0;
275
276
8
    auto format_options = DataTypeSerDe::get_default_format_options();
277
8
    auto time_zone = cctz::utc_time_zone();
278
8
    format_options.timezone = &time_zone;
279
280
20
    for (const auto& bucket : buckets) {
281
        if constexpr (std::is_same_v<T, std::string>) {
282
            lower_val.SetString(bucket.lower.data(),
283
                                static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator);
284
            upper_val.SetString(bucket.upper.data(),
285
                                static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator);
286
20
        } else {
287
20
            std::string lower_str = data_type->to_string(*lower_column, row_num, format_options);
288
20
            std::string upper_str = data_type->to_string(*upper_column, row_num, format_options);
289
20
            ++row_num;
290
20
            lower_val.SetString(lower_str.data(),
291
20
                                static_cast<rapidjson::SizeType>(lower_str.size()), allocator);
292
20
            upper_val.SetString(upper_str.data(),
293
20
                                static_cast<rapidjson::SizeType>(upper_str.size()), allocator);
294
20
        }
295
20
        rapidjson::Value bucket_json(rapidjson::kObjectType);
296
20
        bucket_json.AddMember("lower", lower_val, allocator);
297
20
        bucket_json.AddMember("upper", upper_val, allocator);
298
20
        bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator);
299
20
        bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator);
300
20
        bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator);
301
302
20
        bucket_arr.PushBack(bucket_json, allocator);
303
20
    }
304
305
8
    doc.AddMember("buckets", bucket_arr, allocator);
306
8
    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
307
8
    doc.Accept(writer);
308
309
8
    return !buckets.empty() && buffer.GetSize() > 0;
310
8
}
_ZN5doris17histogram_to_jsonIlEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE
Line
Count
Source
246
8
                       const DataTypePtr& data_type) {
247
8
    rapidjson::Document doc;
248
8
    doc.SetObject();
249
8
    rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
250
251
8
    int num_buckets = cast_set<int>(buckets.size());
252
8
    doc.AddMember("num_buckets", num_buckets, allocator);
253
254
8
    rapidjson::Value bucket_arr(rapidjson::kArrayType);
255
8
    bucket_arr.Reserve(num_buckets, allocator);
256
257
8
    std::stringstream ss1;
258
8
    std::stringstream ss2;
259
260
8
    rapidjson::Value lower_val;
261
8
    rapidjson::Value upper_val;
262
263
    // Convert bucket's lower and upper to 2 columns
264
8
    MutableColumnPtr lower_column = data_type->create_column();
265
8
    MutableColumnPtr upper_column = data_type->create_column();
266
20
    for (const auto& bucket : buckets) {
267
        // String type is different, it has to pass in length
268
        // if it is string type , directly use string value
269
20
        if constexpr (!std::is_same_v<T, std::string>) {
270
20
            lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0);
271
20
            upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0);
272
20
        }
273
20
    }
274
8
    size_t row_num = 0;
275
276
8
    auto format_options = DataTypeSerDe::get_default_format_options();
277
8
    auto time_zone = cctz::utc_time_zone();
278
8
    format_options.timezone = &time_zone;
279
280
20
    for (const auto& bucket : buckets) {
281
        if constexpr (std::is_same_v<T, std::string>) {
282
            lower_val.SetString(bucket.lower.data(),
283
                                static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator);
284
            upper_val.SetString(bucket.upper.data(),
285
                                static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator);
286
20
        } else {
287
20
            std::string lower_str = data_type->to_string(*lower_column, row_num, format_options);
288
20
            std::string upper_str = data_type->to_string(*upper_column, row_num, format_options);
289
20
            ++row_num;
290
20
            lower_val.SetString(lower_str.data(),
291
20
                                static_cast<rapidjson::SizeType>(lower_str.size()), allocator);
292
20
            upper_val.SetString(upper_str.data(),
293
20
                                static_cast<rapidjson::SizeType>(upper_str.size()), allocator);
294
20
        }
295
20
        rapidjson::Value bucket_json(rapidjson::kObjectType);
296
20
        bucket_json.AddMember("lower", lower_val, allocator);
297
20
        bucket_json.AddMember("upper", upper_val, allocator);
298
20
        bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator);
299
20
        bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator);
300
20
        bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator);
301
302
20
        bucket_arr.PushBack(bucket_json, allocator);
303
20
    }
304
305
8
    doc.AddMember("buckets", bucket_arr, allocator);
306
8
    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
307
8
    doc.Accept(writer);
308
309
8
    return !buckets.empty() && buffer.GetSize() > 0;
310
8
}
_ZN5doris17histogram_to_jsonInEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE
Line
Count
Source
246
8
                       const DataTypePtr& data_type) {
247
8
    rapidjson::Document doc;
248
8
    doc.SetObject();
249
8
    rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
250
251
8
    int num_buckets = cast_set<int>(buckets.size());
252
8
    doc.AddMember("num_buckets", num_buckets, allocator);
253
254
8
    rapidjson::Value bucket_arr(rapidjson::kArrayType);
255
8
    bucket_arr.Reserve(num_buckets, allocator);
256
257
8
    std::stringstream ss1;
258
8
    std::stringstream ss2;
259
260
8
    rapidjson::Value lower_val;
261
8
    rapidjson::Value upper_val;
262
263
    // Convert bucket's lower and upper to 2 columns
264
8
    MutableColumnPtr lower_column = data_type->create_column();
265
8
    MutableColumnPtr upper_column = data_type->create_column();
266
20
    for (const auto& bucket : buckets) {
267
        // String type is different, it has to pass in length
268
        // if it is string type , directly use string value
269
20
        if constexpr (!std::is_same_v<T, std::string>) {
270
20
            lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0);
271
20
            upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0);
272
20
        }
273
20
    }
274
8
    size_t row_num = 0;
275
276
8
    auto format_options = DataTypeSerDe::get_default_format_options();
277
8
    auto time_zone = cctz::utc_time_zone();
278
8
    format_options.timezone = &time_zone;
279
280
20
    for (const auto& bucket : buckets) {
281
        if constexpr (std::is_same_v<T, std::string>) {
282
            lower_val.SetString(bucket.lower.data(),
283
                                static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator);
284
            upper_val.SetString(bucket.upper.data(),
285
                                static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator);
286
20
        } else {
287
20
            std::string lower_str = data_type->to_string(*lower_column, row_num, format_options);
288
20
            std::string upper_str = data_type->to_string(*upper_column, row_num, format_options);
289
20
            ++row_num;
290
20
            lower_val.SetString(lower_str.data(),
291
20
                                static_cast<rapidjson::SizeType>(lower_str.size()), allocator);
292
20
            upper_val.SetString(upper_str.data(),
293
20
                                static_cast<rapidjson::SizeType>(upper_str.size()), allocator);
294
20
        }
295
20
        rapidjson::Value bucket_json(rapidjson::kObjectType);
296
20
        bucket_json.AddMember("lower", lower_val, allocator);
297
20
        bucket_json.AddMember("upper", upper_val, allocator);
298
20
        bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator);
299
20
        bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator);
300
20
        bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator);
301
302
20
        bucket_arr.PushBack(bucket_json, allocator);
303
20
    }
304
305
8
    doc.AddMember("buckets", bucket_arr, allocator);
306
8
    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
307
8
    doc.Accept(writer);
308
309
8
    return !buckets.empty() && buffer.GetSize() > 0;
310
8
}
_ZN5doris17histogram_to_jsonIfEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE
Line
Count
Source
246
8
                       const DataTypePtr& data_type) {
247
8
    rapidjson::Document doc;
248
8
    doc.SetObject();
249
8
    rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
250
251
8
    int num_buckets = cast_set<int>(buckets.size());
252
8
    doc.AddMember("num_buckets", num_buckets, allocator);
253
254
8
    rapidjson::Value bucket_arr(rapidjson::kArrayType);
255
8
    bucket_arr.Reserve(num_buckets, allocator);
256
257
8
    std::stringstream ss1;
258
8
    std::stringstream ss2;
259
260
8
    rapidjson::Value lower_val;
261
8
    rapidjson::Value upper_val;
262
263
    // Convert bucket's lower and upper to 2 columns
264
8
    MutableColumnPtr lower_column = data_type->create_column();
265
8
    MutableColumnPtr upper_column = data_type->create_column();
266
20
    for (const auto& bucket : buckets) {
267
        // String type is different, it has to pass in length
268
        // if it is string type , directly use string value
269
20
        if constexpr (!std::is_same_v<T, std::string>) {
270
20
            lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0);
271
20
            upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0);
272
20
        }
273
20
    }
274
8
    size_t row_num = 0;
275
276
8
    auto format_options = DataTypeSerDe::get_default_format_options();
277
8
    auto time_zone = cctz::utc_time_zone();
278
8
    format_options.timezone = &time_zone;
279
280
20
    for (const auto& bucket : buckets) {
281
        if constexpr (std::is_same_v<T, std::string>) {
282
            lower_val.SetString(bucket.lower.data(),
283
                                static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator);
284
            upper_val.SetString(bucket.upper.data(),
285
                                static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator);
286
20
        } else {
287
20
            std::string lower_str = data_type->to_string(*lower_column, row_num, format_options);
288
20
            std::string upper_str = data_type->to_string(*upper_column, row_num, format_options);
289
20
            ++row_num;
290
20
            lower_val.SetString(lower_str.data(),
291
20
                                static_cast<rapidjson::SizeType>(lower_str.size()), allocator);
292
20
            upper_val.SetString(upper_str.data(),
293
20
                                static_cast<rapidjson::SizeType>(upper_str.size()), allocator);
294
20
        }
295
20
        rapidjson::Value bucket_json(rapidjson::kObjectType);
296
20
        bucket_json.AddMember("lower", lower_val, allocator);
297
20
        bucket_json.AddMember("upper", upper_val, allocator);
298
20
        bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator);
299
20
        bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator);
300
20
        bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator);
301
302
20
        bucket_arr.PushBack(bucket_json, allocator);
303
20
    }
304
305
8
    doc.AddMember("buckets", bucket_arr, allocator);
306
8
    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
307
8
    doc.Accept(writer);
308
309
8
    return !buckets.empty() && buffer.GetSize() > 0;
310
8
}
_ZN5doris17histogram_to_jsonIdEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE
Line
Count
Source
246
8
                       const DataTypePtr& data_type) {
247
8
    rapidjson::Document doc;
248
8
    doc.SetObject();
249
8
    rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
250
251
8
    int num_buckets = cast_set<int>(buckets.size());
252
8
    doc.AddMember("num_buckets", num_buckets, allocator);
253
254
8
    rapidjson::Value bucket_arr(rapidjson::kArrayType);
255
8
    bucket_arr.Reserve(num_buckets, allocator);
256
257
8
    std::stringstream ss1;
258
8
    std::stringstream ss2;
259
260
8
    rapidjson::Value lower_val;
261
8
    rapidjson::Value upper_val;
262
263
    // Convert bucket's lower and upper to 2 columns
264
8
    MutableColumnPtr lower_column = data_type->create_column();
265
8
    MutableColumnPtr upper_column = data_type->create_column();
266
20
    for (const auto& bucket : buckets) {
267
        // String type is different, it has to pass in length
268
        // if it is string type , directly use string value
269
20
        if constexpr (!std::is_same_v<T, std::string>) {
270
20
            lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0);
271
20
            upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0);
272
20
        }
273
20
    }
274
8
    size_t row_num = 0;
275
276
8
    auto format_options = DataTypeSerDe::get_default_format_options();
277
8
    auto time_zone = cctz::utc_time_zone();
278
8
    format_options.timezone = &time_zone;
279
280
20
    for (const auto& bucket : buckets) {
281
        if constexpr (std::is_same_v<T, std::string>) {
282
            lower_val.SetString(bucket.lower.data(),
283
                                static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator);
284
            upper_val.SetString(bucket.upper.data(),
285
                                static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator);
286
20
        } else {
287
20
            std::string lower_str = data_type->to_string(*lower_column, row_num, format_options);
288
20
            std::string upper_str = data_type->to_string(*upper_column, row_num, format_options);
289
20
            ++row_num;
290
20
            lower_val.SetString(lower_str.data(),
291
20
                                static_cast<rapidjson::SizeType>(lower_str.size()), allocator);
292
20
            upper_val.SetString(upper_str.data(),
293
20
                                static_cast<rapidjson::SizeType>(upper_str.size()), allocator);
294
20
        }
295
20
        rapidjson::Value bucket_json(rapidjson::kObjectType);
296
20
        bucket_json.AddMember("lower", lower_val, allocator);
297
20
        bucket_json.AddMember("upper", upper_val, allocator);
298
20
        bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator);
299
20
        bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator);
300
20
        bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator);
301
302
20
        bucket_arr.PushBack(bucket_json, allocator);
303
20
    }
304
305
8
    doc.AddMember("buckets", bucket_arr, allocator);
306
8
    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
307
8
    doc.Accept(writer);
308
309
8
    return !buckets.empty() && buffer.GetSize() > 0;
310
8
}
Unexecuted instantiation: _ZN5doris17histogram_to_jsonINS_7DecimalIiEEEEbRN9rapidjson19GenericStringBufferINS3_4UTF8IcEENS3_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISD_EERKSt10shared_ptrIKNS_9IDataTypeEE
Unexecuted instantiation: _ZN5doris17histogram_to_jsonINS_7DecimalIlEEEEbRN9rapidjson19GenericStringBufferINS3_4UTF8IcEENS3_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISD_EERKSt10shared_ptrIKNS_9IDataTypeEE
Unexecuted instantiation: _ZN5doris17histogram_to_jsonINS_12Decimal128V3EEEbRN9rapidjson19GenericStringBufferINS2_4UTF8IcEENS2_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISC_EERKSt10shared_ptrIKNS_9IDataTypeEE
Unexecuted instantiation: _ZN5doris17histogram_to_jsonINS_7DecimalIN4wide7integerILm256EiEEEEEEbRN9rapidjson19GenericStringBufferINS6_4UTF8IcEENS6_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISG_EERKSt10shared_ptrIKNS_9IDataTypeEE
_ZN5doris17histogram_to_jsonINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEbRN9rapidjson19GenericStringBufferINS7_4UTF8IcEENS7_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISH_EERKSt10shared_ptrIKNS_9IDataTypeEE
Line
Count
Source
246
8
                       const DataTypePtr& data_type) {
247
8
    rapidjson::Document doc;
248
8
    doc.SetObject();
249
8
    rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
250
251
8
    int num_buckets = cast_set<int>(buckets.size());
252
8
    doc.AddMember("num_buckets", num_buckets, allocator);
253
254
8
    rapidjson::Value bucket_arr(rapidjson::kArrayType);
255
8
    bucket_arr.Reserve(num_buckets, allocator);
256
257
8
    std::stringstream ss1;
258
8
    std::stringstream ss2;
259
260
8
    rapidjson::Value lower_val;
261
8
    rapidjson::Value upper_val;
262
263
    // Convert bucket's lower and upper to 2 columns
264
8
    MutableColumnPtr lower_column = data_type->create_column();
265
8
    MutableColumnPtr upper_column = data_type->create_column();
266
20
    for (const auto& bucket : buckets) {
267
        // String type is different, it has to pass in length
268
        // if it is string type , directly use string value
269
        if constexpr (!std::is_same_v<T, std::string>) {
270
            lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0);
271
            upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0);
272
        }
273
20
    }
274
8
    size_t row_num = 0;
275
276
8
    auto format_options = DataTypeSerDe::get_default_format_options();
277
8
    auto time_zone = cctz::utc_time_zone();
278
8
    format_options.timezone = &time_zone;
279
280
20
    for (const auto& bucket : buckets) {
281
20
        if constexpr (std::is_same_v<T, std::string>) {
282
20
            lower_val.SetString(bucket.lower.data(),
283
20
                                static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator);
284
20
            upper_val.SetString(bucket.upper.data(),
285
20
                                static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator);
286
        } else {
287
            std::string lower_str = data_type->to_string(*lower_column, row_num, format_options);
288
            std::string upper_str = data_type->to_string(*upper_column, row_num, format_options);
289
            ++row_num;
290
            lower_val.SetString(lower_str.data(),
291
                                static_cast<rapidjson::SizeType>(lower_str.size()), allocator);
292
            upper_val.SetString(upper_str.data(),
293
                                static_cast<rapidjson::SizeType>(upper_str.size()), allocator);
294
        }
295
20
        rapidjson::Value bucket_json(rapidjson::kObjectType);
296
20
        bucket_json.AddMember("lower", lower_val, allocator);
297
20
        bucket_json.AddMember("upper", upper_val, allocator);
298
20
        bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator);
299
20
        bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator);
300
20
        bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator);
301
302
20
        bucket_arr.PushBack(bucket_json, allocator);
303
20
    }
304
305
8
    doc.AddMember("buckets", bucket_arr, allocator);
306
8
    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
307
8
    doc.Accept(writer);
308
309
8
    return !buckets.empty() && buffer.GetSize() > 0;
310
8
}
_ZN5doris17histogram_to_jsonINS_11DateV2ValueINS_15DateV2ValueTypeEEEEEbRN9rapidjson19GenericStringBufferINS4_4UTF8IcEENS4_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISE_EERKSt10shared_ptrIKNS_9IDataTypeEE
Line
Count
Source
246
4
                       const DataTypePtr& data_type) {
247
4
    rapidjson::Document doc;
248
4
    doc.SetObject();
249
4
    rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
250
251
4
    int num_buckets = cast_set<int>(buckets.size());
252
4
    doc.AddMember("num_buckets", num_buckets, allocator);
253
254
4
    rapidjson::Value bucket_arr(rapidjson::kArrayType);
255
4
    bucket_arr.Reserve(num_buckets, allocator);
256
257
4
    std::stringstream ss1;
258
4
    std::stringstream ss2;
259
260
4
    rapidjson::Value lower_val;
261
4
    rapidjson::Value upper_val;
262
263
    // Convert bucket's lower and upper to 2 columns
264
4
    MutableColumnPtr lower_column = data_type->create_column();
265
4
    MutableColumnPtr upper_column = data_type->create_column();
266
20
    for (const auto& bucket : buckets) {
267
        // String type is different, it has to pass in length
268
        // if it is string type , directly use string value
269
20
        if constexpr (!std::is_same_v<T, std::string>) {
270
20
            lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0);
271
20
            upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0);
272
20
        }
273
20
    }
274
4
    size_t row_num = 0;
275
276
4
    auto format_options = DataTypeSerDe::get_default_format_options();
277
4
    auto time_zone = cctz::utc_time_zone();
278
4
    format_options.timezone = &time_zone;
279
280
20
    for (const auto& bucket : buckets) {
281
        if constexpr (std::is_same_v<T, std::string>) {
282
            lower_val.SetString(bucket.lower.data(),
283
                                static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator);
284
            upper_val.SetString(bucket.upper.data(),
285
                                static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator);
286
20
        } else {
287
20
            std::string lower_str = data_type->to_string(*lower_column, row_num, format_options);
288
20
            std::string upper_str = data_type->to_string(*upper_column, row_num, format_options);
289
20
            ++row_num;
290
20
            lower_val.SetString(lower_str.data(),
291
20
                                static_cast<rapidjson::SizeType>(lower_str.size()), allocator);
292
20
            upper_val.SetString(upper_str.data(),
293
20
                                static_cast<rapidjson::SizeType>(upper_str.size()), allocator);
294
20
        }
295
20
        rapidjson::Value bucket_json(rapidjson::kObjectType);
296
20
        bucket_json.AddMember("lower", lower_val, allocator);
297
20
        bucket_json.AddMember("upper", upper_val, allocator);
298
20
        bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator);
299
20
        bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator);
300
20
        bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator);
301
302
20
        bucket_arr.PushBack(bucket_json, allocator);
303
20
    }
304
305
4
    doc.AddMember("buckets", bucket_arr, allocator);
306
4
    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
307
4
    doc.Accept(writer);
308
309
4
    return !buckets.empty() && buffer.GetSize() > 0;
310
4
}
_ZN5doris17histogram_to_jsonINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEEEEbRN9rapidjson19GenericStringBufferINS4_4UTF8IcEENS4_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISE_EERKSt10shared_ptrIKNS_9IDataTypeEE
Line
Count
Source
246
4
                       const DataTypePtr& data_type) {
247
4
    rapidjson::Document doc;
248
4
    doc.SetObject();
249
4
    rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
250
251
4
    int num_buckets = cast_set<int>(buckets.size());
252
4
    doc.AddMember("num_buckets", num_buckets, allocator);
253
254
4
    rapidjson::Value bucket_arr(rapidjson::kArrayType);
255
4
    bucket_arr.Reserve(num_buckets, allocator);
256
257
4
    std::stringstream ss1;
258
4
    std::stringstream ss2;
259
260
4
    rapidjson::Value lower_val;
261
4
    rapidjson::Value upper_val;
262
263
    // Convert bucket's lower and upper to 2 columns
264
4
    MutableColumnPtr lower_column = data_type->create_column();
265
4
    MutableColumnPtr upper_column = data_type->create_column();
266
20
    for (const auto& bucket : buckets) {
267
        // String type is different, it has to pass in length
268
        // if it is string type , directly use string value
269
20
        if constexpr (!std::is_same_v<T, std::string>) {
270
20
            lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0);
271
20
            upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0);
272
20
        }
273
20
    }
274
4
    size_t row_num = 0;
275
276
4
    auto format_options = DataTypeSerDe::get_default_format_options();
277
4
    auto time_zone = cctz::utc_time_zone();
278
4
    format_options.timezone = &time_zone;
279
280
20
    for (const auto& bucket : buckets) {
281
        if constexpr (std::is_same_v<T, std::string>) {
282
            lower_val.SetString(bucket.lower.data(),
283
                                static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator);
284
            upper_val.SetString(bucket.upper.data(),
285
                                static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator);
286
20
        } else {
287
20
            std::string lower_str = data_type->to_string(*lower_column, row_num, format_options);
288
20
            std::string upper_str = data_type->to_string(*upper_column, row_num, format_options);
289
20
            ++row_num;
290
20
            lower_val.SetString(lower_str.data(),
291
20
                                static_cast<rapidjson::SizeType>(lower_str.size()), allocator);
292
20
            upper_val.SetString(upper_str.data(),
293
20
                                static_cast<rapidjson::SizeType>(upper_str.size()), allocator);
294
20
        }
295
20
        rapidjson::Value bucket_json(rapidjson::kObjectType);
296
20
        bucket_json.AddMember("lower", lower_val, allocator);
297
20
        bucket_json.AddMember("upper", upper_val, allocator);
298
20
        bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator);
299
20
        bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator);
300
20
        bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator);
301
302
20
        bucket_arr.PushBack(bucket_json, allocator);
303
20
    }
304
305
4
    doc.AddMember("buckets", bucket_arr, allocator);
306
4
    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
307
4
    doc.Accept(writer);
308
309
4
    return !buckets.empty() && buffer.GetSize() > 0;
310
4
}
311
#include "common/compile_check_end.h"
312
} // namespace  doris