Coverage Report

Created: 2026-03-12 16:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exec/common/histogram_helpers.hpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <rapidjson/document.h>
21
#include <rapidjson/prettywriter.h>
22
#include <rapidjson/stringbuffer.h>
23
24
#include <boost/dynamic_bitset.hpp>
25
26
#include "common/cast_set.h"
27
#include "core/data_type/data_type_decimal.h"
28
#include "util/io_helper.h"
29
30
namespace doris {
31
#include "common/compile_check_begin.h"
32
template <typename T>
33
struct Bucket {
34
public:
35
    Bucket() = default;
36
    Bucket(T lower, T upper, size_t ndv, size_t count, size_t pre_sum)
37
1.85k
            : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {}
_ZN5doris6BucketINS_7DecimalIiEEEC2ES2_S2_mmm
Line
Count
Source
37
133
            : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {}
Unexecuted instantiation: _ZN5doris6BucketINS_7DecimalIlEEEC2ES2_S2_mmm
Unexecuted instantiation: _ZN5doris6BucketINS_12Decimal128V3EEC2ES1_S1_mmm
Unexecuted instantiation: _ZN5doris6BucketINS_7DecimalIN4wide7integerILm256EiEEEEEC2ES5_S5_mmm
_ZN5doris6BucketINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEC2ES6_S6_mmm
Line
Count
Source
37
178
            : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {}
_ZN5doris6BucketINS_11DateV2ValueINS_15DateV2ValueTypeEEEEC2ES3_S3_mmm
Line
Count
Source
37
256
            : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {}
_ZN5doris6BucketINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEEEC2ES3_S3_mmm
Line
Count
Source
37
255
            : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {}
_ZN5doris6BucketIhEC2Ehhmmm
Line
Count
Source
37
2
            : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {}
_ZN5doris6BucketIaEC2Eaammm
Line
Count
Source
37
129
            : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {}
_ZN5doris6BucketIsEC2Essmmm
Line
Count
Source
37
132
            : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {}
_ZN5doris6BucketIiEC2Eiimmm
Line
Count
Source
37
163
            : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {}
_ZN5doris6BucketIlEC2Ellmmm
Line
Count
Source
37
212
            : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {}
_ZN5doris6BucketInEC2Ennmmm
Line
Count
Source
37
141
            : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {}
_ZN5doris6BucketIfEC2Effmmm
Line
Count
Source
37
128
            : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {}
_ZN5doris6BucketIdEC2Eddmmm
Line
Count
Source
37
128
            : lower(lower), upper(upper), ndv(ndv), count(count), pre_sum(pre_sum) {}
38
39
    T lower;
40
    T upper;
41
    size_t ndv;
42
    size_t count;
43
    size_t pre_sum;
44
};
45
46
/**
47
 * Checks if it is possible to assign the provided value_map to the given
48
 * number of buckets such that no bucket has a size larger than max_bucket_size.
49
 *
50
 * @param value_map A mapping of values to their counts.
51
 * @param max_bucket_size The maximum size that any bucket is allowed to have.
52
 * @param num_buckets The number of buckets that we want to assign values to.
53
 *
54
 * @return true if the values can be assigned to the buckets, false otherwise.
55
 */
56
template <typename T>
57
bool can_assign_into_buckets(const std::map<T, size_t>& value_map, const size_t max_bucket_size,
58
505
                             const size_t num_buckets) {
59
505
    if (value_map.empty()) {
60
1
        return false;
61
504
    };
62
63
504
    size_t used_buckets = 1;
64
504
    size_t current_bucket_size = 0;
65
66
30.1k
    for (const auto& [value, count] : value_map) {
67
30.1k
        current_bucket_size += count;
68
69
        // If adding the current value to the current bucket would exceed max_bucket_size,
70
        // then we start a new bucket.
71
30.1k
        if (current_bucket_size > max_bucket_size) {
72
1.14k
            ++used_buckets;
73
1.14k
            current_bucket_size = count;
74
1.14k
        }
75
76
        // If we have used more buckets than num_buckets, we cannot assign the values to buckets.
77
30.1k
        if (used_buckets > num_buckets) {
78
222
            return false;
79
222
        }
80
30.1k
    }
81
82
282
    return true;
83
504
}
Unexecuted instantiation: _ZN5doris23can_assign_into_bucketsIhEEbRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEmm
_ZN5doris23can_assign_into_bucketsIaEEbRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEmm
Line
Count
Source
58
27
                             const size_t num_buckets) {
59
27
    if (value_map.empty()) {
60
0
        return false;
61
27
    };
62
63
27
    size_t used_buckets = 1;
64
27
    size_t current_bucket_size = 0;
65
66
1.24k
    for (const auto& [value, count] : value_map) {
67
1.24k
        current_bucket_size += count;
68
69
        // If adding the current value to the current bucket would exceed max_bucket_size,
70
        // then we start a new bucket.
71
1.24k
        if (current_bucket_size > max_bucket_size) {
72
70
            ++used_buckets;
73
70
            current_bucket_size = count;
74
70
        }
75
76
        // If we have used more buckets than num_buckets, we cannot assign the values to buckets.
77
1.24k
        if (used_buckets > num_buckets) {
78
11
            return false;
79
11
        }
80
1.24k
    }
81
82
16
    return true;
83
27
}
_ZN5doris23can_assign_into_bucketsIsEEbRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEmm
Line
Count
Source
58
40
                             const size_t num_buckets) {
59
40
    if (value_map.empty()) {
60
0
        return false;
61
40
    };
62
63
40
    size_t used_buckets = 1;
64
40
    size_t current_bucket_size = 0;
65
66
1.34k
    for (const auto& [value, count] : value_map) {
67
1.34k
        current_bucket_size += count;
68
69
        // If adding the current value to the current bucket would exceed max_bucket_size,
70
        // then we start a new bucket.
71
1.34k
        if (current_bucket_size > max_bucket_size) {
72
88
            ++used_buckets;
73
88
            current_bucket_size = count;
74
88
        }
75
76
        // If we have used more buckets than num_buckets, we cannot assign the values to buckets.
77
1.34k
        if (used_buckets > num_buckets) {
78
18
            return false;
79
18
        }
80
1.34k
    }
81
82
22
    return true;
83
40
}
_ZN5doris23can_assign_into_bucketsIiEEbRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEmm
Line
Count
Source
58
69
                             const size_t num_buckets) {
59
69
    if (value_map.empty()) {
60
1
        return false;
61
68
    };
62
63
68
    size_t used_buckets = 1;
64
68
    size_t current_bucket_size = 0;
65
66
1.40k
    for (const auto& [value, count] : value_map) {
67
1.40k
        current_bucket_size += count;
68
69
        // If adding the current value to the current bucket would exceed max_bucket_size,
70
        // then we start a new bucket.
71
1.40k
        if (current_bucket_size > max_bucket_size) {
72
169
            ++used_buckets;
73
169
            current_bucket_size = count;
74
169
        }
75
76
        // If we have used more buckets than num_buckets, we cannot assign the values to buckets.
77
1.40k
        if (used_buckets > num_buckets) {
78
30
            return false;
79
30
        }
80
1.40k
    }
81
82
38
    return true;
83
68
}
_ZN5doris23can_assign_into_bucketsIlEEbRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEmm
Line
Count
Source
58
34
                             const size_t num_buckets) {
59
34
    if (value_map.empty()) {
60
0
        return false;
61
34
    };
62
63
34
    size_t used_buckets = 1;
64
34
    size_t current_bucket_size = 0;
65
66
1.29k
    for (const auto& [value, count] : value_map) {
67
1.29k
        current_bucket_size += count;
68
69
        // If adding the current value to the current bucket would exceed max_bucket_size,
70
        // then we start a new bucket.
71
1.29k
        if (current_bucket_size > max_bucket_size) {
72
90
            ++used_buckets;
73
90
            current_bucket_size = count;
74
90
        }
75
76
        // If we have used more buckets than num_buckets, we cannot assign the values to buckets.
77
1.29k
        if (used_buckets > num_buckets) {
78
17
            return false;
79
17
        }
80
1.29k
    }
81
82
17
    return true;
83
34
}
_ZN5doris23can_assign_into_bucketsInEEbRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEmm
Line
Count
Source
58
32
                             const size_t num_buckets) {
59
32
    if (value_map.empty()) {
60
0
        return false;
61
32
    };
62
63
32
    size_t used_buckets = 1;
64
32
    size_t current_bucket_size = 0;
65
66
1.28k
    for (const auto& [value, count] : value_map) {
67
1.28k
        current_bucket_size += count;
68
69
        // If adding the current value to the current bucket would exceed max_bucket_size,
70
        // then we start a new bucket.
71
1.28k
        if (current_bucket_size > max_bucket_size) {
72
96
            ++used_buckets;
73
96
            current_bucket_size = count;
74
96
        }
75
76
        // If we have used more buckets than num_buckets, we cannot assign the values to buckets.
77
1.28k
        if (used_buckets > num_buckets) {
78
15
            return false;
79
15
        }
80
1.28k
    }
81
82
17
    return true;
83
32
}
_ZN5doris23can_assign_into_bucketsIfEEbRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEmm
Line
Count
Source
58
31
                             const size_t num_buckets) {
59
31
    if (value_map.empty()) {
60
0
        return false;
61
31
    };
62
63
31
    size_t used_buckets = 1;
64
31
    size_t current_bucket_size = 0;
65
66
1.25k
    for (const auto& [value, count] : value_map) {
67
1.25k
        current_bucket_size += count;
68
69
        // If adding the current value to the current bucket would exceed max_bucket_size,
70
        // then we start a new bucket.
71
1.25k
        if (current_bucket_size > max_bucket_size) {
72
78
            ++used_buckets;
73
78
            current_bucket_size = count;
74
78
        }
75
76
        // If we have used more buckets than num_buckets, we cannot assign the values to buckets.
77
1.25k
        if (used_buckets > num_buckets) {
78
15
            return false;
79
15
        }
80
1.25k
    }
81
82
16
    return true;
83
31
}
_ZN5doris23can_assign_into_bucketsIdEEbRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEmm
Line
Count
Source
58
31
                             const size_t num_buckets) {
59
31
    if (value_map.empty()) {
60
0
        return false;
61
31
    };
62
63
31
    size_t used_buckets = 1;
64
31
    size_t current_bucket_size = 0;
65
66
1.25k
    for (const auto& [value, count] : value_map) {
67
1.25k
        current_bucket_size += count;
68
69
        // If adding the current value to the current bucket would exceed max_bucket_size,
70
        // then we start a new bucket.
71
1.25k
        if (current_bucket_size > max_bucket_size) {
72
78
            ++used_buckets;
73
78
            current_bucket_size = count;
74
78
        }
75
76
        // If we have used more buckets than num_buckets, we cannot assign the values to buckets.
77
1.25k
        if (used_buckets > num_buckets) {
78
15
            return false;
79
15
        }
80
1.25k
    }
81
82
16
    return true;
83
31
}
_ZN5doris23can_assign_into_bucketsINS_7DecimalIiEEEEbRKSt3mapIT_mSt4lessIS4_ESaISt4pairIKS4_mEEEmm
Line
Count
Source
58
39
                             const size_t num_buckets) {
59
39
    if (value_map.empty()) {
60
0
        return false;
61
39
    };
62
63
39
    size_t used_buckets = 1;
64
39
    size_t current_bucket_size = 0;
65
66
161
    for (const auto& [value, count] : value_map) {
67
161
        current_bucket_size += count;
68
69
        // If adding the current value to the current bucket would exceed max_bucket_size,
70
        // then we start a new bucket.
71
161
        if (current_bucket_size > max_bucket_size) {
72
54
            ++used_buckets;
73
54
            current_bucket_size = count;
74
54
        }
75
76
        // If we have used more buckets than num_buckets, we cannot assign the values to buckets.
77
161
        if (used_buckets > num_buckets) {
78
17
            return false;
79
17
        }
80
161
    }
81
82
22
    return true;
83
39
}
Unexecuted instantiation: _ZN5doris23can_assign_into_bucketsINS_7DecimalIlEEEEbRKSt3mapIT_mSt4lessIS4_ESaISt4pairIKS4_mEEEmm
Unexecuted instantiation: _ZN5doris23can_assign_into_bucketsINS_12Decimal128V3EEEbRKSt3mapIT_mSt4lessIS3_ESaISt4pairIKS3_mEEEmm
Unexecuted instantiation: _ZN5doris23can_assign_into_bucketsINS_7DecimalIN4wide7integerILm256EiEEEEEEbRKSt3mapIT_mSt4lessIS7_ESaISt4pairIKS7_mEEEmm
_ZN5doris23can_assign_into_bucketsINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEbRKSt3mapIT_mSt4lessIS8_ESaISt4pairIKS8_mEEEmm
Line
Count
Source
58
100
                             const size_t num_buckets) {
59
100
    if (value_map.empty()) {
60
0
        return false;
61
100
    };
62
63
100
    size_t used_buckets = 1;
64
100
    size_t current_bucket_size = 0;
65
66
18.3k
    for (const auto& [value, count] : value_map) {
67
18.3k
        current_bucket_size += count;
68
69
        // If adding the current value to the current bucket would exceed max_bucket_size,
70
        // then we start a new bucket.
71
18.3k
        if (current_bucket_size > max_bucket_size) {
72
197
            ++used_buckets;
73
197
            current_bucket_size = count;
74
197
        }
75
76
        // If we have used more buckets than num_buckets, we cannot assign the values to buckets.
77
18.3k
        if (used_buckets > num_buckets) {
78
38
            return false;
79
38
        }
80
18.3k
    }
81
82
62
    return true;
83
100
}
_ZN5doris23can_assign_into_bucketsINS_11DateV2ValueINS_15DateV2ValueTypeEEEEEbRKSt3mapIT_mSt4lessIS5_ESaISt4pairIKS5_mEEEmm
Line
Count
Source
58
51
                             const size_t num_buckets) {
59
51
    if (value_map.empty()) {
60
0
        return false;
61
51
    };
62
63
51
    size_t used_buckets = 1;
64
51
    size_t current_bucket_size = 0;
65
66
1.31k
    for (const auto& [value, count] : value_map) {
67
1.31k
        current_bucket_size += count;
68
69
        // If adding the current value to the current bucket would exceed max_bucket_size,
70
        // then we start a new bucket.
71
1.31k
        if (current_bucket_size > max_bucket_size) {
72
114
            ++used_buckets;
73
114
            current_bucket_size = count;
74
114
        }
75
76
        // If we have used more buckets than num_buckets, we cannot assign the values to buckets.
77
1.31k
        if (used_buckets > num_buckets) {
78
23
            return false;
79
23
        }
80
1.31k
    }
81
82
28
    return true;
83
51
}
_ZN5doris23can_assign_into_bucketsINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEEEEbRKSt3mapIT_mSt4lessIS5_ESaISt4pairIKS5_mEEEmm
Line
Count
Source
58
51
                             const size_t num_buckets) {
59
51
    if (value_map.empty()) {
60
0
        return false;
61
51
    };
62
63
51
    size_t used_buckets = 1;
64
51
    size_t current_bucket_size = 0;
65
66
1.31k
    for (const auto& [value, count] : value_map) {
67
1.31k
        current_bucket_size += count;
68
69
        // If adding the current value to the current bucket would exceed max_bucket_size,
70
        // then we start a new bucket.
71
1.31k
        if (current_bucket_size > max_bucket_size) {
72
115
            ++used_buckets;
73
115
            current_bucket_size = count;
74
115
        }
75
76
        // If we have used more buckets than num_buckets, we cannot assign the values to buckets.
77
1.31k
        if (used_buckets > num_buckets) {
78
23
            return false;
79
23
        }
80
1.31k
    }
81
82
28
    return true;
83
51
}
84
85
/**
86
 * Calculates the maximum number of values that can fit into each bucket given a set of values
87
 * and the desired number of buckets.
88
 *
89
 * @tparam T the type of the values in the value map
90
 * @param value_map the map of values and their counts
91
 * @param num_buckets the desired number of buckets
92
 * @return the maximum number of values that can fit into each bucket
93
 */
94
template <typename T>
95
647
size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) {
96
    // Ensure that the value map is not empty
97
647
    assert(!value_map.empty());
98
99
    // Calculate the total number of values in the map using std::accumulate()
100
647
    size_t total_values = 0;
101
5.76k
    for (const auto& [value, count] : value_map) {
102
5.76k
        total_values += count;
103
5.76k
    }
104
105
    // If there is only one bucket, then all values will be assigned to that bucket
106
647
    if (num_buckets == 1) {
107
4
        return total_values;
108
4
    }
109
110
    // To calculate the maximum value count in each bucket, we first calculate a conservative upper
111
    // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed
112
    // the actual maximum value count, but it does not underestimate it. The subsequent binary search
113
    // algorithm will approach the actual maximum value count.
114
643
    size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1;
115
116
    // Initialize the lower bound to 0
117
643
    size_t lower_bucket_values = 0;
118
119
    // Perform a binary search to find the maximum number of values that can fit into each bucket
120
643
    int search_step = 0;
121
643
    const int max_search_steps =
122
643
            10; // Limit the number of search steps to avoid excessive iteration
123
124
1.13k
    while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) {
125
        // Calculate the midpoint of the upper and lower bounds
126
491
        const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2;
127
128
        // Check if the given number of values can be assigned to the desired number of buckets
129
491
        if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) {
130
            // If it can, then set the upper bound to the midpoint
131
275
            upper_bucket_values = bucket_values;
132
275
        } else {
133
            // If it can't, then set the lower bound to the midpoint
134
216
            lower_bucket_values = bucket_values;
135
216
        }
136
        // Increment the search step counter
137
491
        ++search_step;
138
491
    }
139
140
643
    return upper_bucket_values;
141
647
}
_ZN5doris27calculate_bucket_max_valuesIhEEmRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEm
Line
Count
Source
95
1
size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) {
96
    // Ensure that the value map is not empty
97
1
    assert(!value_map.empty());
98
99
    // Calculate the total number of values in the map using std::accumulate()
100
1
    size_t total_values = 0;
101
2
    for (const auto& [value, count] : value_map) {
102
2
        total_values += count;
103
2
    }
104
105
    // If there is only one bucket, then all values will be assigned to that bucket
106
1
    if (num_buckets == 1) {
107
0
        return total_values;
108
0
    }
109
110
    // To calculate the maximum value count in each bucket, we first calculate a conservative upper
111
    // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed
112
    // the actual maximum value count, but it does not underestimate it. The subsequent binary search
113
    // algorithm will approach the actual maximum value count.
114
1
    size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1;
115
116
    // Initialize the lower bound to 0
117
1
    size_t lower_bucket_values = 0;
118
119
    // Perform a binary search to find the maximum number of values that can fit into each bucket
120
1
    int search_step = 0;
121
1
    const int max_search_steps =
122
1
            10; // Limit the number of search steps to avoid excessive iteration
123
124
1
    while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) {
125
        // Calculate the midpoint of the upper and lower bounds
126
0
        const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2;
127
128
        // Check if the given number of values can be assigned to the desired number of buckets
129
0
        if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) {
130
            // If it can, then set the upper bound to the midpoint
131
0
            upper_bucket_values = bucket_values;
132
0
        } else {
133
            // If it can't, then set the lower bound to the midpoint
134
0
            lower_bucket_values = bucket_values;
135
0
        }
136
        // Increment the search step counter
137
0
        ++search_step;
138
0
    }
139
140
1
    return upper_bucket_values;
141
1
}
_ZN5doris27calculate_bucket_max_valuesIaEEmRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEm
Line
Count
Source
95
42
size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) {
96
    // Ensure that the value map is not empty
97
42
    assert(!value_map.empty());
98
99
    // Calculate the total number of values in the map using std::accumulate()
100
42
    size_t total_values = 0;
101
340
    for (const auto& [value, count] : value_map) {
102
340
        total_values += count;
103
340
    }
104
105
    // If there is only one bucket, then all values will be assigned to that bucket
106
42
    if (num_buckets == 1) {
107
1
        return total_values;
108
1
    }
109
110
    // To calculate the maximum value count in each bucket, we first calculate a conservative upper
111
    // bound, which is equal to 2 * total_values / (num_buckets - 1) + 1. This upper bound may exceed
112
    // the actual maximum value count, but it does not underestimate it. The subsequent binary search
113
    // algorithm will approach the actual maximum value count.
114
41
    size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1;
115
116
    // Initialize the lower bound to 0
117
41
    size_t lower_bucket_values = 0;
118
119
    // Perform a binary search to find the maximum number of values that can fit into each bucket
120
41
    int search_step = 0;
121
41
    const int max_search_steps =
122
41
            10; // Limit the number of search steps to avoid excessive iteration
123
124
68
    while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) {
125
        // Calculate the midpoint of the upper and lower bounds
126
27
        const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2;
127
128
        // Check if the given number of values can be assigned to the desired number of buckets
129
27
        if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) {
130
            // If it can, then set the upper bound to the midpoint
131
16
            upper_bucket_values = bucket_values;
132
16
        } else {
133
            // If it can't, then set the lower bound to the midpoint
134
11
            lower_bucket_values = bucket_values;
135
11
        }
136
        // Increment the search step counter
137
27
        ++search_step;
138
27
    }
139
140
41
    return upper_bucket_values;
141
42
}
_ZN5doris27calculate_bucket_max_valuesIsEEmRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEm
Line
Count
Source
95
43
size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) {
96
    // Ensure that the value map is not empty
97
43
    assert(!value_map.empty());
98
99
    // Calculate the total number of values in the map using std::accumulate()
100
43
    size_t total_values = 0;
101
349
    for (const auto& [value, count] : value_map) {
102
349
        total_values += count;
103
349
    }
104
105
    // If there is only one bucket, then all values will be assigned to that bucket
106
43
    if (num_buckets == 1) {
107
0
        return total_values;
108
0
    }
109
110
    // To calculate the maximum value count in each bucket, we first calculate a conservative upper
111
    // bound, which is equal to 2 * total_values / (num_buckets - 1) + 1. This upper bound may exceed
112
    // the actual maximum value count, but it does not underestimate it. The subsequent binary search
113
    // algorithm will approach the actual maximum value count.
114
43
    size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1;
115
116
    // Initialize the lower bound to 0
117
43
    size_t lower_bucket_values = 0;
118
119
    // Perform a binary search to find the maximum number of values that can fit into each bucket
120
43
    int search_step = 0;
121
43
    const int max_search_steps =
122
43
            10; // Limit the number of search steps to avoid excessive iteration
123
124
83
    while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) {
125
        // Calculate the midpoint of the upper and lower bounds
126
40
        const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2;
127
128
        // Check if the given number of values can be assigned to the desired number of buckets
129
40
        if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) {
130
            // If it can, then set the upper bound to the midpoint
131
22
            upper_bucket_values = bucket_values;
132
22
        } else {
133
            // If it can't, then set the lower bound to the midpoint
134
18
            lower_bucket_values = bucket_values;
135
18
        }
136
        // Increment the search step counter
137
40
        ++search_step;
138
40
    }
139
140
43
    return upper_bucket_values;
141
43
}
_ZN5doris27calculate_bucket_max_valuesIiEEmRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEm
Line
Count
Source
95
71
size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) {
96
    // Ensure that the value map is not empty
97
71
    assert(!value_map.empty());
98
99
    // Calculate the total number of values in the map using std::accumulate()
100
71
    size_t total_values = 0;
101
375
    for (const auto& [value, count] : value_map) {
102
375
        total_values += count;
103
375
    }
104
105
    // If there is only one bucket, then all values will be assigned to that bucket
106
71
    if (num_buckets == 1) {
107
3
        return total_values;
108
3
    }
109
110
    // To calculate the maximum value count in each bucket, we first calculate a conservative upper
111
    // bound, which is equal to 2 * total_values / (num_buckets - 1) + 1. This upper bound may exceed
112
    // the actual maximum value count, but it does not underestimate it. The subsequent binary search
113
    // algorithm will approach the actual maximum value count.
114
68
    size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1;
115
116
    // Initialize the lower bound to 0
117
68
    size_t lower_bucket_values = 0;
118
119
    // Perform a binary search to find the maximum number of values that can fit into each bucket
120
68
    int search_step = 0;
121
68
    const int max_search_steps =
122
68
            10; // Limit the number of search steps to avoid excessive iteration
123
124
123
    while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) {
125
        // Calculate the midpoint of the upper and lower bounds
126
55
        const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2;
127
128
        // Check if the given number of values can be assigned to the desired number of buckets
129
55
        if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) {
130
            // If it can, then set the upper bound to the midpoint
131
31
            upper_bucket_values = bucket_values;
132
31
        } else {
133
            // If it can't, then set the lower bound to the midpoint
134
24
            lower_bucket_values = bucket_values;
135
24
        }
136
        // Increment the search step counter
137
55
        ++search_step;
138
55
    }
139
140
68
    return upper_bucket_values;
141
71
}
_ZN5doris27calculate_bucket_max_valuesIlEEmRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEm
Line
Count
Source
95
59
size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) {
96
    // Ensure that the value map is not empty
97
59
    assert(!value_map.empty());
98
99
    // Calculate the total number of values in the map using std::accumulate()
100
59
    size_t total_values = 0;
101
418
    for (const auto& [value, count] : value_map) {
102
418
        total_values += count;
103
418
    }
104
105
    // If there is only one bucket, then all values will be assigned to that bucket
106
59
    if (num_buckets == 1) {
107
0
        return total_values;
108
0
    }
109
110
    // To calculate the maximum value count in each bucket, we first calculate a conservative upper
111
    // bound, which is equal to 2 * total_values / (num_buckets - 1) + 1. This upper bound may exceed
112
    // the actual maximum value count, but it does not underestimate it. The subsequent binary search
113
    // algorithm will approach the actual maximum value count.
114
59
    size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1;
115
116
    // Initialize the lower bound to 0
117
59
    size_t lower_bucket_values = 0;
118
119
    // Perform a binary search to find the maximum number of values that can fit into each bucket
120
59
    int search_step = 0;
121
59
    const int max_search_steps =
122
59
            10; // Limit the number of search steps to avoid excessive iteration
123
124
93
    while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) {
125
        // Calculate the midpoint of the upper and lower bounds
126
34
        const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2;
127
128
        // Check if the given number of values can be assigned to the desired number of buckets
129
34
        if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) {
130
            // If it can, then set the upper bound to the midpoint
131
17
            upper_bucket_values = bucket_values;
132
17
        } else {
133
            // If it can't, then set the lower bound to the midpoint
134
17
            lower_bucket_values = bucket_values;
135
17
        }
136
        // Increment the search step counter
137
34
        ++search_step;
138
34
    }
139
140
59
    return upper_bucket_values;
141
59
}
_ZN5doris27calculate_bucket_max_valuesInEEmRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEm
Line
Count
Source
95
42
size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) {
96
    // Ensure that the value map is not empty
97
42
    assert(!value_map.empty());
98
99
    // Calculate the total number of values in the map using std::accumulate()
100
42
    size_t total_values = 0;
101
343
    for (const auto& [value, count] : value_map) {
102
343
        total_values += count;
103
343
    }
104
105
    // If there is only one bucket, then all values will be assigned to that bucket
106
42
    if (num_buckets == 1) {
107
0
        return total_values;
108
0
    }
109
110
    // To calculate the maximum value count in each bucket, we first calculate a conservative upper
111
    // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed
112
    // the actual maximum value count, but it does not underestimate it. The subsequent binary search
113
    // algorithm will approach the actual maximum value count.
114
42
    size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1;
115
116
    // Initialize the lower bound to 0
117
42
    size_t lower_bucket_values = 0;
118
119
    // Perform a binary search to find the maximum number of values that can fit into each bucket
120
42
    int search_step = 0;
121
42
    const int max_search_steps =
122
42
            10; // Limit the number of search steps to avoid excessive iteration
123
124
74
    while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) {
125
        // Calculate the midpoint of the upper and lower bounds
126
32
        const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2;
127
128
        // Check if the given number of values can be assigned to the desired number of buckets
129
32
        if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) {
130
            // If it can, then set the upper bound to the midpoint
131
17
            upper_bucket_values = bucket_values;
132
17
        } else {
133
            // If it can't, then set the lower bound to the midpoint
134
15
            lower_bucket_values = bucket_values;
135
15
        }
136
        // Increment the search step counter
137
32
        ++search_step;
138
32
    }
139
140
42
    return upper_bucket_values;
141
42
}
_ZN5doris27calculate_bucket_max_valuesIfEEmRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEm
Line
Count
Source
95
41
size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) {
96
    // Ensure that the value map is not empty
97
41
    assert(!value_map.empty());
98
99
    // Calculate the total number of values in the map using std::accumulate()
100
41
    size_t total_values = 0;
101
328
    for (const auto& [value, count] : value_map) {
102
328
        total_values += count;
103
328
    }
104
105
    // If there is only one bucket, then all values will be assigned to that bucket
106
41
    if (num_buckets == 1) {
107
0
        return total_values;
108
0
    }
109
110
    // To calculate the maximum value count in each bucket, we first calculate a conservative upper
111
    // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed
112
    // the actual maximum value count, but it does not underestimate it. The subsequent binary search
113
    // algorithm will approach the actual maximum value count.
114
41
    size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1;
115
116
    // Initialize the lower bound to 0
117
41
    size_t lower_bucket_values = 0;
118
119
    // Perform a binary search to find the maximum number of values that can fit into each bucket
120
41
    int search_step = 0;
121
41
    const int max_search_steps =
122
41
            10; // Limit the number of search steps to avoid excessive iteration
123
124
72
    while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) {
125
        // Calculate the midpoint of the upper and lower bounds
126
31
        const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2;
127
128
        // Check if the given number of values can be assigned to the desired number of buckets
129
31
        if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) {
130
            // If it can, then set the upper bound to the midpoint
131
16
            upper_bucket_values = bucket_values;
132
16
        } else {
133
            // If it can't, then set the lower bound to the midpoint
134
15
            lower_bucket_values = bucket_values;
135
15
        }
136
        // Increment the search step counter
137
31
        ++search_step;
138
31
    }
139
140
41
    return upper_bucket_values;
141
41
}
_ZN5doris27calculate_bucket_max_valuesIdEEmRKSt3mapIT_mSt4lessIS2_ESaISt4pairIKS2_mEEEm
Line
Count
Source
95
41
size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) {
96
    // Ensure that the value map is not empty
97
41
    assert(!value_map.empty());
98
99
    // Calculate the total number of values in the map using std::accumulate()
100
41
    size_t total_values = 0;
101
328
    for (const auto& [value, count] : value_map) {
102
328
        total_values += count;
103
328
    }
104
105
    // If there is only one bucket, then all values will be assigned to that bucket
106
41
    if (num_buckets == 1) {
107
0
        return total_values;
108
0
    }
109
110
    // To calculate the maximum value count in each bucket, we first calculate a conservative upper
111
    // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed
112
    // the actual maximum value count, but it does not underestimate it. The subsequent binary search
113
    // algorithm will approach the actual maximum value count.
114
41
    size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1;
115
116
    // Initialize the lower bound to 0
117
41
    size_t lower_bucket_values = 0;
118
119
    // Perform a binary search to find the maximum number of values that can fit into each bucket
120
41
    int search_step = 0;
121
41
    const int max_search_steps =
122
41
            10; // Limit the number of search steps to avoid excessive iteration
123
124
72
    while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) {
125
        // Calculate the midpoint of the upper and lower bounds
126
31
        const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2;
127
128
        // Check if the given number of values can be assigned to the desired number of buckets
129
31
        if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) {
130
            // If it can, then set the upper bound to the midpoint
131
16
            upper_bucket_values = bucket_values;
132
16
        } else {
133
            // If it can't, then set the lower bound to the midpoint
134
15
            lower_bucket_values = bucket_values;
135
15
        }
136
        // Increment the search step counter
137
31
        ++search_step;
138
31
    }
139
140
41
    return upper_bucket_values;
141
41
}
_ZN5doris27calculate_bucket_max_valuesINS_7DecimalIiEEEEmRKSt3mapIT_mSt4lessIS4_ESaISt4pairIKS4_mEEEm
Line
Count
Source
95
45
size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) {
96
    // Ensure that the value map is not empty
97
45
    assert(!value_map.empty());
98
99
    // Calculate the total number of values in the map using std::accumulate()
100
45
    size_t total_values = 0;
101
161
    for (const auto& [value, count] : value_map) {
102
161
        total_values += count;
103
161
    }
104
105
    // If there is only one bucket, then all values will be assigned to that bucket
106
45
    if (num_buckets == 1) {
107
0
        return total_values;
108
0
    }
109
110
    // To calculate the maximum value count in each bucket, we first calculate a conservative upper
111
    // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed
112
    // the actual maximum value count, but it does not underestimate it. The subsequent binary search
113
    // algorithm will approach the actual maximum value count.
114
45
    size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1;
115
116
    // Initialize the lower bound to 0
117
45
    size_t lower_bucket_values = 0;
118
119
    // Perform a binary search to find the maximum number of values that can fit into each bucket
120
45
    int search_step = 0;
121
45
    const int max_search_steps =
122
45
            10; // Limit the number of search steps to avoid excessive iteration
123
124
84
    while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) {
125
        // Calculate the midpoint of the upper and lower bounds
126
39
        const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2;
127
128
        // Check if the given number of values can be assigned to the desired number of buckets
129
39
        if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) {
130
            // If it can, then set the upper bound to the midpoint
131
22
            upper_bucket_values = bucket_values;
132
22
        } else {
133
            // If it can't, then set the lower bound to the midpoint
134
17
            lower_bucket_values = bucket_values;
135
17
        }
136
        // Increment the search step counter
137
39
        ++search_step;
138
39
    }
139
140
45
    return upper_bucket_values;
141
45
}
Unexecuted instantiation: _ZN5doris27calculate_bucket_max_valuesINS_7DecimalIlEEEEmRKSt3mapIT_mSt4lessIS4_ESaISt4pairIKS4_mEEEm
Unexecuted instantiation: _ZN5doris27calculate_bucket_max_valuesINS_12Decimal128V3EEEmRKSt3mapIT_mSt4lessIS3_ESaISt4pairIKS3_mEEEm
Unexecuted instantiation: _ZN5doris27calculate_bucket_max_valuesINS_7DecimalIN4wide7integerILm256EiEEEEEEmRKSt3mapIT_mSt4lessIS7_ESaISt4pairIKS7_mEEEm
_ZN5doris27calculate_bucket_max_valuesINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEmRKSt3mapIT_mSt4lessIS8_ESaISt4pairIKS8_mEEEm
Line
Count
Source
95
100
size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) {
96
    // Ensure that the value map is not empty
97
100
    assert(!value_map.empty());
98
99
    // Calculate the total number of values in the map using std::accumulate()
100
100
    size_t total_values = 0;
101
2.20k
    for (const auto& [value, count] : value_map) {
102
2.20k
        total_values += count;
103
2.20k
    }
104
105
    // If there is only one bucket, then all values will be assigned to that bucket
106
100
    if (num_buckets == 1) {
107
0
        return total_values;
108
0
    }
109
110
    // To calculate the maximum value count in each bucket, we first calculate a conservative upper
111
    // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed
112
    // the actual maximum value count, but it does not underestimate it. The subsequent binary search
113
    // algorithm will approach the actual maximum value count.
114
100
    size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1;
115
116
    // Initialize the lower bound to 0
117
100
    size_t lower_bucket_values = 0;
118
119
    // Perform a binary search to find the maximum number of values that can fit into each bucket
120
100
    int search_step = 0;
121
100
    const int max_search_steps =
122
100
            10; // Limit the number of search steps to avoid excessive iteration
123
124
200
    while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) {
125
        // Calculate the midpoint of the upper and lower bounds
126
100
        const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2;
127
128
        // Check if the given number of values can be assigned to the desired number of buckets
129
100
        if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) {
130
            // If it can, then set the upper bound to the midpoint
131
62
            upper_bucket_values = bucket_values;
132
62
        } else {
133
            // If it can't, then set the lower bound to the midpoint
134
38
            lower_bucket_values = bucket_values;
135
38
        }
136
        // Increment the search step counter
137
100
        ++search_step;
138
100
    }
139
140
100
    return upper_bucket_values;
141
100
}
_ZN5doris27calculate_bucket_max_valuesINS_11DateV2ValueINS_15DateV2ValueTypeEEEEEmRKSt3mapIT_mSt4lessIS5_ESaISt4pairIKS5_mEEEm
Line
Count
Source
95
81
size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) {
96
    // Ensure that the value map is not empty
97
81
    assert(!value_map.empty());
98
99
    // Calculate the total number of values in the map using std::accumulate()
100
81
    size_t total_values = 0;
101
458
    for (const auto& [value, count] : value_map) {
102
458
        total_values += count;
103
458
    }
104
105
    // If there is only one bucket, then all values will be assigned to that bucket
106
81
    if (num_buckets == 1) {
107
0
        return total_values;
108
0
    }
109
110
    // To calculate the maximum value count in each bucket, we first calculate a conservative upper
111
    // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed
112
    // the actual maximum value count, but it does not underestimate it. The subsequent binary search
113
    // algorithm will approach the actual maximum value count.
114
81
    size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1;
115
116
    // Initialize the lower bound to 0
117
81
    size_t lower_bucket_values = 0;
118
119
    // Perform a binary search to find the maximum number of values that can fit into each bucket
120
81
    int search_step = 0;
121
81
    const int max_search_steps =
122
81
            10; // Limit the number of search steps to avoid excessive iteration
123
124
132
    while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) {
125
        // Calculate the midpoint of the upper and lower bounds
126
51
        const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2;
127
128
        // Check if the given number of values can be assigned to the desired number of buckets
129
51
        if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) {
130
            // If it can, then set the upper bound to the midpoint
131
28
            upper_bucket_values = bucket_values;
132
28
        } else {
133
            // If it can't, then set the lower bound to the midpoint
134
23
            lower_bucket_values = bucket_values;
135
23
        }
136
        // Increment the search step counter
137
51
        ++search_step;
138
51
    }
139
140
81
    return upper_bucket_values;
141
81
}
_ZN5doris27calculate_bucket_max_valuesINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEEEEmRKSt3mapIT_mSt4lessIS5_ESaISt4pairIKS5_mEEEm
Line
Count
Source
95
81
size_t calculate_bucket_max_values(const std::map<T, size_t>& value_map, const size_t num_buckets) {
96
    // Ensure that the value map is not empty
97
81
    assert(!value_map.empty());
98
99
    // Calculate the total number of values in the map using std::accumulate()
100
81
    size_t total_values = 0;
101
457
    for (const auto& [value, count] : value_map) {
102
457
        total_values += count;
103
457
    }
104
105
    // If there is only one bucket, then all values will be assigned to that bucket
106
81
    if (num_buckets == 1) {
107
0
        return total_values;
108
0
    }
109
110
    // To calculate the maximum value count in each bucket, we first calculate a conservative upper
111
    // bound, which is equal to 2 * total_values / (max_buckets - 1) + 1. This upper bound may exceed
112
    // the actual maximum value count, but it does not underestimate it. The subsequent binary search
113
    // algorithm will approach the actual maximum value count.
114
81
    size_t upper_bucket_values = 2 * total_values / (num_buckets - 1) + 1;
115
116
    // Initialize the lower bound to 0
117
81
    size_t lower_bucket_values = 0;
118
119
    // Perform a binary search to find the maximum number of values that can fit into each bucket
120
81
    int search_step = 0;
121
81
    const int max_search_steps =
122
81
            10; // Limit the number of search steps to avoid excessive iteration
123
124
132
    while (upper_bucket_values > lower_bucket_values + 1 && search_step < max_search_steps) {
125
        // Calculate the midpoint of the upper and lower bounds
126
51
        const size_t bucket_values = (upper_bucket_values + lower_bucket_values) / 2;
127
128
        // Check if the given number of values can be assigned to the desired number of buckets
129
51
        if (can_assign_into_buckets(value_map, bucket_values, num_buckets)) {
130
            // If it can, then set the upper bound to the midpoint
131
28
            upper_bucket_values = bucket_values;
132
28
        } else {
133
            // If it can't, then set the lower bound to the midpoint
134
23
            lower_bucket_values = bucket_values;
135
23
        }
136
        // Increment the search step counter
137
51
        ++search_step;
138
51
    }
139
140
81
    return upper_bucket_values;
141
81
}
142
143
/**
144
 * Greedy equi-height histogram construction algorithm, inspired by the MySQL
145
 * equi_height implementation(https://dev.mysql.com/doc/dev/mysql-server/latest/equi__height_8h.html).
146
 *
147
 * Given an ordered collection of [value, count] pairs and a maximum bucket
148
 * size, construct a histogram by inserting values into a bucket while keeping
149
 * track of its size. If the insertion of a value into a non-empty bucket
150
 * causes the bucket to exceed the maximum size, create a new empty bucket and
151
 * continue.
152
 *
153
 * The algorithm guarantees a selectivity estimation error of at most ~2 *
154
 * #values / #buckets, often less. Values with a higher relative frequency are
155
 * guaranteed to be placed in singleton buckets.
156
 *
157
 * The minimum composite bucket size is used to minimize the worst case
158
 * selectivity estimation error. In general, the algorithm will adapt to the
159
 * data distribution to minimize the size of composite buckets. The heavy values
160
 * can be placed in singleton buckets and the remaining values will be evenly
161
 * spread across the remaining buckets, leading to a lower composite bucket size.
162
 *
163
 * Note: The term "value" refers to an entry in a column and the actual value
164
 * of an entry. The ordered_map is an ordered collection of [distinct value,
165
 * value count] pairs. For example, a Value_map<String> could contain the pairs ["a", 1], ["b", 2]
166
 * to represent one "a" value and two "b" values.
167
 *
168
 * @param buckets A vector of empty buckets that will be populated with data.
169
 * @param ordered_map An ordered map of distinct values and their counts.
170
 * @param max_num_buckets The maximum number of buckets that can be used.
171
 *
172
 * @return True if the buckets were successfully built, false otherwise.
173
 */
174
template <typename T>
175
bool build_histogram(std::vector<Bucket<T>>& buckets, const std::map<T, size_t>& ordered_map,
176
707
                     const size_t max_num_buckets) {
177
    // If the input map is empty, there is nothing to build.
178
707
    if (ordered_map.empty()) {
179
66
        return false;
180
66
    }
181
182
    // Calculate the maximum number of values that can be assigned to each bucket.
183
641
    auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets);
184
185
    // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional
186
    // allocations when inserting buckets.
187
641
    buckets.clear();
188
641
    buckets.reserve(max_num_buckets);
189
190
    // Initialize bucket variables.
191
641
    size_t distinct_values_count = 0;
192
641
    size_t values_count = 0;
193
641
    size_t cumulative_values = 0;
194
195
    // Record how many values still need to be assigned.
196
641
    auto remaining_distinct_values = ordered_map.size();
197
198
641
    auto it = ordered_map.begin();
199
200
    // Lower value of the current bucket.
201
641
    const T* lower_value = &it->first;
202
203
    // Iterate over the ordered map of distinct values and their counts.
204
6.39k
    for (; it != ordered_map.end(); ++it) {
205
5.74k
        const auto count = it->second;
206
5.74k
        const auto current_value = it->first;
207
208
        // Update the bucket counts and track the number of distinct values assigned.
209
5.74k
        distinct_values_count++;
210
5.74k
        remaining_distinct_values--;
211
5.74k
        values_count += count;
212
5.74k
        cumulative_values += count;
213
214
        // Check whether the current value should be added to the current bucket.
215
5.74k
        auto next = std::next(it);
216
5.74k
        size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1;
217
218
5.74k
        if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets &&
219
5.74k
            values_count + next->second <= bucket_max_values) {
220
            // If the current value is the last in the input map and there are more remaining
221
            // distinct values than empty buckets and adding the value does not cause the bucket
222
            // to exceed its max size, skip adding the value to the current bucket.
223
3.89k
            continue;
224
3.89k
        }
225
226
        // Finalize the current bucket and add it to our collection of buckets.
227
1.85k
        auto pre_sum = cumulative_values - values_count;
228
229
1.85k
        Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count,
230
1.85k
                             pre_sum);
231
1.85k
        buckets.push_back(new_bucket);
232
233
        // Reset variables for the next bucket.
234
1.85k
        if (next != ordered_map.end()) {
235
1.21k
            lower_value = &next->first;
236
1.21k
        }
237
1.85k
        values_count = 0;
238
1.85k
        distinct_values_count = 0;
239
1.85k
    }
240
241
641
    return true;
242
707
}
_ZN5doris15build_histogramIhEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm
Line
Count
Source
176
2
                     const size_t max_num_buckets) {
177
    // If the input map is empty, there is nothing to build.
178
2
    if (ordered_map.empty()) {
179
1
        return false;
180
1
    }
181
182
    // Calculate the maximum number of values that can be assigned to each bucket.
183
1
    auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets);
184
185
    // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional
186
    // allocations when inserting buckets.
187
1
    buckets.clear();
188
1
    buckets.reserve(max_num_buckets);
189
190
    // Initialize bucket variables.
191
1
    size_t distinct_values_count = 0;
192
1
    size_t values_count = 0;
193
1
    size_t cumulative_values = 0;
194
195
    // Record how many values still need to be assigned.
196
1
    auto remaining_distinct_values = ordered_map.size();
197
198
1
    auto it = ordered_map.begin();
199
200
    // Lower value of the current bucket.
201
1
    const T* lower_value = &it->first;
202
203
    // Iterate over the ordered map of distinct values and their counts.
204
3
    for (; it != ordered_map.end(); ++it) {
205
2
        const auto count = it->second;
206
2
        const auto current_value = it->first;
207
208
        // Update the bucket counts and track the number of distinct values assigned.
209
2
        distinct_values_count++;
210
2
        remaining_distinct_values--;
211
2
        values_count += count;
212
2
        cumulative_values += count;
213
214
        // Check whether the current value should be added to the current bucket.
215
2
        auto next = std::next(it);
216
2
        size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1;
217
218
2
        if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets &&
219
2
            values_count + next->second <= bucket_max_values) {
220
            // If the current value is the last in the input map and there are more remaining
221
            // distinct values than empty buckets and adding the value does not cause the bucket
222
            // to exceed its max size, skip adding the value to the current bucket.
223
0
            continue;
224
0
        }
225
226
        // Finalize the current bucket and add it to our collection of buckets.
227
2
        auto pre_sum = cumulative_values - values_count;
228
229
2
        Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count,
230
2
                             pre_sum);
231
2
        buckets.push_back(new_bucket);
232
233
        // Reset variables for the next bucket.
234
2
        if (next != ordered_map.end()) {
235
1
            lower_value = &next->first;
236
1
        }
237
2
        values_count = 0;
238
2
        distinct_values_count = 0;
239
2
    }
240
241
1
    return true;
242
2
}
_ZN5doris15build_histogramIaEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm
Line
Count
Source
176
48
                     const size_t max_num_buckets) {
177
    // If the input map is empty, there is nothing to build.
178
48
    if (ordered_map.empty()) {
179
6
        return false;
180
6
    }
181
182
    // Calculate the maximum number of values that can be assigned to each bucket.
183
42
    auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets);
184
185
    // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional
186
    // allocations when inserting buckets.
187
42
    buckets.clear();
188
42
    buckets.reserve(max_num_buckets);
189
190
    // Initialize bucket variables.
191
42
    size_t distinct_values_count = 0;
192
42
    size_t values_count = 0;
193
42
    size_t cumulative_values = 0;
194
195
    // Record how many values still need to be assigned.
196
42
    auto remaining_distinct_values = ordered_map.size();
197
198
42
    auto it = ordered_map.begin();
199
200
    // Lower value of the current bucket.
201
42
    const T* lower_value = &it->first;
202
203
    // Iterate over the ordered map of distinct values and their counts.
204
382
    for (; it != ordered_map.end(); ++it) {
205
340
        const auto count = it->second;
206
340
        const auto current_value = it->first;
207
208
        // Update the bucket counts and track the number of distinct values assigned.
209
340
        distinct_values_count++;
210
340
        remaining_distinct_values--;
211
340
        values_count += count;
212
340
        cumulative_values += count;
213
214
        // Check whether the current value should be added to the current bucket.
215
340
        auto next = std::next(it);
216
340
        size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1;
217
218
340
        if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets &&
219
340
            values_count + next->second <= bucket_max_values) {
220
            // If the current value is the last in the input map and there are more remaining
221
            // distinct values than empty buckets and adding the value does not cause the bucket
222
            // to exceed its max size, skip adding the value to the current bucket.
223
211
            continue;
224
211
        }
225
226
        // Finalize the current bucket and add it to our collection of buckets.
227
129
        auto pre_sum = cumulative_values - values_count;
228
229
129
        Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count,
230
129
                             pre_sum);
231
129
        buckets.push_back(new_bucket);
232
233
        // Reset variables for the next bucket.
234
129
        if (next != ordered_map.end()) {
235
87
            lower_value = &next->first;
236
87
        }
237
129
        values_count = 0;
238
129
        distinct_values_count = 0;
239
129
    }
240
241
42
    return true;
242
48
}
_ZN5doris15build_histogramIsEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm
Line
Count
Source
176
48
                     const size_t max_num_buckets) {
177
    // If the input map is empty, there is nothing to build.
178
48
    if (ordered_map.empty()) {
179
5
        return false;
180
5
    }
181
182
    // Calculate the maximum number of values that can be assigned to each bucket.
183
43
    auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets);
184
185
    // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional
186
    // allocations when inserting buckets.
187
43
    buckets.clear();
188
43
    buckets.reserve(max_num_buckets);
189
190
    // Initialize bucket variables.
191
43
    size_t distinct_values_count = 0;
192
43
    size_t values_count = 0;
193
43
    size_t cumulative_values = 0;
194
195
    // Record how many values still need to be assigned.
196
43
    auto remaining_distinct_values = ordered_map.size();
197
198
43
    auto it = ordered_map.begin();
199
200
    // Lower value of the current bucket.
201
43
    const T* lower_value = &it->first;
202
203
    // Iterate over the ordered map of distinct values and their counts.
204
392
    for (; it != ordered_map.end(); ++it) {
205
349
        const auto count = it->second;
206
349
        const auto current_value = it->first;
207
208
        // Update the bucket counts and track the number of distinct values assigned.
209
349
        distinct_values_count++;
210
349
        remaining_distinct_values--;
211
349
        values_count += count;
212
349
        cumulative_values += count;
213
214
        // Check whether the current value should be added to the current bucket.
215
349
        auto next = std::next(it);
216
349
        size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1;
217
218
349
        if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets &&
219
349
            values_count + next->second <= bucket_max_values) {
220
            // If the current value is the last in the input map and there are more remaining
221
            // distinct values than empty buckets and adding the value does not cause the bucket
222
            // to exceed its max size, skip adding the value to the current bucket.
223
217
            continue;
224
217
        }
225
226
        // Finalize the current bucket and add it to our collection of buckets.
227
132
        auto pre_sum = cumulative_values - values_count;
228
229
132
        Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count,
230
132
                             pre_sum);
231
132
        buckets.push_back(new_bucket);
232
233
        // Reset variables for the next bucket.
234
132
        if (next != ordered_map.end()) {
235
89
            lower_value = &next->first;
236
89
        }
237
132
        values_count = 0;
238
132
        distinct_values_count = 0;
239
132
    }
240
241
43
    return true;
242
48
}
_ZN5doris15build_histogramIiEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm
Line
Count
Source
176
74
                     const size_t max_num_buckets) {
177
    // If the input map is empty, there is nothing to build.
178
74
    if (ordered_map.empty()) {
179
9
        return false;
180
9
    }
181
182
    // Calculate the maximum number of values that can be assigned to each bucket.
183
65
    auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets);
184
185
    // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional
186
    // allocations when inserting buckets.
187
65
    buckets.clear();
188
65
    buckets.reserve(max_num_buckets);
189
190
    // Initialize bucket variables.
191
65
    size_t distinct_values_count = 0;
192
65
    size_t values_count = 0;
193
65
    size_t cumulative_values = 0;
194
195
    // Record how many values still need to be assigned.
196
65
    auto remaining_distinct_values = ordered_map.size();
197
198
65
    auto it = ordered_map.begin();
199
200
    // Lower value of the current bucket.
201
65
    const T* lower_value = &it->first;
202
203
    // Iterate over the ordered map of distinct values and their counts.
204
428
    for (; it != ordered_map.end(); ++it) {
205
363
        const auto count = it->second;
206
363
        const auto current_value = it->first;
207
208
        // Update the bucket counts and track the number of distinct values assigned.
209
363
        distinct_values_count++;
210
363
        remaining_distinct_values--;
211
363
        values_count += count;
212
363
        cumulative_values += count;
213
214
        // Check whether the current value should be added to the current bucket.
215
363
        auto next = std::next(it);
216
363
        size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1;
217
218
363
        if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets &&
219
363
            values_count + next->second <= bucket_max_values) {
220
            // If the current value is the last in the input map and there are more remaining
221
            // distinct values than empty buckets and adding the value does not cause the bucket
222
            // to exceed its max size, skip adding the value to the current bucket.
223
202
            continue;
224
202
        }
225
226
        // Finalize the current bucket and add it to our collection of buckets.
227
161
        auto pre_sum = cumulative_values - values_count;
228
229
161
        Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count,
230
161
                             pre_sum);
231
161
        buckets.push_back(new_bucket);
232
233
        // Reset variables for the next bucket.
234
161
        if (next != ordered_map.end()) {
235
96
            lower_value = &next->first;
236
96
        }
237
161
        values_count = 0;
238
161
        distinct_values_count = 0;
239
161
    }
240
241
65
    return true;
242
74
}
_ZN5doris15build_histogramIlEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm
Line
Count
Source
176
65
                     const size_t max_num_buckets) {
177
    // If the input map is empty, there is nothing to build.
178
65
    if (ordered_map.empty()) {
179
6
        return false;
180
6
    }
181
182
    // Calculate the maximum number of values that can be assigned to each bucket.
183
59
    auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets);
184
185
    // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional
186
    // allocations when inserting buckets.
187
59
    buckets.clear();
188
59
    buckets.reserve(max_num_buckets);
189
190
    // Initialize bucket variables.
191
59
    size_t distinct_values_count = 0;
192
59
    size_t values_count = 0;
193
59
    size_t cumulative_values = 0;
194
195
    // Record how many values still need to be assigned.
196
59
    auto remaining_distinct_values = ordered_map.size();
197
198
59
    auto it = ordered_map.begin();
199
200
    // Lower value of the current bucket.
201
59
    const T* lower_value = &it->first;
202
203
    // Iterate over the ordered map of distinct values and their counts.
204
477
    for (; it != ordered_map.end(); ++it) {
205
418
        const auto count = it->second;
206
418
        const auto current_value = it->first;
207
208
        // Update the bucket counts and track the number of distinct values assigned.
209
418
        distinct_values_count++;
210
418
        remaining_distinct_values--;
211
418
        values_count += count;
212
418
        cumulative_values += count;
213
214
        // Check whether the current value should be added to the current bucket.
215
418
        auto next = std::next(it);
216
418
        size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1;
217
218
418
        if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets &&
219
418
            values_count + next->second <= bucket_max_values) {
220
            // If the current value is the last in the input map and there are more remaining
221
            // distinct values than empty buckets and adding the value does not cause the bucket
222
            // to exceed its max size, skip adding the value to the current bucket.
223
206
            continue;
224
206
        }
225
226
        // Finalize the current bucket and add it to our collection of buckets.
227
212
        auto pre_sum = cumulative_values - values_count;
228
229
212
        Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count,
230
212
                             pre_sum);
231
212
        buckets.push_back(new_bucket);
232
233
        // Reset variables for the next bucket.
234
212
        if (next != ordered_map.end()) {
235
153
            lower_value = &next->first;
236
153
        }
237
212
        values_count = 0;
238
212
        distinct_values_count = 0;
239
212
    }
240
241
59
    return true;
242
65
}
_ZN5doris15build_histogramInEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm
Line
Count
Source
176
48
                     const size_t max_num_buckets) {
177
    // If the input map is empty, there is nothing to build.
178
48
    if (ordered_map.empty()) {
179
6
        return false;
180
6
    }
181
182
    // Calculate the maximum number of values that can be assigned to each bucket.
183
42
    auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets);
184
185
    // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional
186
    // allocations when inserting buckets.
187
42
    buckets.clear();
188
42
    buckets.reserve(max_num_buckets);
189
190
    // Initialize bucket variables.
191
42
    size_t distinct_values_count = 0;
192
42
    size_t values_count = 0;
193
42
    size_t cumulative_values = 0;
194
195
    // Record how many values still need to be assigned.
196
42
    auto remaining_distinct_values = ordered_map.size();
197
198
42
    auto it = ordered_map.begin();
199
200
    // Lower value of the current bucket.
201
42
    const T* lower_value = &it->first;
202
203
    // Iterate over the ordered map of distinct values and their counts.
204
385
    for (; it != ordered_map.end(); ++it) {
205
343
        const auto count = it->second;
206
343
        const auto current_value = it->first;
207
208
        // Update the bucket counts and track the number of distinct values assigned.
209
343
        distinct_values_count++;
210
343
        remaining_distinct_values--;
211
343
        values_count += count;
212
343
        cumulative_values += count;
213
214
        // Check whether the current value should be added to the current bucket.
215
343
        auto next = std::next(it);
216
343
        size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1;
217
218
343
        if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets &&
219
343
            values_count + next->second <= bucket_max_values) {
220
            // If the current value is the last in the input map and there are more remaining
221
            // distinct values than empty buckets and adding the value does not cause the bucket
222
            // to exceed its max size, skip adding the value to the current bucket.
223
202
            continue;
224
202
        }
225
226
        // Finalize the current bucket and add it to our collection of buckets.
227
141
        auto pre_sum = cumulative_values - values_count;
228
229
141
        Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count,
230
141
                             pre_sum);
231
141
        buckets.push_back(new_bucket);
232
233
        // Reset variables for the next bucket.
234
141
        if (next != ordered_map.end()) {
235
99
            lower_value = &next->first;
236
99
        }
237
141
        values_count = 0;
238
141
        distinct_values_count = 0;
239
141
    }
240
241
42
    return true;
242
48
}
_ZN5doris15build_histogramIfEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm
Line
Count
Source
176
46
                     const size_t max_num_buckets) {
177
    // If the input map is empty, there is nothing to build.
178
46
    if (ordered_map.empty()) {
179
5
        return false;
180
5
    }
181
182
    // Calculate the maximum number of values that can be assigned to each bucket.
183
41
    auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets);
184
185
    // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional
186
    // allocations when inserting buckets.
187
41
    buckets.clear();
188
41
    buckets.reserve(max_num_buckets);
189
190
    // Initialize bucket variables.
191
41
    size_t distinct_values_count = 0;
192
41
    size_t values_count = 0;
193
41
    size_t cumulative_values = 0;
194
195
    // Record how many values still need to be assigned.
196
41
    auto remaining_distinct_values = ordered_map.size();
197
198
41
    auto it = ordered_map.begin();
199
200
    // Lower value of the current bucket.
201
41
    const T* lower_value = &it->first;
202
203
    // Iterate over the ordered map of distinct values and their counts.
204
369
    for (; it != ordered_map.end(); ++it) {
205
328
        const auto count = it->second;
206
328
        const auto current_value = it->first;
207
208
        // Update the bucket counts and track the number of distinct values assigned.
209
328
        distinct_values_count++;
210
328
        remaining_distinct_values--;
211
328
        values_count += count;
212
328
        cumulative_values += count;
213
214
        // Check whether the current value should be added to the current bucket.
215
328
        auto next = std::next(it);
216
328
        size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1;
217
218
328
        if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets &&
219
328
            values_count + next->second <= bucket_max_values) {
220
            // If the current value is the last in the input map and there are more remaining
221
            // distinct values than empty buckets and adding the value does not cause the bucket
222
            // to exceed its max size, skip adding the value to the current bucket.
223
200
            continue;
224
200
        }
225
226
        // Finalize the current bucket and add it to our collection of buckets.
227
128
        auto pre_sum = cumulative_values - values_count;
228
229
128
        Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count,
230
128
                             pre_sum);
231
128
        buckets.push_back(new_bucket);
232
233
        // Reset variables for the next bucket.
234
128
        if (next != ordered_map.end()) {
235
87
            lower_value = &next->first;
236
87
        }
237
128
        values_count = 0;
238
128
        distinct_values_count = 0;
239
128
    }
240
241
41
    return true;
242
46
}
_ZN5doris15build_histogramIdEEbRSt6vectorINS_6BucketIT_EESaIS4_EERKSt3mapIS3_mSt4lessIS3_ESaISt4pairIKS3_mEEEm
Line
Count
Source
176
46
                     const size_t max_num_buckets) {
177
    // If the input map is empty, there is nothing to build.
178
46
    if (ordered_map.empty()) {
179
5
        return false;
180
5
    }
181
182
    // Calculate the maximum number of values that can be assigned to each bucket.
183
41
    auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets);
184
185
    // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional
186
    // allocations when inserting buckets.
187
41
    buckets.clear();
188
41
    buckets.reserve(max_num_buckets);
189
190
    // Initialize bucket variables.
191
41
    size_t distinct_values_count = 0;
192
41
    size_t values_count = 0;
193
41
    size_t cumulative_values = 0;
194
195
    // Record how many values still need to be assigned.
196
41
    auto remaining_distinct_values = ordered_map.size();
197
198
41
    auto it = ordered_map.begin();
199
200
    // Lower value of the current bucket.
201
41
    const T* lower_value = &it->first;
202
203
    // Iterate over the ordered map of distinct values and their counts.
204
369
    for (; it != ordered_map.end(); ++it) {
205
328
        const auto count = it->second;
206
328
        const auto current_value = it->first;
207
208
        // Update the bucket counts and track the number of distinct values assigned.
209
328
        distinct_values_count++;
210
328
        remaining_distinct_values--;
211
328
        values_count += count;
212
328
        cumulative_values += count;
213
214
        // Check whether the current value should be added to the current bucket.
215
328
        auto next = std::next(it);
216
328
        size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1;
217
218
328
        if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets &&
219
328
            values_count + next->second <= bucket_max_values) {
220
            // If the current value is the last in the input map and there are more remaining
221
            // distinct values than empty buckets and adding the value does not cause the bucket
222
            // to exceed its max size, skip adding the value to the current bucket.
223
200
            continue;
224
200
        }
225
226
        // Finalize the current bucket and add it to our collection of buckets.
227
128
        auto pre_sum = cumulative_values - values_count;
228
229
128
        Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count,
230
128
                             pre_sum);
231
128
        buckets.push_back(new_bucket);
232
233
        // Reset variables for the next bucket.
234
128
        if (next != ordered_map.end()) {
235
87
            lower_value = &next->first;
236
87
        }
237
128
        values_count = 0;
238
128
        distinct_values_count = 0;
239
128
    }
240
241
41
    return true;
242
46
}
_ZN5doris15build_histogramINS_7DecimalIiEEEEbRSt6vectorINS_6BucketIT_EESaIS6_EERKSt3mapIS5_mSt4lessIS5_ESaISt4pairIKS5_mEEEm
Line
Count
Source
176
49
                     const size_t max_num_buckets) {
177
    // If the input map is empty, there is nothing to build.
178
49
    if (ordered_map.empty()) {
179
4
        return false;
180
4
    }
181
182
    // Calculate the maximum number of values that can be assigned to each bucket.
183
45
    auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets);
184
185
    // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional
186
    // allocations when inserting buckets.
187
45
    buckets.clear();
188
45
    buckets.reserve(max_num_buckets);
189
190
    // Initialize bucket variables.
191
45
    size_t distinct_values_count = 0;
192
45
    size_t values_count = 0;
193
45
    size_t cumulative_values = 0;
194
195
    // Record how many values still need to be assigned.
196
45
    auto remaining_distinct_values = ordered_map.size();
197
198
45
    auto it = ordered_map.begin();
199
200
    // Lower value of the current bucket.
201
45
    const T* lower_value = &it->first;
202
203
    // Iterate over the ordered map of distinct values and their counts.
204
206
    for (; it != ordered_map.end(); ++it) {
205
161
        const auto count = it->second;
206
161
        const auto current_value = it->first;
207
208
        // Update the bucket counts and track the number of distinct values assigned.
209
161
        distinct_values_count++;
210
161
        remaining_distinct_values--;
211
161
        values_count += count;
212
161
        cumulative_values += count;
213
214
        // Check whether the current value should be added to the current bucket.
215
161
        auto next = std::next(it);
216
161
        size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1;
217
218
161
        if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets &&
219
161
            values_count + next->second <= bucket_max_values) {
220
            // If the current value is the last in the input map and there are more remaining
221
            // distinct values than empty buckets and adding the value does not cause the bucket
222
            // to exceed its max size, skip adding the value to the current bucket.
223
28
            continue;
224
28
        }
225
226
        // Finalize the current bucket and add it to our collection of buckets.
227
133
        auto pre_sum = cumulative_values - values_count;
228
229
133
        Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count,
230
133
                             pre_sum);
231
133
        buckets.push_back(new_bucket);
232
233
        // Reset variables for the next bucket.
234
133
        if (next != ordered_map.end()) {
235
88
            lower_value = &next->first;
236
88
        }
237
133
        values_count = 0;
238
133
        distinct_values_count = 0;
239
133
    }
240
241
45
    return true;
242
49
}
Unexecuted instantiation: _ZN5doris15build_histogramINS_7DecimalIlEEEEbRSt6vectorINS_6BucketIT_EESaIS6_EERKSt3mapIS5_mSt4lessIS5_ESaISt4pairIKS5_mEEEm
Unexecuted instantiation: _ZN5doris15build_histogramINS_12Decimal128V3EEEbRSt6vectorINS_6BucketIT_EESaIS5_EERKSt3mapIS4_mSt4lessIS4_ESaISt4pairIKS4_mEEEm
Unexecuted instantiation: _ZN5doris15build_histogramINS_7DecimalIN4wide7integerILm256EiEEEEEEbRSt6vectorINS_6BucketIT_EESaIS9_EERKSt3mapIS8_mSt4lessIS8_ESaISt4pairIKS8_mEEEm
_ZN5doris15build_histogramINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEbRSt6vectorINS_6BucketIT_EESaISA_EERKSt3mapIS9_mSt4lessIS9_ESaISt4pairIKS9_mEEEm
Line
Count
Source
176
105
                     const size_t max_num_buckets) {
177
    // If the input map is empty, there is nothing to build.
178
105
    if (ordered_map.empty()) {
179
5
        return false;
180
5
    }
181
182
    // Calculate the maximum number of values that can be assigned to each bucket.
183
100
    auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets);
184
185
    // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional
186
    // allocations when inserting buckets.
187
100
    buckets.clear();
188
100
    buckets.reserve(max_num_buckets);
189
190
    // Initialize bucket variables.
191
100
    size_t distinct_values_count = 0;
192
100
    size_t values_count = 0;
193
100
    size_t cumulative_values = 0;
194
195
    // Record how many values still need to be assigned.
196
100
    auto remaining_distinct_values = ordered_map.size();
197
198
100
    auto it = ordered_map.begin();
199
200
    // Lower value of the current bucket.
201
100
    const T* lower_value = &it->first;
202
203
    // Iterate over the ordered map of distinct values and their counts.
204
2.30k
    for (; it != ordered_map.end(); ++it) {
205
2.20k
        const auto count = it->second;
206
2.20k
        const auto current_value = it->first;
207
208
        // Update the bucket counts and track the number of distinct values assigned.
209
2.20k
        distinct_values_count++;
210
2.20k
        remaining_distinct_values--;
211
2.20k
        values_count += count;
212
2.20k
        cumulative_values += count;
213
214
        // Check whether the current value should be added to the current bucket.
215
2.20k
        auto next = std::next(it);
216
2.20k
        size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1;
217
218
2.20k
        if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets &&
219
2.20k
            values_count + next->second <= bucket_max_values) {
220
            // If the current value is the last in the input map and there are more remaining
221
            // distinct values than empty buckets and adding the value does not cause the bucket
222
            // to exceed its max size, skip adding the value to the current bucket.
223
2.02k
            continue;
224
2.02k
        }
225
226
        // Finalize the current bucket and add it to our collection of buckets.
227
178
        auto pre_sum = cumulative_values - values_count;
228
229
178
        Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count,
230
178
                             pre_sum);
231
178
        buckets.push_back(new_bucket);
232
233
        // Reset variables for the next bucket.
234
178
        if (next != ordered_map.end()) {
235
78
            lower_value = &next->first;
236
78
        }
237
178
        values_count = 0;
238
178
        distinct_values_count = 0;
239
178
    }
240
241
100
    return true;
242
105
}
_ZN5doris15build_histogramINS_11DateV2ValueINS_15DateV2ValueTypeEEEEEbRSt6vectorINS_6BucketIT_EESaIS7_EERKSt3mapIS6_mSt4lessIS6_ESaISt4pairIKS6_mEEEm
Line
Count
Source
176
88
                     const size_t max_num_buckets) {
177
    // If the input map is empty, there is nothing to build.
178
88
    if (ordered_map.empty()) {
179
7
        return false;
180
7
    }
181
182
    // Calculate the maximum number of values that can be assigned to each bucket.
183
81
    auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets);
184
185
    // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional
186
    // allocations when inserting buckets.
187
81
    buckets.clear();
188
81
    buckets.reserve(max_num_buckets);
189
190
    // Initialize bucket variables.
191
81
    size_t distinct_values_count = 0;
192
81
    size_t values_count = 0;
193
81
    size_t cumulative_values = 0;
194
195
    // Record how many values still need to be assigned.
196
81
    auto remaining_distinct_values = ordered_map.size();
197
198
81
    auto it = ordered_map.begin();
199
200
    // Lower value of the current bucket.
201
81
    const T* lower_value = &it->first;
202
203
    // Iterate over the ordered map of distinct values and their counts.
204
539
    for (; it != ordered_map.end(); ++it) {
205
458
        const auto count = it->second;
206
458
        const auto current_value = it->first;
207
208
        // Update the bucket counts and track the number of distinct values assigned.
209
458
        distinct_values_count++;
210
458
        remaining_distinct_values--;
211
458
        values_count += count;
212
458
        cumulative_values += count;
213
214
        // Check whether the current value should be added to the current bucket.
215
458
        auto next = std::next(it);
216
458
        size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1;
217
218
458
        if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets &&
219
458
            values_count + next->second <= bucket_max_values) {
220
            // If the current value is the last in the input map and there are more remaining
221
            // distinct values than empty buckets and adding the value does not cause the bucket
222
            // to exceed its max size, skip adding the value to the current bucket.
223
202
            continue;
224
202
        }
225
226
        // Finalize the current bucket and add it to our collection of buckets.
227
256
        auto pre_sum = cumulative_values - values_count;
228
229
256
        Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count,
230
256
                             pre_sum);
231
256
        buckets.push_back(new_bucket);
232
233
        // Reset variables for the next bucket.
234
256
        if (next != ordered_map.end()) {
235
175
            lower_value = &next->first;
236
175
        }
237
256
        values_count = 0;
238
256
        distinct_values_count = 0;
239
256
    }
240
241
81
    return true;
242
88
}
_ZN5doris15build_histogramINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEEEEbRSt6vectorINS_6BucketIT_EESaIS7_EERKSt3mapIS6_mSt4lessIS6_ESaISt4pairIKS6_mEEEm
Line
Count
Source
176
88
                     const size_t max_num_buckets) {
177
    // If the input map is empty, there is nothing to build.
178
88
    if (ordered_map.empty()) {
179
7
        return false;
180
7
    }
181
182
    // Calculate the maximum number of values that can be assigned to each bucket.
183
81
    auto bucket_max_values = calculate_bucket_max_values(ordered_map, max_num_buckets);
184
185
    // Ensure that the capacity is at least max_num_buckets in order to avoid the overhead of additional
186
    // allocations when inserting buckets.
187
81
    buckets.clear();
188
81
    buckets.reserve(max_num_buckets);
189
190
    // Initialize bucket variables.
191
81
    size_t distinct_values_count = 0;
192
81
    size_t values_count = 0;
193
81
    size_t cumulative_values = 0;
194
195
    // Record how many values still need to be assigned.
196
81
    auto remaining_distinct_values = ordered_map.size();
197
198
81
    auto it = ordered_map.begin();
199
200
    // Lower value of the current bucket.
201
81
    const T* lower_value = &it->first;
202
203
    // Iterate over the ordered map of distinct values and their counts.
204
538
    for (; it != ordered_map.end(); ++it) {
205
457
        const auto count = it->second;
206
457
        const auto current_value = it->first;
207
208
        // Update the bucket counts and track the number of distinct values assigned.
209
457
        distinct_values_count++;
210
457
        remaining_distinct_values--;
211
457
        values_count += count;
212
457
        cumulative_values += count;
213
214
        // Check whether the current value should be added to the current bucket.
215
457
        auto next = std::next(it);
216
457
        size_t remaining_empty_buckets = max_num_buckets - buckets.size() - 1;
217
218
457
        if (next != ordered_map.end() && remaining_distinct_values > remaining_empty_buckets &&
219
457
            values_count + next->second <= bucket_max_values) {
220
            // If the current value is the last in the input map and there are more remaining
221
            // distinct values than empty buckets and adding the value does not cause the bucket
222
            // to exceed its max size, skip adding the value to the current bucket.
223
202
            continue;
224
202
        }
225
226
        // Finalize the current bucket and add it to our collection of buckets.
227
255
        auto pre_sum = cumulative_values - values_count;
228
229
255
        Bucket<T> new_bucket(*lower_value, current_value, distinct_values_count, values_count,
230
255
                             pre_sum);
231
255
        buckets.push_back(new_bucket);
232
233
        // Reset variables for the next bucket.
234
255
        if (next != ordered_map.end()) {
235
174
            lower_value = &next->first;
236
174
        }
237
255
        values_count = 0;
238
255
        distinct_values_count = 0;
239
255
    }
240
241
81
    return true;
242
88
}
243
244
template <typename T>
245
bool histogram_to_json(rapidjson::StringBuffer& buffer, const std::vector<Bucket<T>>& buckets,
246
702
                       const DataTypePtr& data_type) {
247
702
    rapidjson::Document doc;
248
702
    doc.SetObject();
249
702
    rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
250
251
702
    int num_buckets = cast_set<int>(buckets.size());
252
702
    doc.AddMember("num_buckets", num_buckets, allocator);
253
254
702
    rapidjson::Value bucket_arr(rapidjson::kArrayType);
255
702
    bucket_arr.Reserve(num_buckets, allocator);
256
257
702
    std::stringstream ss1;
258
702
    std::stringstream ss2;
259
260
702
    rapidjson::Value lower_val;
261
702
    rapidjson::Value upper_val;
262
263
    // Convert bucket's lower and upper to 2 columns
264
702
    MutableColumnPtr lower_column = data_type->create_column();
265
702
    MutableColumnPtr upper_column = data_type->create_column();
266
1.84k
    for (const auto& bucket : buckets) {
267
        // String type is different, it has to pass in length
268
        // if it is string type , directly use string value
269
1.84k
        if constexpr (!std::is_same_v<T, std::string>) {
270
1.66k
            lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0);
271
1.66k
            upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0);
272
1.66k
        }
273
1.84k
    }
274
702
    size_t row_num = 0;
275
276
702
    auto format_options = DataTypeSerDe::get_default_format_options();
277
702
    auto time_zone = cctz::utc_time_zone();
278
702
    format_options.timezone = &time_zone;
279
280
1.84k
    for (const auto& bucket : buckets) {
281
1.84k
        if constexpr (std::is_same_v<T, std::string>) {
282
178
            lower_val.SetString(bucket.lower.data(),
283
178
                                static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator);
284
178
            upper_val.SetString(bucket.upper.data(),
285
178
                                static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator);
286
1.66k
        } else {
287
1.66k
            std::string lower_str = data_type->to_string(*lower_column, row_num, format_options);
288
1.66k
            std::string upper_str = data_type->to_string(*upper_column, row_num, format_options);
289
1.66k
            ++row_num;
290
1.66k
            lower_val.SetString(lower_str.data(),
291
1.66k
                                static_cast<rapidjson::SizeType>(lower_str.size()), allocator);
292
1.66k
            upper_val.SetString(upper_str.data(),
293
1.66k
                                static_cast<rapidjson::SizeType>(upper_str.size()), allocator);
294
1.66k
        }
295
1.84k
        rapidjson::Value bucket_json(rapidjson::kObjectType);
296
1.84k
        bucket_json.AddMember("lower", lower_val, allocator);
297
1.84k
        bucket_json.AddMember("upper", upper_val, allocator);
298
1.84k
        bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator);
299
1.84k
        bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator);
300
1.84k
        bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator);
301
302
1.84k
        bucket_arr.PushBack(bucket_json, allocator);
303
1.84k
    }
304
305
702
    doc.AddMember("buckets", bucket_arr, allocator);
306
702
    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
307
702
    doc.Accept(writer);
308
309
702
    return !buckets.empty() && buffer.GetSize() > 0;
310
702
}
_ZN5doris17histogram_to_jsonIhEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE
Line
Count
Source
246
2
                       const DataTypePtr& data_type) {
247
2
    rapidjson::Document doc;
248
2
    doc.SetObject();
249
2
    rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
250
251
2
    int num_buckets = cast_set<int>(buckets.size());
252
2
    doc.AddMember("num_buckets", num_buckets, allocator);
253
254
2
    rapidjson::Value bucket_arr(rapidjson::kArrayType);
255
2
    bucket_arr.Reserve(num_buckets, allocator);
256
257
2
    std::stringstream ss1;
258
2
    std::stringstream ss2;
259
260
2
    rapidjson::Value lower_val;
261
2
    rapidjson::Value upper_val;
262
263
    // Convert bucket's lower and upper to 2 columns
264
2
    MutableColumnPtr lower_column = data_type->create_column();
265
2
    MutableColumnPtr upper_column = data_type->create_column();
266
2
    for (const auto& bucket : buckets) {
267
        // String type is different, it has to pass in length
268
        // if it is string type , directly use string value
269
2
        if constexpr (!std::is_same_v<T, std::string>) {
270
2
            lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0);
271
2
            upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0);
272
2
        }
273
2
    }
274
2
    size_t row_num = 0;
275
276
2
    auto format_options = DataTypeSerDe::get_default_format_options();
277
2
    auto time_zone = cctz::utc_time_zone();
278
2
    format_options.timezone = &time_zone;
279
280
2
    for (const auto& bucket : buckets) {
281
        if constexpr (std::is_same_v<T, std::string>) {
282
            lower_val.SetString(bucket.lower.data(),
283
                                static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator);
284
            upper_val.SetString(bucket.upper.data(),
285
                                static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator);
286
2
        } else {
287
2
            std::string lower_str = data_type->to_string(*lower_column, row_num, format_options);
288
2
            std::string upper_str = data_type->to_string(*upper_column, row_num, format_options);
289
2
            ++row_num;
290
2
            lower_val.SetString(lower_str.data(),
291
2
                                static_cast<rapidjson::SizeType>(lower_str.size()), allocator);
292
2
            upper_val.SetString(upper_str.data(),
293
2
                                static_cast<rapidjson::SizeType>(upper_str.size()), allocator);
294
2
        }
295
2
        rapidjson::Value bucket_json(rapidjson::kObjectType);
296
2
        bucket_json.AddMember("lower", lower_val, allocator);
297
2
        bucket_json.AddMember("upper", upper_val, allocator);
298
2
        bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator);
299
2
        bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator);
300
2
        bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator);
301
302
2
        bucket_arr.PushBack(bucket_json, allocator);
303
2
    }
304
305
2
    doc.AddMember("buckets", bucket_arr, allocator);
306
2
    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
307
2
    doc.Accept(writer);
308
309
2
    return !buckets.empty() && buffer.GetSize() > 0;
310
2
}
_ZN5doris17histogram_to_jsonIaEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE
Line
Count
Source
246
48
                       const DataTypePtr& data_type) {
247
48
    rapidjson::Document doc;
248
48
    doc.SetObject();
249
48
    rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
250
251
48
    int num_buckets = cast_set<int>(buckets.size());
252
48
    doc.AddMember("num_buckets", num_buckets, allocator);
253
254
48
    rapidjson::Value bucket_arr(rapidjson::kArrayType);
255
48
    bucket_arr.Reserve(num_buckets, allocator);
256
257
48
    std::stringstream ss1;
258
48
    std::stringstream ss2;
259
260
48
    rapidjson::Value lower_val;
261
48
    rapidjson::Value upper_val;
262
263
    // Convert bucket's lower and upper to 2 columns
264
48
    MutableColumnPtr lower_column = data_type->create_column();
265
48
    MutableColumnPtr upper_column = data_type->create_column();
266
129
    for (const auto& bucket : buckets) {
267
        // String type is different, it has to pass in length
268
        // if it is string type , directly use string value
269
129
        if constexpr (!std::is_same_v<T, std::string>) {
270
129
            lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0);
271
129
            upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0);
272
129
        }
273
129
    }
274
48
    size_t row_num = 0;
275
276
48
    auto format_options = DataTypeSerDe::get_default_format_options();
277
48
    auto time_zone = cctz::utc_time_zone();
278
48
    format_options.timezone = &time_zone;
279
280
129
    for (const auto& bucket : buckets) {
281
        if constexpr (std::is_same_v<T, std::string>) {
282
            lower_val.SetString(bucket.lower.data(),
283
                                static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator);
284
            upper_val.SetString(bucket.upper.data(),
285
                                static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator);
286
129
        } else {
287
129
            std::string lower_str = data_type->to_string(*lower_column, row_num, format_options);
288
129
            std::string upper_str = data_type->to_string(*upper_column, row_num, format_options);
289
129
            ++row_num;
290
129
            lower_val.SetString(lower_str.data(),
291
129
                                static_cast<rapidjson::SizeType>(lower_str.size()), allocator);
292
129
            upper_val.SetString(upper_str.data(),
293
129
                                static_cast<rapidjson::SizeType>(upper_str.size()), allocator);
294
129
        }
295
129
        rapidjson::Value bucket_json(rapidjson::kObjectType);
296
129
        bucket_json.AddMember("lower", lower_val, allocator);
297
129
        bucket_json.AddMember("upper", upper_val, allocator);
298
129
        bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator);
299
129
        bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator);
300
129
        bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator);
301
302
129
        bucket_arr.PushBack(bucket_json, allocator);
303
129
    }
304
305
48
    doc.AddMember("buckets", bucket_arr, allocator);
306
48
    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
307
48
    doc.Accept(writer);
308
309
48
    return !buckets.empty() && buffer.GetSize() > 0;
310
48
}
_ZN5doris17histogram_to_jsonIsEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE
Line
Count
Source
246
48
                       const DataTypePtr& data_type) {
247
48
    rapidjson::Document doc;
248
48
    doc.SetObject();
249
48
    rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
250
251
48
    int num_buckets = cast_set<int>(buckets.size());
252
48
    doc.AddMember("num_buckets", num_buckets, allocator);
253
254
48
    rapidjson::Value bucket_arr(rapidjson::kArrayType);
255
48
    bucket_arr.Reserve(num_buckets, allocator);
256
257
48
    std::stringstream ss1;
258
48
    std::stringstream ss2;
259
260
48
    rapidjson::Value lower_val;
261
48
    rapidjson::Value upper_val;
262
263
    // Convert bucket's lower and upper to 2 columns
264
48
    MutableColumnPtr lower_column = data_type->create_column();
265
48
    MutableColumnPtr upper_column = data_type->create_column();
266
132
    for (const auto& bucket : buckets) {
267
        // String type is different, it has to pass in length
268
        // if it is string type , directly use string value
269
132
        if constexpr (!std::is_same_v<T, std::string>) {
270
132
            lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0);
271
132
            upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0);
272
132
        }
273
132
    }
274
48
    size_t row_num = 0;
275
276
48
    auto format_options = DataTypeSerDe::get_default_format_options();
277
48
    auto time_zone = cctz::utc_time_zone();
278
48
    format_options.timezone = &time_zone;
279
280
132
    for (const auto& bucket : buckets) {
281
        if constexpr (std::is_same_v<T, std::string>) {
282
            lower_val.SetString(bucket.lower.data(),
283
                                static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator);
284
            upper_val.SetString(bucket.upper.data(),
285
                                static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator);
286
132
        } else {
287
132
            std::string lower_str = data_type->to_string(*lower_column, row_num, format_options);
288
132
            std::string upper_str = data_type->to_string(*upper_column, row_num, format_options);
289
132
            ++row_num;
290
132
            lower_val.SetString(lower_str.data(),
291
132
                                static_cast<rapidjson::SizeType>(lower_str.size()), allocator);
292
132
            upper_val.SetString(upper_str.data(),
293
132
                                static_cast<rapidjson::SizeType>(upper_str.size()), allocator);
294
132
        }
295
132
        rapidjson::Value bucket_json(rapidjson::kObjectType);
296
132
        bucket_json.AddMember("lower", lower_val, allocator);
297
132
        bucket_json.AddMember("upper", upper_val, allocator);
298
132
        bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator);
299
132
        bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator);
300
132
        bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator);
301
302
132
        bucket_arr.PushBack(bucket_json, allocator);
303
132
    }
304
305
48
    doc.AddMember("buckets", bucket_arr, allocator);
306
48
    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
307
48
    doc.Accept(writer);
308
309
48
    return !buckets.empty() && buffer.GetSize() > 0;
310
48
}
_ZN5doris17histogram_to_jsonIiEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE
Line
Count
Source
246
69
                       const DataTypePtr& data_type) {
247
69
    rapidjson::Document doc;
248
69
    doc.SetObject();
249
69
    rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
250
251
69
    int num_buckets = cast_set<int>(buckets.size());
252
69
    doc.AddMember("num_buckets", num_buckets, allocator);
253
254
69
    rapidjson::Value bucket_arr(rapidjson::kArrayType);
255
69
    bucket_arr.Reserve(num_buckets, allocator);
256
257
69
    std::stringstream ss1;
258
69
    std::stringstream ss2;
259
260
69
    rapidjson::Value lower_val;
261
69
    rapidjson::Value upper_val;
262
263
    // Convert bucket's lower and upper to 2 columns
264
69
    MutableColumnPtr lower_column = data_type->create_column();
265
69
    MutableColumnPtr upper_column = data_type->create_column();
266
149
    for (const auto& bucket : buckets) {
267
        // String type is different, it has to pass in length
268
        // if it is string type , directly use string value
269
149
        if constexpr (!std::is_same_v<T, std::string>) {
270
149
            lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0);
271
149
            upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0);
272
149
        }
273
149
    }
274
69
    size_t row_num = 0;
275
276
69
    auto format_options = DataTypeSerDe::get_default_format_options();
277
69
    auto time_zone = cctz::utc_time_zone();
278
69
    format_options.timezone = &time_zone;
279
280
149
    for (const auto& bucket : buckets) {
281
        if constexpr (std::is_same_v<T, std::string>) {
282
            lower_val.SetString(bucket.lower.data(),
283
                                static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator);
284
            upper_val.SetString(bucket.upper.data(),
285
                                static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator);
286
149
        } else {
287
149
            std::string lower_str = data_type->to_string(*lower_column, row_num, format_options);
288
149
            std::string upper_str = data_type->to_string(*upper_column, row_num, format_options);
289
149
            ++row_num;
290
149
            lower_val.SetString(lower_str.data(),
291
149
                                static_cast<rapidjson::SizeType>(lower_str.size()), allocator);
292
149
            upper_val.SetString(upper_str.data(),
293
149
                                static_cast<rapidjson::SizeType>(upper_str.size()), allocator);
294
149
        }
295
149
        rapidjson::Value bucket_json(rapidjson::kObjectType);
296
149
        bucket_json.AddMember("lower", lower_val, allocator);
297
149
        bucket_json.AddMember("upper", upper_val, allocator);
298
149
        bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator);
299
149
        bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator);
300
149
        bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator);
301
302
149
        bucket_arr.PushBack(bucket_json, allocator);
303
149
    }
304
305
69
    doc.AddMember("buckets", bucket_arr, allocator);
306
69
    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
307
69
    doc.Accept(writer);
308
309
69
    return !buckets.empty() && buffer.GetSize() > 0;
310
69
}
_ZN5doris17histogram_to_jsonIlEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE
Line
Count
Source
246
65
                       const DataTypePtr& data_type) {
247
65
    rapidjson::Document doc;
248
65
    doc.SetObject();
249
65
    rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
250
251
65
    int num_buckets = cast_set<int>(buckets.size());
252
65
    doc.AddMember("num_buckets", num_buckets, allocator);
253
254
65
    rapidjson::Value bucket_arr(rapidjson::kArrayType);
255
65
    bucket_arr.Reserve(num_buckets, allocator);
256
257
65
    std::stringstream ss1;
258
65
    std::stringstream ss2;
259
260
65
    rapidjson::Value lower_val;
261
65
    rapidjson::Value upper_val;
262
263
    // Convert bucket's lower and upper to 2 columns
264
65
    MutableColumnPtr lower_column = data_type->create_column();
265
65
    MutableColumnPtr upper_column = data_type->create_column();
266
212
    for (const auto& bucket : buckets) {
267
        // String type is different, it has to pass in length
268
        // if it is string type , directly use string value
269
212
        if constexpr (!std::is_same_v<T, std::string>) {
270
212
            lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0);
271
212
            upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0);
272
212
        }
273
212
    }
274
65
    size_t row_num = 0;
275
276
65
    auto format_options = DataTypeSerDe::get_default_format_options();
277
65
    auto time_zone = cctz::utc_time_zone();
278
65
    format_options.timezone = &time_zone;
279
280
212
    for (const auto& bucket : buckets) {
281
        if constexpr (std::is_same_v<T, std::string>) {
282
            lower_val.SetString(bucket.lower.data(),
283
                                static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator);
284
            upper_val.SetString(bucket.upper.data(),
285
                                static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator);
286
212
        } else {
287
212
            std::string lower_str = data_type->to_string(*lower_column, row_num, format_options);
288
212
            std::string upper_str = data_type->to_string(*upper_column, row_num, format_options);
289
212
            ++row_num;
290
212
            lower_val.SetString(lower_str.data(),
291
212
                                static_cast<rapidjson::SizeType>(lower_str.size()), allocator);
292
212
            upper_val.SetString(upper_str.data(),
293
212
                                static_cast<rapidjson::SizeType>(upper_str.size()), allocator);
294
212
        }
295
212
        rapidjson::Value bucket_json(rapidjson::kObjectType);
296
212
        bucket_json.AddMember("lower", lower_val, allocator);
297
212
        bucket_json.AddMember("upper", upper_val, allocator);
298
212
        bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator);
299
212
        bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator);
300
212
        bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator);
301
302
212
        bucket_arr.PushBack(bucket_json, allocator);
303
212
    }
304
305
65
    doc.AddMember("buckets", bucket_arr, allocator);
306
65
    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
307
65
    doc.Accept(writer);
308
309
65
    return !buckets.empty() && buffer.GetSize() > 0;
310
65
}
_ZN5doris17histogram_to_jsonInEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE
Line
Count
Source
246
48
                       const DataTypePtr& data_type) {
247
48
    rapidjson::Document doc;
248
48
    doc.SetObject();
249
48
    rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
250
251
48
    int num_buckets = cast_set<int>(buckets.size());
252
48
    doc.AddMember("num_buckets", num_buckets, allocator);
253
254
48
    rapidjson::Value bucket_arr(rapidjson::kArrayType);
255
48
    bucket_arr.Reserve(num_buckets, allocator);
256
257
48
    std::stringstream ss1;
258
48
    std::stringstream ss2;
259
260
48
    rapidjson::Value lower_val;
261
48
    rapidjson::Value upper_val;
262
263
    // Convert bucket's lower and upper to 2 columns
264
48
    MutableColumnPtr lower_column = data_type->create_column();
265
48
    MutableColumnPtr upper_column = data_type->create_column();
266
141
    for (const auto& bucket : buckets) {
267
        // String type is different, it has to pass in length
268
        // if it is string type , directly use string value
269
141
        if constexpr (!std::is_same_v<T, std::string>) {
270
141
            lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0);
271
141
            upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0);
272
141
        }
273
141
    }
274
48
    size_t row_num = 0;
275
276
48
    auto format_options = DataTypeSerDe::get_default_format_options();
277
48
    auto time_zone = cctz::utc_time_zone();
278
48
    format_options.timezone = &time_zone;
279
280
141
    for (const auto& bucket : buckets) {
281
        if constexpr (std::is_same_v<T, std::string>) {
282
            lower_val.SetString(bucket.lower.data(),
283
                                static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator);
284
            upper_val.SetString(bucket.upper.data(),
285
                                static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator);
286
141
        } else {
287
141
            std::string lower_str = data_type->to_string(*lower_column, row_num, format_options);
288
141
            std::string upper_str = data_type->to_string(*upper_column, row_num, format_options);
289
141
            ++row_num;
290
141
            lower_val.SetString(lower_str.data(),
291
141
                                static_cast<rapidjson::SizeType>(lower_str.size()), allocator);
292
141
            upper_val.SetString(upper_str.data(),
293
141
                                static_cast<rapidjson::SizeType>(upper_str.size()), allocator);
294
141
        }
295
141
        rapidjson::Value bucket_json(rapidjson::kObjectType);
296
141
        bucket_json.AddMember("lower", lower_val, allocator);
297
141
        bucket_json.AddMember("upper", upper_val, allocator);
298
141
        bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator);
299
141
        bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator);
300
141
        bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator);
301
302
141
        bucket_arr.PushBack(bucket_json, allocator);
303
141
    }
304
305
48
    doc.AddMember("buckets", bucket_arr, allocator);
306
48
    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
307
48
    doc.Accept(writer);
308
309
48
    return !buckets.empty() && buffer.GetSize() > 0;
310
48
}
_ZN5doris17histogram_to_jsonIfEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE
Line
Count
Source
246
46
                       const DataTypePtr& data_type) {
247
46
    rapidjson::Document doc;
248
46
    doc.SetObject();
249
46
    rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
250
251
46
    int num_buckets = cast_set<int>(buckets.size());
252
46
    doc.AddMember("num_buckets", num_buckets, allocator);
253
254
46
    rapidjson::Value bucket_arr(rapidjson::kArrayType);
255
46
    bucket_arr.Reserve(num_buckets, allocator);
256
257
46
    std::stringstream ss1;
258
46
    std::stringstream ss2;
259
260
46
    rapidjson::Value lower_val;
261
46
    rapidjson::Value upper_val;
262
263
    // Convert bucket's lower and upper to 2 columns
264
46
    MutableColumnPtr lower_column = data_type->create_column();
265
46
    MutableColumnPtr upper_column = data_type->create_column();
266
128
    for (const auto& bucket : buckets) {
267
        // String type is different, it has to pass in length
268
        // if it is string type , directly use string value
269
128
        if constexpr (!std::is_same_v<T, std::string>) {
270
128
            lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0);
271
128
            upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0);
272
128
        }
273
128
    }
274
46
    size_t row_num = 0;
275
276
46
    auto format_options = DataTypeSerDe::get_default_format_options();
277
46
    auto time_zone = cctz::utc_time_zone();
278
46
    format_options.timezone = &time_zone;
279
280
128
    for (const auto& bucket : buckets) {
281
        if constexpr (std::is_same_v<T, std::string>) {
282
            lower_val.SetString(bucket.lower.data(),
283
                                static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator);
284
            upper_val.SetString(bucket.upper.data(),
285
                                static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator);
286
128
        } else {
287
128
            std::string lower_str = data_type->to_string(*lower_column, row_num, format_options);
288
128
            std::string upper_str = data_type->to_string(*upper_column, row_num, format_options);
289
128
            ++row_num;
290
128
            lower_val.SetString(lower_str.data(),
291
128
                                static_cast<rapidjson::SizeType>(lower_str.size()), allocator);
292
128
            upper_val.SetString(upper_str.data(),
293
128
                                static_cast<rapidjson::SizeType>(upper_str.size()), allocator);
294
128
        }
295
128
        rapidjson::Value bucket_json(rapidjson::kObjectType);
296
128
        bucket_json.AddMember("lower", lower_val, allocator);
297
128
        bucket_json.AddMember("upper", upper_val, allocator);
298
128
        bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator);
299
128
        bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator);
300
128
        bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator);
301
302
128
        bucket_arr.PushBack(bucket_json, allocator);
303
128
    }
304
305
46
    doc.AddMember("buckets", bucket_arr, allocator);
306
46
    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
307
46
    doc.Accept(writer);
308
309
46
    return !buckets.empty() && buffer.GetSize() > 0;
310
46
}
_ZN5doris17histogram_to_jsonIdEEbRN9rapidjson19GenericStringBufferINS1_4UTF8IcEENS1_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISB_EERKSt10shared_ptrIKNS_9IDataTypeEE
Line
Count
Source
246
46
                       const DataTypePtr& data_type) {
247
46
    rapidjson::Document doc;
248
46
    doc.SetObject();
249
46
    rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
250
251
46
    int num_buckets = cast_set<int>(buckets.size());
252
46
    doc.AddMember("num_buckets", num_buckets, allocator);
253
254
46
    rapidjson::Value bucket_arr(rapidjson::kArrayType);
255
46
    bucket_arr.Reserve(num_buckets, allocator);
256
257
46
    std::stringstream ss1;
258
46
    std::stringstream ss2;
259
260
46
    rapidjson::Value lower_val;
261
46
    rapidjson::Value upper_val;
262
263
    // Convert bucket's lower and upper to 2 columns
264
46
    MutableColumnPtr lower_column = data_type->create_column();
265
46
    MutableColumnPtr upper_column = data_type->create_column();
266
128
    for (const auto& bucket : buckets) {
267
        // String type is different, it has to pass in length
268
        // if it is string type , directly use string value
269
128
        if constexpr (!std::is_same_v<T, std::string>) {
270
128
            lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0);
271
128
            upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0);
272
128
        }
273
128
    }
274
46
    size_t row_num = 0;
275
276
46
    auto format_options = DataTypeSerDe::get_default_format_options();
277
46
    auto time_zone = cctz::utc_time_zone();
278
46
    format_options.timezone = &time_zone;
279
280
128
    for (const auto& bucket : buckets) {
281
        if constexpr (std::is_same_v<T, std::string>) {
282
            lower_val.SetString(bucket.lower.data(),
283
                                static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator);
284
            upper_val.SetString(bucket.upper.data(),
285
                                static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator);
286
128
        } else {
287
128
            std::string lower_str = data_type->to_string(*lower_column, row_num, format_options);
288
128
            std::string upper_str = data_type->to_string(*upper_column, row_num, format_options);
289
128
            ++row_num;
290
128
            lower_val.SetString(lower_str.data(),
291
128
                                static_cast<rapidjson::SizeType>(lower_str.size()), allocator);
292
128
            upper_val.SetString(upper_str.data(),
293
128
                                static_cast<rapidjson::SizeType>(upper_str.size()), allocator);
294
128
        }
295
128
        rapidjson::Value bucket_json(rapidjson::kObjectType);
296
128
        bucket_json.AddMember("lower", lower_val, allocator);
297
128
        bucket_json.AddMember("upper", upper_val, allocator);
298
128
        bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator);
299
128
        bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator);
300
128
        bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator);
301
302
128
        bucket_arr.PushBack(bucket_json, allocator);
303
128
    }
304
305
46
    doc.AddMember("buckets", bucket_arr, allocator);
306
46
    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
307
46
    doc.Accept(writer);
308
309
46
    return !buckets.empty() && buffer.GetSize() > 0;
310
46
}
_ZN5doris17histogram_to_jsonINS_7DecimalIiEEEEbRN9rapidjson19GenericStringBufferINS3_4UTF8IcEENS3_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISD_EERKSt10shared_ptrIKNS_9IDataTypeEE
Line
Count
Source
246
49
                       const DataTypePtr& data_type) {
247
49
    rapidjson::Document doc;
248
49
    doc.SetObject();
249
49
    rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
250
251
49
    int num_buckets = cast_set<int>(buckets.size());
252
49
    doc.AddMember("num_buckets", num_buckets, allocator);
253
254
49
    rapidjson::Value bucket_arr(rapidjson::kArrayType);
255
49
    bucket_arr.Reserve(num_buckets, allocator);
256
257
49
    std::stringstream ss1;
258
49
    std::stringstream ss2;
259
260
49
    rapidjson::Value lower_val;
261
49
    rapidjson::Value upper_val;
262
263
    // Convert bucket's lower and upper to 2 columns
264
49
    MutableColumnPtr lower_column = data_type->create_column();
265
49
    MutableColumnPtr upper_column = data_type->create_column();
266
133
    for (const auto& bucket : buckets) {
267
        // String type is different, it has to pass in length
268
        // if it is string type , directly use string value
269
133
        if constexpr (!std::is_same_v<T, std::string>) {
270
133
            lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0);
271
133
            upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0);
272
133
        }
273
133
    }
274
49
    size_t row_num = 0;
275
276
49
    auto format_options = DataTypeSerDe::get_default_format_options();
277
49
    auto time_zone = cctz::utc_time_zone();
278
49
    format_options.timezone = &time_zone;
279
280
133
    for (const auto& bucket : buckets) {
281
        if constexpr (std::is_same_v<T, std::string>) {
282
            lower_val.SetString(bucket.lower.data(),
283
                                static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator);
284
            upper_val.SetString(bucket.upper.data(),
285
                                static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator);
286
133
        } else {
287
133
            std::string lower_str = data_type->to_string(*lower_column, row_num, format_options);
288
133
            std::string upper_str = data_type->to_string(*upper_column, row_num, format_options);
289
133
            ++row_num;
290
133
            lower_val.SetString(lower_str.data(),
291
133
                                static_cast<rapidjson::SizeType>(lower_str.size()), allocator);
292
133
            upper_val.SetString(upper_str.data(),
293
133
                                static_cast<rapidjson::SizeType>(upper_str.size()), allocator);
294
133
        }
295
133
        rapidjson::Value bucket_json(rapidjson::kObjectType);
296
133
        bucket_json.AddMember("lower", lower_val, allocator);
297
133
        bucket_json.AddMember("upper", upper_val, allocator);
298
133
        bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator);
299
133
        bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator);
300
133
        bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator);
301
302
133
        bucket_arr.PushBack(bucket_json, allocator);
303
133
    }
304
305
49
    doc.AddMember("buckets", bucket_arr, allocator);
306
49
    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
307
49
    doc.Accept(writer);
308
309
49
    return !buckets.empty() && buffer.GetSize() > 0;
310
49
}
Unexecuted instantiation: _ZN5doris17histogram_to_jsonINS_7DecimalIlEEEEbRN9rapidjson19GenericStringBufferINS3_4UTF8IcEENS3_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISD_EERKSt10shared_ptrIKNS_9IDataTypeEE
Unexecuted instantiation: _ZN5doris17histogram_to_jsonINS_12Decimal128V3EEEbRN9rapidjson19GenericStringBufferINS2_4UTF8IcEENS2_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISC_EERKSt10shared_ptrIKNS_9IDataTypeEE
Unexecuted instantiation: _ZN5doris17histogram_to_jsonINS_7DecimalIN4wide7integerILm256EiEEEEEEbRN9rapidjson19GenericStringBufferINS6_4UTF8IcEENS6_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISG_EERKSt10shared_ptrIKNS_9IDataTypeEE
_ZN5doris17histogram_to_jsonINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEbRN9rapidjson19GenericStringBufferINS7_4UTF8IcEENS7_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISH_EERKSt10shared_ptrIKNS_9IDataTypeEE
Line
Count
Source
246
105
                       const DataTypePtr& data_type) {
247
105
    rapidjson::Document doc;
248
105
    doc.SetObject();
249
105
    rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
250
251
105
    int num_buckets = cast_set<int>(buckets.size());
252
105
    doc.AddMember("num_buckets", num_buckets, allocator);
253
254
105
    rapidjson::Value bucket_arr(rapidjson::kArrayType);
255
105
    bucket_arr.Reserve(num_buckets, allocator);
256
257
105
    std::stringstream ss1;
258
105
    std::stringstream ss2;
259
260
105
    rapidjson::Value lower_val;
261
105
    rapidjson::Value upper_val;
262
263
    // Convert bucket's lower and upper to 2 columns
264
105
    MutableColumnPtr lower_column = data_type->create_column();
265
105
    MutableColumnPtr upper_column = data_type->create_column();
266
178
    for (const auto& bucket : buckets) {
267
        // String type is different, it has to pass in length
268
        // if it is string type , directly use string value
269
        if constexpr (!std::is_same_v<T, std::string>) {
270
            lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0);
271
            upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0);
272
        }
273
178
    }
274
105
    size_t row_num = 0;
275
276
105
    auto format_options = DataTypeSerDe::get_default_format_options();
277
105
    auto time_zone = cctz::utc_time_zone();
278
105
    format_options.timezone = &time_zone;
279
280
178
    for (const auto& bucket : buckets) {
281
178
        if constexpr (std::is_same_v<T, std::string>) {
282
178
            lower_val.SetString(bucket.lower.data(),
283
178
                                static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator);
284
178
            upper_val.SetString(bucket.upper.data(),
285
178
                                static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator);
286
        } else {
287
            std::string lower_str = data_type->to_string(*lower_column, row_num, format_options);
288
            std::string upper_str = data_type->to_string(*upper_column, row_num, format_options);
289
            ++row_num;
290
            lower_val.SetString(lower_str.data(),
291
                                static_cast<rapidjson::SizeType>(lower_str.size()), allocator);
292
            upper_val.SetString(upper_str.data(),
293
                                static_cast<rapidjson::SizeType>(upper_str.size()), allocator);
294
        }
295
178
        rapidjson::Value bucket_json(rapidjson::kObjectType);
296
178
        bucket_json.AddMember("lower", lower_val, allocator);
297
178
        bucket_json.AddMember("upper", upper_val, allocator);
298
178
        bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator);
299
178
        bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator);
300
178
        bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator);
301
302
178
        bucket_arr.PushBack(bucket_json, allocator);
303
178
    }
304
305
105
    doc.AddMember("buckets", bucket_arr, allocator);
306
105
    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
307
105
    doc.Accept(writer);
308
309
105
    return !buckets.empty() && buffer.GetSize() > 0;
310
105
}
_ZN5doris17histogram_to_jsonINS_11DateV2ValueINS_15DateV2ValueTypeEEEEEbRN9rapidjson19GenericStringBufferINS4_4UTF8IcEENS4_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISE_EERKSt10shared_ptrIKNS_9IDataTypeEE
Line
Count
Source
246
88
                       const DataTypePtr& data_type) {
247
88
    rapidjson::Document doc;
248
88
    doc.SetObject();
249
88
    rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
250
251
88
    int num_buckets = cast_set<int>(buckets.size());
252
88
    doc.AddMember("num_buckets", num_buckets, allocator);
253
254
88
    rapidjson::Value bucket_arr(rapidjson::kArrayType);
255
88
    bucket_arr.Reserve(num_buckets, allocator);
256
257
88
    std::stringstream ss1;
258
88
    std::stringstream ss2;
259
260
88
    rapidjson::Value lower_val;
261
88
    rapidjson::Value upper_val;
262
263
    // Convert bucket's lower and upper to 2 columns
264
88
    MutableColumnPtr lower_column = data_type->create_column();
265
88
    MutableColumnPtr upper_column = data_type->create_column();
266
256
    for (const auto& bucket : buckets) {
267
        // String type is different, it has to pass in length
268
        // if it is string type , directly use string value
269
256
        if constexpr (!std::is_same_v<T, std::string>) {
270
256
            lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0);
271
256
            upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0);
272
256
        }
273
256
    }
274
88
    size_t row_num = 0;
275
276
88
    auto format_options = DataTypeSerDe::get_default_format_options();
277
88
    auto time_zone = cctz::utc_time_zone();
278
88
    format_options.timezone = &time_zone;
279
280
256
    for (const auto& bucket : buckets) {
281
        if constexpr (std::is_same_v<T, std::string>) {
282
            lower_val.SetString(bucket.lower.data(),
283
                                static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator);
284
            upper_val.SetString(bucket.upper.data(),
285
                                static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator);
286
256
        } else {
287
256
            std::string lower_str = data_type->to_string(*lower_column, row_num, format_options);
288
256
            std::string upper_str = data_type->to_string(*upper_column, row_num, format_options);
289
256
            ++row_num;
290
256
            lower_val.SetString(lower_str.data(),
291
256
                                static_cast<rapidjson::SizeType>(lower_str.size()), allocator);
292
256
            upper_val.SetString(upper_str.data(),
293
256
                                static_cast<rapidjson::SizeType>(upper_str.size()), allocator);
294
256
        }
295
256
        rapidjson::Value bucket_json(rapidjson::kObjectType);
296
256
        bucket_json.AddMember("lower", lower_val, allocator);
297
256
        bucket_json.AddMember("upper", upper_val, allocator);
298
256
        bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator);
299
256
        bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator);
300
256
        bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator);
301
302
256
        bucket_arr.PushBack(bucket_json, allocator);
303
256
    }
304
305
88
    doc.AddMember("buckets", bucket_arr, allocator);
306
88
    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
307
88
    doc.Accept(writer);
308
309
88
    return !buckets.empty() && buffer.GetSize() > 0;
310
88
}
_ZN5doris17histogram_to_jsonINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEEEEbRN9rapidjson19GenericStringBufferINS4_4UTF8IcEENS4_12CrtAllocatorEEERKSt6vectorINS_6BucketIT_EESaISE_EERKSt10shared_ptrIKNS_9IDataTypeEE
Line
Count
Source
246
88
                       const DataTypePtr& data_type) {
247
88
    rapidjson::Document doc;
248
88
    doc.SetObject();
249
88
    rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
250
251
88
    int num_buckets = cast_set<int>(buckets.size());
252
88
    doc.AddMember("num_buckets", num_buckets, allocator);
253
254
88
    rapidjson::Value bucket_arr(rapidjson::kArrayType);
255
88
    bucket_arr.Reserve(num_buckets, allocator);
256
257
88
    std::stringstream ss1;
258
88
    std::stringstream ss2;
259
260
88
    rapidjson::Value lower_val;
261
88
    rapidjson::Value upper_val;
262
263
    // Convert bucket's lower and upper to 2 columns
264
88
    MutableColumnPtr lower_column = data_type->create_column();
265
88
    MutableColumnPtr upper_column = data_type->create_column();
266
255
    for (const auto& bucket : buckets) {
267
        // String type is different, it has to pass in length
268
        // if it is string type , directly use string value
269
255
        if constexpr (!std::is_same_v<T, std::string>) {
270
255
            lower_column->insert_data(reinterpret_cast<const char*>(&bucket.lower), 0);
271
255
            upper_column->insert_data(reinterpret_cast<const char*>(&bucket.upper), 0);
272
255
        }
273
255
    }
274
88
    size_t row_num = 0;
275
276
88
    auto format_options = DataTypeSerDe::get_default_format_options();
277
88
    auto time_zone = cctz::utc_time_zone();
278
88
    format_options.timezone = &time_zone;
279
280
255
    for (const auto& bucket : buckets) {
281
        if constexpr (std::is_same_v<T, std::string>) {
282
            lower_val.SetString(bucket.lower.data(),
283
                                static_cast<rapidjson::SizeType>(bucket.lower.size()), allocator);
284
            upper_val.SetString(bucket.upper.data(),
285
                                static_cast<rapidjson::SizeType>(bucket.upper.size()), allocator);
286
255
        } else {
287
255
            std::string lower_str = data_type->to_string(*lower_column, row_num, format_options);
288
255
            std::string upper_str = data_type->to_string(*upper_column, row_num, format_options);
289
255
            ++row_num;
290
255
            lower_val.SetString(lower_str.data(),
291
255
                                static_cast<rapidjson::SizeType>(lower_str.size()), allocator);
292
255
            upper_val.SetString(upper_str.data(),
293
255
                                static_cast<rapidjson::SizeType>(upper_str.size()), allocator);
294
255
        }
295
255
        rapidjson::Value bucket_json(rapidjson::kObjectType);
296
255
        bucket_json.AddMember("lower", lower_val, allocator);
297
255
        bucket_json.AddMember("upper", upper_val, allocator);
298
255
        bucket_json.AddMember("ndv", static_cast<int64_t>(bucket.ndv), allocator);
299
255
        bucket_json.AddMember("count", static_cast<int64_t>(bucket.count), allocator);
300
255
        bucket_json.AddMember("pre_sum", static_cast<int64_t>(bucket.pre_sum), allocator);
301
302
255
        bucket_arr.PushBack(bucket_json, allocator);
303
255
    }
304
305
88
    doc.AddMember("buckets", bucket_arr, allocator);
306
88
    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
307
88
    doc.Accept(writer);
308
309
88
    return !buckets.empty() && buffer.GetSize() > 0;
310
88
}
311
#include "common/compile_check_end.h"
312
} // namespace  doris