Coverage Report

Created: 2025-05-12 13:45

/root/doris/be/src/util/counts.h
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <pdqsort.h>
21
22
#include <algorithm>
23
#include <cmath>
24
#include <queue>
25
26
#include "udf/udf.h"
27
#include "vec/common/pod_array.h"
28
#include "vec/common/string_buffer.hpp"
29
#include "vec/io/io_helper.h"
30
31
namespace doris {
32
33
template <typename Ty>
34
class Counts {
35
public:
36
3.46k
    Counts() = default;
Unexecuted instantiation: _ZN5doris6CountsIhEC2Ev
_ZN5doris6CountsIaEC2Ev
Line
Count
Source
36
34
    Counts() = default;
_ZN5doris6CountsIsEC2Ev
Line
Count
Source
36
231
    Counts() = default;
_ZN5doris6CountsIiEC2Ev
Line
Count
Source
36
2.08k
    Counts() = default;
_ZN5doris6CountsIlEC2Ev
Line
Count
Source
36
817
    Counts() = default;
_ZN5doris6CountsInEC2Ev
Line
Count
Source
36
30
    Counts() = default;
Unexecuted instantiation: _ZN5doris6CountsIfEC2Ev
_ZN5doris6CountsIdEC2Ev
Line
Count
Source
36
260
    Counts() = default;
37
38
1.17k
    void merge(Counts* other) {
39
1.17k
        if (other != nullptr && !other->_nums.empty()) {
40
1.17k
            _sorted_nums_vec.emplace_back(std::move(other->_nums));
41
1.17k
        }
42
1.17k
    }
Unexecuted instantiation: _ZN5doris6CountsIhE5mergeEPS1_
Unexecuted instantiation: _ZN5doris6CountsIaE5mergeEPS1_
_ZN5doris6CountsIsE5mergeEPS1_
Line
Count
Source
38
6
    void merge(Counts* other) {
39
6
        if (other != nullptr && !other->_nums.empty()) {
40
6
            _sorted_nums_vec.emplace_back(std::move(other->_nums));
41
6
        }
42
6
    }
_ZN5doris6CountsIiE5mergeEPS1_
Line
Count
Source
38
756
    void merge(Counts* other) {
39
756
        if (other != nullptr && !other->_nums.empty()) {
40
756
            _sorted_nums_vec.emplace_back(std::move(other->_nums));
41
756
        }
42
756
    }
_ZN5doris6CountsIlE5mergeEPS1_
Line
Count
Source
38
329
    void merge(Counts* other) {
39
329
        if (other != nullptr && !other->_nums.empty()) {
40
329
            _sorted_nums_vec.emplace_back(std::move(other->_nums));
41
329
        }
42
329
    }
Unexecuted instantiation: _ZN5doris6CountsInE5mergeEPS1_
Unexecuted instantiation: _ZN5doris6CountsIfE5mergeEPS1_
_ZN5doris6CountsIdE5mergeEPS1_
Line
Count
Source
38
80
    void merge(Counts* other) {
39
80
        if (other != nullptr && !other->_nums.empty()) {
40
80
            _sorted_nums_vec.emplace_back(std::move(other->_nums));
41
80
        }
42
80
    }
43
44
    void increment(Ty key, uint32_t i) {
45
        auto old_size = _nums.size();
46
        _nums.resize(_nums.size() + i);
47
        for (uint32_t j = 0; j < i; ++j) {
48
            _nums[old_size + j] = key;
49
        }
50
    }
51
52
3.16k
    void increment(Ty key) { _nums.push_back(key); }
Unexecuted instantiation: _ZN5doris6CountsIhE9incrementEh
_ZN5doris6CountsIaE9incrementEa
Line
Count
Source
52
64
    void increment(Ty key) { _nums.push_back(key); }
_ZN5doris6CountsIsE9incrementEs
Line
Count
Source
52
789
    void increment(Ty key) { _nums.push_back(key); }
_ZN5doris6CountsIiE9incrementEi
Line
Count
Source
52
1.60k
    void increment(Ty key) { _nums.push_back(key); }
_ZN5doris6CountsIlE9incrementEl
Line
Count
Source
52
412
    void increment(Ty key) { _nums.push_back(key); }
_ZN5doris6CountsInE9incrementEn
Line
Count
Source
52
36
    void increment(Ty key) { _nums.push_back(key); }
Unexecuted instantiation: _ZN5doris6CountsIfE9incrementEf
_ZN5doris6CountsIdE9incrementEd
Line
Count
Source
52
268
    void increment(Ty key) { _nums.push_back(key); }
53
54
5
    void increment_batch(const vectorized::PaddedPODArray<Ty>& keys) {
55
5
        _nums.insert(keys.begin(), keys.end());
56
5
    }
Unexecuted instantiation: _ZN5doris6CountsIhE15increment_batchERKNS_10vectorized8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_32NoTrackingDefaultMemoryAllocatorEEELm16ELm15EEE
Unexecuted instantiation: _ZN5doris6CountsIaE15increment_batchERKNS_10vectorized8PODArrayIaLm4096ENS_9AllocatorILb0ELb0ELb0ENS_32NoTrackingDefaultMemoryAllocatorEEELm16ELm15EEE
Unexecuted instantiation: _ZN5doris6CountsIsE15increment_batchERKNS_10vectorized8PODArrayIsLm4096ENS_9AllocatorILb0ELb0ELb0ENS_32NoTrackingDefaultMemoryAllocatorEEELm16ELm15EEE
Unexecuted instantiation: _ZN5doris6CountsIiE15increment_batchERKNS_10vectorized8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_32NoTrackingDefaultMemoryAllocatorEEELm16ELm15EEE
_ZN5doris6CountsIlE15increment_batchERKNS_10vectorized8PODArrayIlLm4096ENS_9AllocatorILb0ELb0ELb0ENS_32NoTrackingDefaultMemoryAllocatorEEELm16ELm15EEE
Line
Count
Source
54
5
    void increment_batch(const vectorized::PaddedPODArray<Ty>& keys) {
55
5
        _nums.insert(keys.begin(), keys.end());
56
5
    }
Unexecuted instantiation: _ZN5doris6CountsInE15increment_batchERKNS_10vectorized8PODArrayInLm4096ENS_9AllocatorILb0ELb0ELb0ENS_32NoTrackingDefaultMemoryAllocatorEEELm16ELm15EEE
Unexecuted instantiation: _ZN5doris6CountsIfE15increment_batchERKNS_10vectorized8PODArrayIfLm4096ENS_9AllocatorILb0ELb0ELb0ENS_32NoTrackingDefaultMemoryAllocatorEEELm16ELm15EEE
Unexecuted instantiation: _ZN5doris6CountsIdE15increment_batchERKNS_10vectorized8PODArrayIdLm4096ENS_9AllocatorILb0ELb0ELb0ENS_32NoTrackingDefaultMemoryAllocatorEEELm16ELm15EEE
57
58
1.58k
    void serialize(vectorized::BufferWritable& buf) {
59
1.58k
        if (!_nums.empty()) {
60
1.28k
            pdqsort(_nums.begin(), _nums.end());
61
1.28k
            size_t size = _nums.size();
62
1.28k
            write_binary(size, buf);
63
1.28k
            buf.write(reinterpret_cast<const char*>(_nums.data()), sizeof(Ty) * size);
64
1.28k
        } else {
65
            // convert _sorted_nums_vec to _nums and do seiralize again
66
302
            _convert_sorted_num_vec_to_nums();
67
302
            serialize(buf);
68
302
        }
69
1.58k
    }
Unexecuted instantiation: _ZN5doris6CountsIhE9serializeERNS_10vectorized14BufferWritableE
Unexecuted instantiation: _ZN5doris6CountsIaE9serializeERNS_10vectorized14BufferWritableE
_ZN5doris6CountsIsE9serializeERNS_10vectorized14BufferWritableE
Line
Count
Source
58
6
    void serialize(vectorized::BufferWritable& buf) {
59
6
        if (!_nums.empty()) {
60
6
            pdqsort(_nums.begin(), _nums.end());
61
6
            size_t size = _nums.size();
62
6
            write_binary(size, buf);
63
6
            buf.write(reinterpret_cast<const char*>(_nums.data()), sizeof(Ty) * size);
64
6
        } else {
65
            // convert _sorted_nums_vec to _nums and do seiralize again
66
0
            _convert_sorted_num_vec_to_nums();
67
0
            serialize(buf);
68
0
        }
69
6
    }
_ZN5doris6CountsIiE9serializeERNS_10vectorized14BufferWritableE
Line
Count
Source
58
990
    void serialize(vectorized::BufferWritable& buf) {
59
990
        if (!_nums.empty()) {
60
834
            pdqsort(_nums.begin(), _nums.end());
61
834
            size_t size = _nums.size();
62
834
            write_binary(size, buf);
63
834
            buf.write(reinterpret_cast<const char*>(_nums.data()), sizeof(Ty) * size);
64
834
        } else {
65
            // convert _sorted_nums_vec to _nums and do seiralize again
66
156
            _convert_sorted_num_vec_to_nums();
67
156
            serialize(buf);
68
156
        }
69
990
    }
_ZN5doris6CountsIlE9serializeERNS_10vectorized14BufferWritableE
Line
Count
Source
58
508
    void serialize(vectorized::BufferWritable& buf) {
59
508
        if (!_nums.empty()) {
60
362
            pdqsort(_nums.begin(), _nums.end());
61
362
            size_t size = _nums.size();
62
362
            write_binary(size, buf);
63
362
            buf.write(reinterpret_cast<const char*>(_nums.data()), sizeof(Ty) * size);
64
362
        } else {
65
            // convert _sorted_nums_vec to _nums and do seiralize again
66
146
            _convert_sorted_num_vec_to_nums();
67
146
            serialize(buf);
68
146
        }
69
508
    }
Unexecuted instantiation: _ZN5doris6CountsInE9serializeERNS_10vectorized14BufferWritableE
Unexecuted instantiation: _ZN5doris6CountsIfE9serializeERNS_10vectorized14BufferWritableE
_ZN5doris6CountsIdE9serializeERNS_10vectorized14BufferWritableE
Line
Count
Source
58
80
    void serialize(vectorized::BufferWritable& buf) {
59
80
        if (!_nums.empty()) {
60
80
            pdqsort(_nums.begin(), _nums.end());
61
80
            size_t size = _nums.size();
62
80
            write_binary(size, buf);
63
80
            buf.write(reinterpret_cast<const char*>(_nums.data()), sizeof(Ty) * size);
64
80
        } else {
65
            // convert _sorted_nums_vec to _nums and do seiralize again
66
0
            _convert_sorted_num_vec_to_nums();
67
0
            serialize(buf);
68
0
        }
69
80
    }
70
71
1.17k
    void unserialize(vectorized::BufferReadable& buf) {
72
1.17k
        size_t size;
73
1.17k
        read_binary(size, buf);
74
1.17k
        _nums.resize(size);
75
1.17k
        auto buff = buf.read(sizeof(Ty) * size);
76
1.17k
        memcpy(_nums.data(), buff.data, buff.size);
77
1.17k
    }
Unexecuted instantiation: _ZN5doris6CountsIhE11unserializeERNS_10vectorized14BufferReadableE
Unexecuted instantiation: _ZN5doris6CountsIaE11unserializeERNS_10vectorized14BufferReadableE
_ZN5doris6CountsIsE11unserializeERNS_10vectorized14BufferReadableE
Line
Count
Source
71
6
    void unserialize(vectorized::BufferReadable& buf) {
72
6
        size_t size;
73
6
        read_binary(size, buf);
74
6
        _nums.resize(size);
75
6
        auto buff = buf.read(sizeof(Ty) * size);
76
6
        memcpy(_nums.data(), buff.data, buff.size);
77
6
    }
_ZN5doris6CountsIiE11unserializeERNS_10vectorized14BufferReadableE
Line
Count
Source
71
756
    void unserialize(vectorized::BufferReadable& buf) {
72
756
        size_t size;
73
756
        read_binary(size, buf);
74
756
        _nums.resize(size);
75
756
        auto buff = buf.read(sizeof(Ty) * size);
76
756
        memcpy(_nums.data(), buff.data, buff.size);
77
756
    }
_ZN5doris6CountsIlE11unserializeERNS_10vectorized14BufferReadableE
Line
Count
Source
71
330
    void unserialize(vectorized::BufferReadable& buf) {
72
330
        size_t size;
73
330
        read_binary(size, buf);
74
330
        _nums.resize(size);
75
330
        auto buff = buf.read(sizeof(Ty) * size);
76
330
        memcpy(_nums.data(), buff.data, buff.size);
77
330
    }
Unexecuted instantiation: _ZN5doris6CountsInE11unserializeERNS_10vectorized14BufferReadableE
Unexecuted instantiation: _ZN5doris6CountsIfE11unserializeERNS_10vectorized14BufferReadableE
_ZN5doris6CountsIdE11unserializeERNS_10vectorized14BufferReadableE
Line
Count
Source
71
80
    void unserialize(vectorized::BufferReadable& buf) {
72
80
        size_t size;
73
80
        read_binary(size, buf);
74
80
        _nums.resize(size);
75
80
        auto buff = buf.read(sizeof(Ty) * size);
76
80
        memcpy(_nums.data(), buff.data, buff.size);
77
80
    }
78
79
1.14k
    double terminate(double quantile) {
80
1.14k
        if (_sorted_nums_vec.size() <= 1) {
81
1.05k
            if (_sorted_nums_vec.size() == 1) {
82
370
                _nums = std::move(_sorted_nums_vec[0]);
83
370
            }
84
85
1.05k
            if (_nums.empty()) {
86
                // Although set null here, but the value is 0.0 and the call method just
87
                // get val in aggregate_function_percentile_approx.h
88
0
                return 0.0;
89
0
            }
90
91
1.05k
            if (UNLIKELY(!std::is_sorted(_nums.begin(), _nums.end()))) {
92
200
                pdqsort(_nums.begin(), _nums.end());
93
200
            }
94
95
1.05k
            if (quantile == 1 || _nums.size() == 1) {
96
501
                return _nums.back();
97
501
            }
98
99
551
            double u = (_nums.size() - 1) * quantile;
100
551
            auto index = static_cast<uint32_t>(u);
101
551
            return _nums[index] +
102
551
                   (u - static_cast<double>(index)) * (_nums[index + 1] - _nums[index]);
103
1.05k
        } else {
104
94
            DCHECK(_nums.empty());
105
94
            size_t rows = 0;
106
216
            for (const auto& i : _sorted_nums_vec) {
107
216
                rows += i.size();
108
216
            }
109
94
            const bool reverse = quantile > 0.5 && rows > 2;
110
94
            double u = (rows - 1) * quantile;
111
94
            auto index = static_cast<uint32_t>(u);
112
            // if reverse, the step of target should start 0 like not reverse
113
            // so here rows need to minus index + 2
114
            // eg: rows = 10, index = 5
115
            // if not reverse, so the first number loc is 5, the second number loc is 6
116
            // if reverse, so the second number is 3, the first number is 4
117
            // 5 + 4 = 3 + 6 = 9 = rows - 1.
118
            // the rows must GE 2 beacuse `_sorted_nums_vec` size GE 2
119
94
            size_t target = reverse ? rows - index - 2 : index;
120
94
            if (quantile == 1) {
121
0
                target = 0;
122
0
            }
123
94
            auto [first_number, second_number] = _merge_sort_and_get_numbers(target, reverse);
124
94
            if (quantile == 1) {
125
0
                return second_number;
126
0
            }
127
94
            return first_number + (u - static_cast<double>(index)) * (second_number - first_number);
128
94
        }
129
1.14k
    }
Unexecuted instantiation: _ZN5doris6CountsIhE9terminateEd
_ZN5doris6CountsIaE9terminateEd
Line
Count
Source
79
58
    double terminate(double quantile) {
80
58
        if (_sorted_nums_vec.size() <= 1) {
81
58
            if (_sorted_nums_vec.size() == 1) {
82
0
                _nums = std::move(_sorted_nums_vec[0]);
83
0
            }
84
85
58
            if (_nums.empty()) {
86
                // Although set null here, but the value is 0.0 and the call method just
87
                // get val in aggregate_function_percentile_approx.h
88
0
                return 0.0;
89
0
            }
90
91
58
            if (UNLIKELY(!std::is_sorted(_nums.begin(), _nums.end()))) {
92
24
                pdqsort(_nums.begin(), _nums.end());
93
24
            }
94
95
58
            if (quantile == 1 || _nums.size() == 1) {
96
34
                return _nums.back();
97
34
            }
98
99
24
            double u = (_nums.size() - 1) * quantile;
100
24
            auto index = static_cast<uint32_t>(u);
101
24
            return _nums[index] +
102
24
                   (u - static_cast<double>(index)) * (_nums[index + 1] - _nums[index]);
103
58
        } else {
104
0
            DCHECK(_nums.empty());
105
0
            size_t rows = 0;
106
0
            for (const auto& i : _sorted_nums_vec) {
107
0
                rows += i.size();
108
0
            }
109
0
            const bool reverse = quantile > 0.5 && rows > 2;
110
0
            double u = (rows - 1) * quantile;
111
0
            auto index = static_cast<uint32_t>(u);
112
            // if reverse, the step of target should start 0 like not reverse
113
            // so here rows need to minus index + 2
114
            // eg: rows = 10, index = 5
115
            // if not reverse, so the first number loc is 5, the second number loc is 6
116
            // if reverse, so the second number is 3, the first number is 4
117
            // 5 + 4 = 3 + 6 = 9 = rows - 1.
118
            // the rows must GE 2 beacuse `_sorted_nums_vec` size GE 2
119
0
            size_t target = reverse ? rows - index - 2 : index;
120
0
            if (quantile == 1) {
121
0
                target = 0;
122
0
            }
123
0
            auto [first_number, second_number] = _merge_sort_and_get_numbers(target, reverse);
124
0
            if (quantile == 1) {
125
0
                return second_number;
126
0
            }
127
0
            return first_number + (u - static_cast<double>(index)) * (second_number - first_number);
128
0
        }
129
58
    }
_ZN5doris6CountsIsE9terminateEd
Line
Count
Source
79
297
    double terminate(double quantile) {
80
297
        if (_sorted_nums_vec.size() <= 1) {
81
294
            if (_sorted_nums_vec.size() == 1) {
82
0
                _nums = std::move(_sorted_nums_vec[0]);
83
0
            }
84
85
294
            if (_nums.empty()) {
86
                // Although set null here, but the value is 0.0 and the call method just
87
                // get val in aggregate_function_percentile_approx.h
88
0
                return 0.0;
89
0
            }
90
91
294
            if (UNLIKELY(!std::is_sorted(_nums.begin(), _nums.end()))) {
92
164
                pdqsort(_nums.begin(), _nums.end());
93
164
            }
94
95
294
            if (quantile == 1 || _nums.size() == 1) {
96
87
                return _nums.back();
97
87
            }
98
99
207
            double u = (_nums.size() - 1) * quantile;
100
207
            auto index = static_cast<uint32_t>(u);
101
207
            return _nums[index] +
102
207
                   (u - static_cast<double>(index)) * (_nums[index + 1] - _nums[index]);
103
294
        } else {
104
3
            DCHECK(_nums.empty());
105
3
            size_t rows = 0;
106
6
            for (const auto& i : _sorted_nums_vec) {
107
6
                rows += i.size();
108
6
            }
109
3
            const bool reverse = quantile > 0.5 && rows > 2;
110
3
            double u = (rows - 1) * quantile;
111
3
            auto index = static_cast<uint32_t>(u);
112
            // if reverse, the step of target should start 0 like not reverse
113
            // so here rows need to minus index + 2
114
            // eg: rows = 10, index = 5
115
            // if not reverse, so the first number loc is 5, the second number loc is 6
116
            // if reverse, so the second number is 3, the first number is 4
117
            // 5 + 4 = 3 + 6 = 9 = rows - 1.
118
            // the rows must GE 2 beacuse `_sorted_nums_vec` size GE 2
119
3
            size_t target = reverse ? rows - index - 2 : index;
120
3
            if (quantile == 1) {
121
0
                target = 0;
122
0
            }
123
3
            auto [first_number, second_number] = _merge_sort_and_get_numbers(target, reverse);
124
3
            if (quantile == 1) {
125
0
                return second_number;
126
0
            }
127
3
            return first_number + (u - static_cast<double>(index)) * (second_number - first_number);
128
3
        }
129
297
    }
_ZN5doris6CountsIiE9terminateEd
Line
Count
Source
79
495
    double terminate(double quantile) {
80
495
        if (_sorted_nums_vec.size() <= 1) {
81
441
            if (_sorted_nums_vec.size() == 1) {
82
309
                _nums = std::move(_sorted_nums_vec[0]);
83
309
            }
84
85
441
            if (_nums.empty()) {
86
                // Although set null here, but the value is 0.0 and the call method just
87
                // get val in aggregate_function_percentile_approx.h
88
0
                return 0.0;
89
0
            }
90
91
441
            if (UNLIKELY(!std::is_sorted(_nums.begin(), _nums.end()))) {
92
11
                pdqsort(_nums.begin(), _nums.end());
93
11
            }
94
95
441
            if (quantile == 1 || _nums.size() == 1) {
96
254
                return _nums.back();
97
254
            }
98
99
187
            double u = (_nums.size() - 1) * quantile;
100
187
            auto index = static_cast<uint32_t>(u);
101
187
            return _nums[index] +
102
187
                   (u - static_cast<double>(index)) * (_nums[index + 1] - _nums[index]);
103
441
        } else {
104
54
            DCHECK(_nums.empty());
105
54
            size_t rows = 0;
106
108
            for (const auto& i : _sorted_nums_vec) {
107
108
                rows += i.size();
108
108
            }
109
54
            const bool reverse = quantile > 0.5 && rows > 2;
110
54
            double u = (rows - 1) * quantile;
111
54
            auto index = static_cast<uint32_t>(u);
112
            // if reverse, the step of target should start 0 like not reverse
113
            // so here rows need to minus index + 2
114
            // eg: rows = 10, index = 5
115
            // if not reverse, so the first number loc is 5, the second number loc is 6
116
            // if reverse, so the second number is 3, the first number is 4
117
            // 5 + 4 = 3 + 6 = 9 = rows - 1.
118
            // the rows must GE 2 beacuse `_sorted_nums_vec` size GE 2
119
54
            size_t target = reverse ? rows - index - 2 : index;
120
54
            if (quantile == 1) {
121
0
                target = 0;
122
0
            }
123
54
            auto [first_number, second_number] = _merge_sort_and_get_numbers(target, reverse);
124
54
            if (quantile == 1) {
125
0
                return second_number;
126
0
            }
127
54
            return first_number + (u - static_cast<double>(index)) * (second_number - first_number);
128
54
        }
129
495
    }
_ZN5doris6CountsIlE9terminateEd
Line
Count
Source
79
118
    double terminate(double quantile) {
80
118
        if (_sorted_nums_vec.size() <= 1) {
81
99
            if (_sorted_nums_vec.size() == 1) {
82
29
                _nums = std::move(_sorted_nums_vec[0]);
83
29
            }
84
85
99
            if (_nums.empty()) {
86
                // Although set null here, but the value is 0.0 and the call method just
87
                // get val in aggregate_function_percentile_approx.h
88
0
                return 0.0;
89
0
            }
90
91
99
            if (UNLIKELY(!std::is_sorted(_nums.begin(), _nums.end()))) {
92
1
                pdqsort(_nums.begin(), _nums.end());
93
1
            }
94
95
99
            if (quantile == 1 || _nums.size() == 1) {
96
60
                return _nums.back();
97
60
            }
98
99
39
            double u = (_nums.size() - 1) * quantile;
100
39
            auto index = static_cast<uint32_t>(u);
101
39
            return _nums[index] +
102
39
                   (u - static_cast<double>(index)) * (_nums[index + 1] - _nums[index]);
103
99
        } else {
104
19
            DCHECK(_nums.empty());
105
19
            size_t rows = 0;
106
54
            for (const auto& i : _sorted_nums_vec) {
107
54
                rows += i.size();
108
54
            }
109
19
            const bool reverse = quantile > 0.5 && rows > 2;
110
19
            double u = (rows - 1) * quantile;
111
19
            auto index = static_cast<uint32_t>(u);
112
            // if reverse, the step of target should start 0 like not reverse
113
            // so here rows need to minus index + 2
114
            // eg: rows = 10, index = 5
115
            // if not reverse, so the first number loc is 5, the second number loc is 6
116
            // if reverse, so the second number is 3, the first number is 4
117
            // 5 + 4 = 3 + 6 = 9 = rows - 1.
118
            // the rows must GE 2 beacuse `_sorted_nums_vec` size GE 2
119
19
            size_t target = reverse ? rows - index - 2 : index;
120
19
            if (quantile == 1) {
121
0
                target = 0;
122
0
            }
123
19
            auto [first_number, second_number] = _merge_sort_and_get_numbers(target, reverse);
124
19
            if (quantile == 1) {
125
0
                return second_number;
126
0
            }
127
19
            return first_number + (u - static_cast<double>(index)) * (second_number - first_number);
128
19
        }
129
118
    }
_ZN5doris6CountsInE9terminateEd
Line
Count
Source
79
30
    double terminate(double quantile) {
80
30
        if (_sorted_nums_vec.size() <= 1) {
81
30
            if (_sorted_nums_vec.size() == 1) {
82
0
                _nums = std::move(_sorted_nums_vec[0]);
83
0
            }
84
85
30
            if (_nums.empty()) {
86
                // Although set null here, but the value is 0.0 and the call method just
87
                // get val in aggregate_function_percentile_approx.h
88
0
                return 0.0;
89
0
            }
90
91
30
            if (UNLIKELY(!std::is_sorted(_nums.begin(), _nums.end()))) {
92
0
                pdqsort(_nums.begin(), _nums.end());
93
0
            }
94
95
30
            if (quantile == 1 || _nums.size() == 1) {
96
24
                return _nums.back();
97
24
            }
98
99
6
            double u = (_nums.size() - 1) * quantile;
100
6
            auto index = static_cast<uint32_t>(u);
101
6
            return _nums[index] +
102
6
                   (u - static_cast<double>(index)) * (_nums[index + 1] - _nums[index]);
103
30
        } else {
104
0
            DCHECK(_nums.empty());
105
0
            size_t rows = 0;
106
0
            for (const auto& i : _sorted_nums_vec) {
107
0
                rows += i.size();
108
0
            }
109
0
            const bool reverse = quantile > 0.5 && rows > 2;
110
0
            double u = (rows - 1) * quantile;
111
0
            auto index = static_cast<uint32_t>(u);
112
            // if reverse, the step of target should start 0 like not reverse
113
            // so here rows need to minus index + 2
114
            // eg: rows = 10, index = 5
115
            // if not reverse, so the first number loc is 5, the second number loc is 6
116
            // if reverse, so the second number is 3, the first number is 4
117
            // 5 + 4 = 3 + 6 = 9 = rows - 1.
118
            // the rows must GE 2 beacuse `_sorted_nums_vec` size GE 2
119
0
            size_t target = reverse ? rows - index - 2 : index;
120
0
            if (quantile == 1) {
121
0
                target = 0;
122
0
            }
123
0
            auto [first_number, second_number] = _merge_sort_and_get_numbers(target, reverse);
124
0
            if (quantile == 1) {
125
0
                return second_number;
126
0
            }
127
0
            return first_number + (u - static_cast<double>(index)) * (second_number - first_number);
128
0
        }
129
30
    }
Unexecuted instantiation: _ZN5doris6CountsIfE9terminateEd
_ZN5doris6CountsIdE9terminateEd
Line
Count
Source
79
148
    double terminate(double quantile) {
80
148
        if (_sorted_nums_vec.size() <= 1) {
81
130
            if (_sorted_nums_vec.size() == 1) {
82
32
                _nums = std::move(_sorted_nums_vec[0]);
83
32
            }
84
85
130
            if (_nums.empty()) {
86
                // Although set null here, but the value is 0.0 and the call method just
87
                // get val in aggregate_function_percentile_approx.h
88
0
                return 0.0;
89
0
            }
90
91
130
            if (UNLIKELY(!std::is_sorted(_nums.begin(), _nums.end()))) {
92
0
                pdqsort(_nums.begin(), _nums.end());
93
0
            }
94
95
130
            if (quantile == 1 || _nums.size() == 1) {
96
42
                return _nums.back();
97
42
            }
98
99
88
            double u = (_nums.size() - 1) * quantile;
100
88
            auto index = static_cast<uint32_t>(u);
101
88
            return _nums[index] +
102
88
                   (u - static_cast<double>(index)) * (_nums[index + 1] - _nums[index]);
103
130
        } else {
104
18
            DCHECK(_nums.empty());
105
18
            size_t rows = 0;
106
48
            for (const auto& i : _sorted_nums_vec) {
107
48
                rows += i.size();
108
48
            }
109
18
            const bool reverse = quantile > 0.5 && rows > 2;
110
18
            double u = (rows - 1) * quantile;
111
18
            auto index = static_cast<uint32_t>(u);
112
            // if reverse, the step of target should start 0 like not reverse
113
            // so here rows need to minus index + 2
114
            // eg: rows = 10, index = 5
115
            // if not reverse, so the first number loc is 5, the second number loc is 6
116
            // if reverse, so the second number is 3, the first number is 4
117
            // 5 + 4 = 3 + 6 = 9 = rows - 1.
118
            // the rows must GE 2 beacuse `_sorted_nums_vec` size GE 2
119
18
            size_t target = reverse ? rows - index - 2 : index;
120
18
            if (quantile == 1) {
121
0
                target = 0;
122
0
            }
123
18
            auto [first_number, second_number] = _merge_sort_and_get_numbers(target, reverse);
124
18
            if (quantile == 1) {
125
0
                return second_number;
126
0
            }
127
18
            return first_number + (u - static_cast<double>(index)) * (second_number - first_number);
128
18
        }
129
148
    }
130
131
private:
132
    struct Node {
133
        Ty value;
134
        int array_index;
135
        int64_t element_index;
136
137
905
        auto operator<=>(const Node& other) const { return value <=> other.value; }
Unexecuted instantiation: _ZNK5doris6CountsIhE4NodessERKS2_
Unexecuted instantiation: _ZNK5doris6CountsIaE4NodessERKS2_
_ZNK5doris6CountsIsE4NodessERKS2_
Line
Count
Source
137
19
        auto operator<=>(const Node& other) const { return value <=> other.value; }
_ZNK5doris6CountsIiE4NodessERKS2_
Line
Count
Source
137
481
        auto operator<=>(const Node& other) const { return value <=> other.value; }
_ZNK5doris6CountsIlE4NodessERKS2_
Line
Count
Source
137
349
        auto operator<=>(const Node& other) const { return value <=> other.value; }
Unexecuted instantiation: _ZNK5doris6CountsInE4NodessERKS2_
Unexecuted instantiation: _ZNK5doris6CountsIfE4NodessERKS2_
_ZNK5doris6CountsIdE4NodessERKS2_
Line
Count
Source
137
56
        auto operator<=>(const Node& other) const { return value <=> other.value; }
138
    };
139
140
302
    void _convert_sorted_num_vec_to_nums() {
141
302
        size_t rows = 0;
142
586
        for (const auto& i : _sorted_nums_vec) {
143
586
            rows += i.size();
144
586
        }
145
302
        _nums.resize(rows);
146
302
        size_t count = 0;
147
148
302
        std::priority_queue<Node, std::vector<Node>, std::greater<Node>> min_heap;
149
888
        for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
150
586
            if (!_sorted_nums_vec[i].empty()) {
151
586
                min_heap.emplace(_sorted_nums_vec[i][0], i, 0);
152
586
            }
153
586
        }
154
155
936
        while (!min_heap.empty()) {
156
634
            Node node = min_heap.top();
157
634
            min_heap.pop();
158
634
            _nums[count++] = node.value;
159
634
            if (++node.element_index < _sorted_nums_vec[node.array_index].size()) {
160
48
                node.value = _sorted_nums_vec[node.array_index][node.element_index];
161
48
                min_heap.push(node);
162
48
            }
163
634
        }
164
302
        _sorted_nums_vec.clear();
165
302
    }
Unexecuted instantiation: _ZN5doris6CountsIhE31_convert_sorted_num_vec_to_numsEv
Unexecuted instantiation: _ZN5doris6CountsIaE31_convert_sorted_num_vec_to_numsEv
Unexecuted instantiation: _ZN5doris6CountsIsE31_convert_sorted_num_vec_to_numsEv
_ZN5doris6CountsIiE31_convert_sorted_num_vec_to_numsEv
Line
Count
Source
140
156
    void _convert_sorted_num_vec_to_nums() {
141
156
        size_t rows = 0;
142
339
        for (const auto& i : _sorted_nums_vec) {
143
339
            rows += i.size();
144
339
        }
145
156
        _nums.resize(rows);
146
156
        size_t count = 0;
147
148
156
        std::priority_queue<Node, std::vector<Node>, std::greater<Node>> min_heap;
149
495
        for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
150
339
            if (!_sorted_nums_vec[i].empty()) {
151
339
                min_heap.emplace(_sorted_nums_vec[i][0], i, 0);
152
339
            }
153
339
        }
154
155
531
        while (!min_heap.empty()) {
156
375
            Node node = min_heap.top();
157
375
            min_heap.pop();
158
375
            _nums[count++] = node.value;
159
375
            if (++node.element_index < _sorted_nums_vec[node.array_index].size()) {
160
36
                node.value = _sorted_nums_vec[node.array_index][node.element_index];
161
36
                min_heap.push(node);
162
36
            }
163
375
        }
164
156
        _sorted_nums_vec.clear();
165
156
    }
_ZN5doris6CountsIlE31_convert_sorted_num_vec_to_numsEv
Line
Count
Source
140
146
    void _convert_sorted_num_vec_to_nums() {
141
146
        size_t rows = 0;
142
247
        for (const auto& i : _sorted_nums_vec) {
143
247
            rows += i.size();
144
247
        }
145
146
        _nums.resize(rows);
146
146
        size_t count = 0;
147
148
146
        std::priority_queue<Node, std::vector<Node>, std::greater<Node>> min_heap;
149
393
        for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
150
247
            if (!_sorted_nums_vec[i].empty()) {
151
247
                min_heap.emplace(_sorted_nums_vec[i][0], i, 0);
152
247
            }
153
247
        }
154
155
405
        while (!min_heap.empty()) {
156
259
            Node node = min_heap.top();
157
259
            min_heap.pop();
158
259
            _nums[count++] = node.value;
159
259
            if (++node.element_index < _sorted_nums_vec[node.array_index].size()) {
160
12
                node.value = _sorted_nums_vec[node.array_index][node.element_index];
161
12
                min_heap.push(node);
162
12
            }
163
259
        }
164
146
        _sorted_nums_vec.clear();
165
146
    }
Unexecuted instantiation: _ZN5doris6CountsInE31_convert_sorted_num_vec_to_numsEv
Unexecuted instantiation: _ZN5doris6CountsIfE31_convert_sorted_num_vec_to_numsEv
Unexecuted instantiation: _ZN5doris6CountsIdE31_convert_sorted_num_vec_to_numsEv
166
167
94
    std::pair<Ty, Ty> _merge_sort_and_get_numbers(int64_t target, bool reverse) {
168
94
        Ty first_number = 0, second_number = 0;
169
94
        size_t count = 0;
170
94
        if (reverse) {
171
27
            std::priority_queue<Node> max_heap;
172
99
            for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
173
72
                if (!_sorted_nums_vec[i].empty()) {
174
72
                    max_heap.emplace(_sorted_nums_vec[i][_sorted_nums_vec[i].size() - 1], i,
175
72
                                     _sorted_nums_vec[i].size() - 1);
176
72
                }
177
72
            }
178
179
121
            while (!max_heap.empty()) {
180
121
                Node node = max_heap.top();
181
121
                max_heap.pop();
182
121
                if (count == target) {
183
27
                    second_number = node.value;
184
94
                } else if (count == target + 1) {
185
27
                    first_number = node.value;
186
27
                    break;
187
27
                }
188
94
                ++count;
189
94
                if (--node.element_index >= 0) {
190
78
                    node.value = _sorted_nums_vec[node.array_index][node.element_index];
191
78
                    max_heap.push(node);
192
78
                }
193
94
            }
194
195
67
        } else {
196
67
            std::priority_queue<Node, std::vector<Node>, std::greater<Node>> min_heap;
197
211
            for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
198
144
                if (!_sorted_nums_vec[i].empty()) {
199
144
                    min_heap.emplace(_sorted_nums_vec[i][0], i, 0);
200
144
                }
201
144
            }
202
203
225
            while (!min_heap.empty()) {
204
225
                Node node = min_heap.top();
205
225
                min_heap.pop();
206
225
                if (count == target) {
207
67
                    first_number = node.value;
208
158
                } else if (count == target + 1) {
209
67
                    second_number = node.value;
210
67
                    break;
211
67
                }
212
158
                ++count;
213
158
                if (++node.element_index < _sorted_nums_vec[node.array_index].size()) {
214
138
                    node.value = _sorted_nums_vec[node.array_index][node.element_index];
215
138
                    min_heap.push(node);
216
138
                }
217
158
            }
218
67
        }
219
220
94
        return {first_number, second_number};
221
94
    }
Unexecuted instantiation: _ZN5doris6CountsIhE27_merge_sort_and_get_numbersElb
Unexecuted instantiation: _ZN5doris6CountsIaE27_merge_sort_and_get_numbersElb
_ZN5doris6CountsIsE27_merge_sort_and_get_numbersElb
Line
Count
Source
167
3
    std::pair<Ty, Ty> _merge_sort_and_get_numbers(int64_t target, bool reverse) {
168
3
        Ty first_number = 0, second_number = 0;
169
3
        size_t count = 0;
170
3
        if (reverse) {
171
1
            std::priority_queue<Node> max_heap;
172
3
            for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
173
2
                if (!_sorted_nums_vec[i].empty()) {
174
2
                    max_heap.emplace(_sorted_nums_vec[i][_sorted_nums_vec[i].size() - 1], i,
175
2
                                     _sorted_nums_vec[i].size() - 1);
176
2
                }
177
2
            }
178
179
6
            while (!max_heap.empty()) {
180
6
                Node node = max_heap.top();
181
6
                max_heap.pop();
182
6
                if (count == target) {
183
1
                    second_number = node.value;
184
5
                } else if (count == target + 1) {
185
1
                    first_number = node.value;
186
1
                    break;
187
1
                }
188
5
                ++count;
189
5
                if (--node.element_index >= 0) {
190
5
                    node.value = _sorted_nums_vec[node.array_index][node.element_index];
191
5
                    max_heap.push(node);
192
5
                }
193
5
            }
194
195
2
        } else {
196
2
            std::priority_queue<Node, std::vector<Node>, std::greater<Node>> min_heap;
197
6
            for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
198
4
                if (!_sorted_nums_vec[i].empty()) {
199
4
                    min_heap.emplace(_sorted_nums_vec[i][0], i, 0);
200
4
                }
201
4
            }
202
203
13
            while (!min_heap.empty()) {
204
13
                Node node = min_heap.top();
205
13
                min_heap.pop();
206
13
                if (count == target) {
207
2
                    first_number = node.value;
208
11
                } else if (count == target + 1) {
209
2
                    second_number = node.value;
210
2
                    break;
211
2
                }
212
11
                ++count;
213
11
                if (++node.element_index < _sorted_nums_vec[node.array_index].size()) {
214
11
                    node.value = _sorted_nums_vec[node.array_index][node.element_index];
215
11
                    min_heap.push(node);
216
11
                }
217
11
            }
218
2
        }
219
220
3
        return {first_number, second_number};
221
3
    }
_ZN5doris6CountsIiE27_merge_sort_and_get_numbersElb
Line
Count
Source
167
54
    std::pair<Ty, Ty> _merge_sort_and_get_numbers(int64_t target, bool reverse) {
168
54
        Ty first_number = 0, second_number = 0;
169
54
        size_t count = 0;
170
54
        if (reverse) {
171
8
            std::priority_queue<Node> max_heap;
172
24
            for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
173
16
                if (!_sorted_nums_vec[i].empty()) {
174
16
                    max_heap.emplace(_sorted_nums_vec[i][_sorted_nums_vec[i].size() - 1], i,
175
16
                                     _sorted_nums_vec[i].size() - 1);
176
16
                }
177
16
            }
178
179
35
            while (!max_heap.empty()) {
180
35
                Node node = max_heap.top();
181
35
                max_heap.pop();
182
35
                if (count == target) {
183
8
                    second_number = node.value;
184
27
                } else if (count == target + 1) {
185
8
                    first_number = node.value;
186
8
                    break;
187
8
                }
188
27
                ++count;
189
27
                if (--node.element_index >= 0) {
190
23
                    node.value = _sorted_nums_vec[node.array_index][node.element_index];
191
23
                    max_heap.push(node);
192
23
                }
193
27
            }
194
195
46
        } else {
196
46
            std::priority_queue<Node, std::vector<Node>, std::greater<Node>> min_heap;
197
138
            for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
198
92
                if (!_sorted_nums_vec[i].empty()) {
199
92
                    min_heap.emplace(_sorted_nums_vec[i][0], i, 0);
200
92
                }
201
92
            }
202
203
160
            while (!min_heap.empty()) {
204
160
                Node node = min_heap.top();
205
160
                min_heap.pop();
206
160
                if (count == target) {
207
46
                    first_number = node.value;
208
114
                } else if (count == target + 1) {
209
46
                    second_number = node.value;
210
46
                    break;
211
46
                }
212
114
                ++count;
213
114
                if (++node.element_index < _sorted_nums_vec[node.array_index].size()) {
214
113
                    node.value = _sorted_nums_vec[node.array_index][node.element_index];
215
113
                    min_heap.push(node);
216
113
                }
217
114
            }
218
46
        }
219
220
54
        return {first_number, second_number};
221
54
    }
_ZN5doris6CountsIlE27_merge_sort_and_get_numbersElb
Line
Count
Source
167
19
    std::pair<Ty, Ty> _merge_sort_and_get_numbers(int64_t target, bool reverse) {
168
19
        Ty first_number = 0, second_number = 0;
169
19
        size_t count = 0;
170
19
        if (reverse) {
171
16
            std::priority_queue<Node> max_heap;
172
64
            for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
173
48
                if (!_sorted_nums_vec[i].empty()) {
174
48
                    max_heap.emplace(_sorted_nums_vec[i][_sorted_nums_vec[i].size() - 1], i,
175
48
                                     _sorted_nums_vec[i].size() - 1);
176
48
                }
177
48
            }
178
179
75
            while (!max_heap.empty()) {
180
75
                Node node = max_heap.top();
181
75
                max_heap.pop();
182
75
                if (count == target) {
183
16
                    second_number = node.value;
184
59
                } else if (count == target + 1) {
185
16
                    first_number = node.value;
186
16
                    break;
187
16
                }
188
59
                ++count;
189
59
                if (--node.element_index >= 0) {
190
48
                    node.value = _sorted_nums_vec[node.array_index][node.element_index];
191
48
                    max_heap.push(node);
192
48
                }
193
59
            }
194
195
16
        } else {
196
3
            std::priority_queue<Node, std::vector<Node>, std::greater<Node>> min_heap;
197
9
            for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
198
6
                if (!_sorted_nums_vec[i].empty()) {
199
6
                    min_heap.emplace(_sorted_nums_vec[i][0], i, 0);
200
6
                }
201
6
            }
202
203
12
            while (!min_heap.empty()) {
204
12
                Node node = min_heap.top();
205
12
                min_heap.pop();
206
12
                if (count == target) {
207
3
                    first_number = node.value;
208
9
                } else if (count == target + 1) {
209
3
                    second_number = node.value;
210
3
                    break;
211
3
                }
212
9
                ++count;
213
9
                if (++node.element_index < _sorted_nums_vec[node.array_index].size()) {
214
9
                    node.value = _sorted_nums_vec[node.array_index][node.element_index];
215
9
                    min_heap.push(node);
216
9
                }
217
9
            }
218
3
        }
219
220
19
        return {first_number, second_number};
221
19
    }
Unexecuted instantiation: _ZN5doris6CountsInE27_merge_sort_and_get_numbersElb
Unexecuted instantiation: _ZN5doris6CountsIfE27_merge_sort_and_get_numbersElb
_ZN5doris6CountsIdE27_merge_sort_and_get_numbersElb
Line
Count
Source
167
18
    std::pair<Ty, Ty> _merge_sort_and_get_numbers(int64_t target, bool reverse) {
168
18
        Ty first_number = 0, second_number = 0;
169
18
        size_t count = 0;
170
18
        if (reverse) {
171
2
            std::priority_queue<Node> max_heap;
172
8
            for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
173
6
                if (!_sorted_nums_vec[i].empty()) {
174
6
                    max_heap.emplace(_sorted_nums_vec[i][_sorted_nums_vec[i].size() - 1], i,
175
6
                                     _sorted_nums_vec[i].size() - 1);
176
6
                }
177
6
            }
178
179
5
            while (!max_heap.empty()) {
180
5
                Node node = max_heap.top();
181
5
                max_heap.pop();
182
5
                if (count == target) {
183
2
                    second_number = node.value;
184
3
                } else if (count == target + 1) {
185
2
                    first_number = node.value;
186
2
                    break;
187
2
                }
188
3
                ++count;
189
3
                if (--node.element_index >= 0) {
190
2
                    node.value = _sorted_nums_vec[node.array_index][node.element_index];
191
2
                    max_heap.push(node);
192
2
                }
193
3
            }
194
195
16
        } else {
196
16
            std::priority_queue<Node, std::vector<Node>, std::greater<Node>> min_heap;
197
58
            for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
198
42
                if (!_sorted_nums_vec[i].empty()) {
199
42
                    min_heap.emplace(_sorted_nums_vec[i][0], i, 0);
200
42
                }
201
42
            }
202
203
40
            while (!min_heap.empty()) {
204
40
                Node node = min_heap.top();
205
40
                min_heap.pop();
206
40
                if (count == target) {
207
16
                    first_number = node.value;
208
24
                } else if (count == target + 1) {
209
16
                    second_number = node.value;
210
16
                    break;
211
16
                }
212
24
                ++count;
213
24
                if (++node.element_index < _sorted_nums_vec[node.array_index].size()) {
214
5
                    node.value = _sorted_nums_vec[node.array_index][node.element_index];
215
5
                    min_heap.push(node);
216
5
                }
217
24
            }
218
16
        }
219
220
18
        return {first_number, second_number};
221
18
    }
222
223
    vectorized::PODArray<Ty> _nums;
224
    std::vector<vectorized::PODArray<Ty>> _sorted_nums_vec;
225
};
226
227
} // namespace doris