Coverage Report

Created: 2025-05-08 11:08

/root/doris/be/src/util/counts.h
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <pdqsort.h>
21
22
#include <algorithm>
23
#include <cmath>
24
#include <queue>
25
26
#include "udf/udf.h"
27
#include "vec/common/pod_array.h"
28
#include "vec/common/string_buffer.hpp"
29
#include "vec/io/io_helper.h"
30
31
namespace doris {
32
33
template <typename Ty>
34
class Counts {
35
public:
36
7.61k
    Counts() = default;
Unexecuted instantiation: _ZN5doris6CountsIhEC2Ev
_ZN5doris6CountsIaEC2Ev
Line
Count
Source
36
68
    Counts() = default;
_ZN5doris6CountsIsEC2Ev
Line
Count
Source
36
486
    Counts() = default;
_ZN5doris6CountsIiEC2Ev
Line
Count
Source
36
4.72k
    Counts() = default;
_ZN5doris6CountsIlEC2Ev
Line
Count
Source
36
1.82k
    Counts() = default;
_ZN5doris6CountsInEC2Ev
Line
Count
Source
36
60
    Counts() = default;
Unexecuted instantiation: _ZN5doris6CountsIfEC2Ev
_ZN5doris6CountsIdEC2Ev
Line
Count
Source
36
456
    Counts() = default;
37
38
2.68k
    void merge(Counts* other) {
39
2.68k
        if (other != nullptr && !other->_nums.empty()) {
40
2.68k
            _sorted_nums_vec.emplace_back(std::move(other->_nums));
41
2.68k
        }
42
2.68k
    }
Unexecuted instantiation: _ZN5doris6CountsIhE5mergeEPS1_
Unexecuted instantiation: _ZN5doris6CountsIaE5mergeEPS1_
_ZN5doris6CountsIsE5mergeEPS1_
Line
Count
Source
38
24
    void merge(Counts* other) {
39
24
        if (other != nullptr && !other->_nums.empty()) {
40
24
            _sorted_nums_vec.emplace_back(std::move(other->_nums));
41
24
        }
42
24
    }
_ZN5doris6CountsIiE5mergeEPS1_
Line
Count
Source
38
1.78k
    void merge(Counts* other) {
39
1.78k
        if (other != nullptr && !other->_nums.empty()) {
40
1.78k
            _sorted_nums_vec.emplace_back(std::move(other->_nums));
41
1.78k
        }
42
1.78k
    }
_ZN5doris6CountsIlE5mergeEPS1_
Line
Count
Source
38
756
    void merge(Counts* other) {
39
756
        if (other != nullptr && !other->_nums.empty()) {
40
756
            _sorted_nums_vec.emplace_back(std::move(other->_nums));
41
756
        }
42
756
    }
Unexecuted instantiation: _ZN5doris6CountsInE5mergeEPS1_
Unexecuted instantiation: _ZN5doris6CountsIfE5mergeEPS1_
_ZN5doris6CountsIdE5mergeEPS1_
Line
Count
Source
38
124
    void merge(Counts* other) {
39
124
        if (other != nullptr && !other->_nums.empty()) {
40
124
            _sorted_nums_vec.emplace_back(std::move(other->_nums));
41
124
        }
42
124
    }
43
44
    void increment(Ty key, uint32_t i) {
45
        auto old_size = _nums.size();
46
        _nums.resize(_nums.size() + i);
47
        for (uint32_t j = 0; j < i; ++j) {
48
            _nums[old_size + j] = key;
49
        }
50
    }
51
52
6.33k
    void increment(Ty key) { _nums.push_back(key); }
Unexecuted instantiation: _ZN5doris6CountsIhE9incrementEh
_ZN5doris6CountsIaE9incrementEa
Line
Count
Source
52
128
    void increment(Ty key) { _nums.push_back(key); }
_ZN5doris6CountsIsE9incrementEs
Line
Count
Source
52
1.57k
    void increment(Ty key) { _nums.push_back(key); }
_ZN5doris6CountsIiE9incrementEi
Line
Count
Source
52
3.20k
    void increment(Ty key) { _nums.push_back(key); }
_ZN5doris6CountsIlE9incrementEl
Line
Count
Source
52
824
    void increment(Ty key) { _nums.push_back(key); }
_ZN5doris6CountsInE9incrementEn
Line
Count
Source
52
72
    void increment(Ty key) { _nums.push_back(key); }
Unexecuted instantiation: _ZN5doris6CountsIfE9incrementEf
_ZN5doris6CountsIdE9incrementEd
Line
Count
Source
52
536
    void increment(Ty key) { _nums.push_back(key); }
53
54
10
    void increment_batch(const vectorized::PaddedPODArray<Ty>& keys) {
55
10
        _nums.insert(keys.begin(), keys.end());
56
10
    }
Unexecuted instantiation: _ZN5doris6CountsIhE15increment_batchERKNS_10vectorized8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorEEELm16ELm15EEE
Unexecuted instantiation: _ZN5doris6CountsIaE15increment_batchERKNS_10vectorized8PODArrayIaLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorEEELm16ELm15EEE
Unexecuted instantiation: _ZN5doris6CountsIsE15increment_batchERKNS_10vectorized8PODArrayIsLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorEEELm16ELm15EEE
Unexecuted instantiation: _ZN5doris6CountsIiE15increment_batchERKNS_10vectorized8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorEEELm16ELm15EEE
_ZN5doris6CountsIlE15increment_batchERKNS_10vectorized8PODArrayIlLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorEEELm16ELm15EEE
Line
Count
Source
54
10
    void increment_batch(const vectorized::PaddedPODArray<Ty>& keys) {
55
10
        _nums.insert(keys.begin(), keys.end());
56
10
    }
Unexecuted instantiation: _ZN5doris6CountsInE15increment_batchERKNS_10vectorized8PODArrayInLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorEEELm16ELm15EEE
Unexecuted instantiation: _ZN5doris6CountsIfE15increment_batchERKNS_10vectorized8PODArrayIfLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorEEELm16ELm15EEE
Unexecuted instantiation: _ZN5doris6CountsIdE15increment_batchERKNS_10vectorized8PODArrayIdLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorEEELm16ELm15EEE
57
58
3.62k
    void serialize(vectorized::BufferWritable& buf) {
59
3.62k
        if (!_nums.empty()) {
60
2.90k
            pdqsort(_nums.begin(), _nums.end());
61
2.90k
            size_t size = _nums.size();
62
2.90k
            write_binary(size, buf);
63
2.90k
            buf.write(reinterpret_cast<const char*>(_nums.data()), sizeof(Ty) * size);
64
2.90k
        } else {
65
            // convert _sorted_nums_vec to _nums and do seiralize again
66
716
            _convert_sorted_num_vec_to_nums();
67
716
            serialize(buf);
68
716
        }
69
3.62k
    }
Unexecuted instantiation: _ZN5doris6CountsIhE9serializeERNS_10vectorized14BufferWritableE
Unexecuted instantiation: _ZN5doris6CountsIaE9serializeERNS_10vectorized14BufferWritableE
_ZN5doris6CountsIsE9serializeERNS_10vectorized14BufferWritableE
Line
Count
Source
58
24
    void serialize(vectorized::BufferWritable& buf) {
59
24
        if (!_nums.empty()) {
60
24
            pdqsort(_nums.begin(), _nums.end());
61
24
            size_t size = _nums.size();
62
24
            write_binary(size, buf);
63
24
            buf.write(reinterpret_cast<const char*>(_nums.data()), sizeof(Ty) * size);
64
24
        } else {
65
            // convert _sorted_nums_vec to _nums and do seiralize again
66
0
            _convert_sorted_num_vec_to_nums();
67
0
            serialize(buf);
68
0
        }
69
24
    }
_ZN5doris6CountsIiE9serializeERNS_10vectorized14BufferWritableE
Line
Count
Source
58
2.32k
    void serialize(vectorized::BufferWritable& buf) {
59
2.32k
        if (!_nums.empty()) {
60
1.94k
            pdqsort(_nums.begin(), _nums.end());
61
1.94k
            size_t size = _nums.size();
62
1.94k
            write_binary(size, buf);
63
1.94k
            buf.write(reinterpret_cast<const char*>(_nums.data()), sizeof(Ty) * size);
64
1.94k
        } else {
65
            // convert _sorted_nums_vec to _nums and do seiralize again
66
384
            _convert_sorted_num_vec_to_nums();
67
384
            serialize(buf);
68
384
        }
69
2.32k
    }
_ZN5doris6CountsIlE9serializeERNS_10vectorized14BufferWritableE
Line
Count
Source
58
1.15k
    void serialize(vectorized::BufferWritable& buf) {
59
1.15k
        if (!_nums.empty()) {
60
820
            pdqsort(_nums.begin(), _nums.end());
61
820
            size_t size = _nums.size();
62
820
            write_binary(size, buf);
63
820
            buf.write(reinterpret_cast<const char*>(_nums.data()), sizeof(Ty) * size);
64
820
        } else {
65
            // convert _sorted_nums_vec to _nums and do seiralize again
66
332
            _convert_sorted_num_vec_to_nums();
67
332
            serialize(buf);
68
332
        }
69
1.15k
    }
Unexecuted instantiation: _ZN5doris6CountsInE9serializeERNS_10vectorized14BufferWritableE
Unexecuted instantiation: _ZN5doris6CountsIfE9serializeERNS_10vectorized14BufferWritableE
_ZN5doris6CountsIdE9serializeERNS_10vectorized14BufferWritableE
Line
Count
Source
58
124
    void serialize(vectorized::BufferWritable& buf) {
59
124
        if (!_nums.empty()) {
60
124
            pdqsort(_nums.begin(), _nums.end());
61
124
            size_t size = _nums.size();
62
124
            write_binary(size, buf);
63
124
            buf.write(reinterpret_cast<const char*>(_nums.data()), sizeof(Ty) * size);
64
124
        } else {
65
            // convert _sorted_nums_vec to _nums and do seiralize again
66
0
            _convert_sorted_num_vec_to_nums();
67
0
            serialize(buf);
68
0
        }
69
124
    }
70
71
2.68k
    void unserialize(vectorized::BufferReadable& buf) {
72
2.68k
        size_t size;
73
2.68k
        read_binary(size, buf);
74
2.68k
        _nums.resize(size);
75
2.68k
        auto buff = buf.read(sizeof(Ty) * size);
76
2.68k
        memcpy(_nums.data(), buff.data, buff.size);
77
2.68k
    }
Unexecuted instantiation: _ZN5doris6CountsIhE11unserializeERNS_10vectorized14BufferReadableE
Unexecuted instantiation: _ZN5doris6CountsIaE11unserializeERNS_10vectorized14BufferReadableE
_ZN5doris6CountsIsE11unserializeERNS_10vectorized14BufferReadableE
Line
Count
Source
71
24
    void unserialize(vectorized::BufferReadable& buf) {
72
24
        size_t size;
73
24
        read_binary(size, buf);
74
24
        _nums.resize(size);
75
24
        auto buff = buf.read(sizeof(Ty) * size);
76
24
        memcpy(_nums.data(), buff.data, buff.size);
77
24
    }
_ZN5doris6CountsIiE11unserializeERNS_10vectorized14BufferReadableE
Line
Count
Source
71
1.78k
    void unserialize(vectorized::BufferReadable& buf) {
72
1.78k
        size_t size;
73
1.78k
        read_binary(size, buf);
74
1.78k
        _nums.resize(size);
75
1.78k
        auto buff = buf.read(sizeof(Ty) * size);
76
1.78k
        memcpy(_nums.data(), buff.data, buff.size);
77
1.78k
    }
_ZN5doris6CountsIlE11unserializeERNS_10vectorized14BufferReadableE
Line
Count
Source
71
756
    void unserialize(vectorized::BufferReadable& buf) {
72
756
        size_t size;
73
756
        read_binary(size, buf);
74
756
        _nums.resize(size);
75
756
        auto buff = buf.read(sizeof(Ty) * size);
76
756
        memcpy(_nums.data(), buff.data, buff.size);
77
756
    }
Unexecuted instantiation: _ZN5doris6CountsInE11unserializeERNS_10vectorized14BufferReadableE
Unexecuted instantiation: _ZN5doris6CountsIfE11unserializeERNS_10vectorized14BufferReadableE
_ZN5doris6CountsIdE11unserializeERNS_10vectorized14BufferReadableE
Line
Count
Source
71
124
    void unserialize(vectorized::BufferReadable& buf) {
72
124
        size_t size;
73
124
        read_binary(size, buf);
74
124
        _nums.resize(size);
75
124
        auto buff = buf.read(sizeof(Ty) * size);
76
124
        memcpy(_nums.data(), buff.data, buff.size);
77
124
    }
78
79
2.28k
    double terminate(double quantile) {
80
2.28k
        if (_sorted_nums_vec.size() <= 1) {
81
2.05k
            if (_sorted_nums_vec.size() == 1) {
82
798
                _nums = std::move(_sorted_nums_vec[0]);
83
798
            }
84
85
2.05k
            if (_nums.empty()) {
86
                // Although set null here, but the value is 0.0 and the call method just
87
                // get val in aggregate_function_percentile_approx.h
88
0
                return 0.0;
89
0
            }
90
91
2.05k
            if (UNLIKELY(!std::is_sorted(_nums.begin(), _nums.end()))) {
92
401
                pdqsort(_nums.begin(), _nums.end());
93
401
            }
94
95
2.05k
            if (quantile == 1 || _nums.size() == 1) {
96
1.00k
                return _nums.back();
97
1.00k
            }
98
99
1.05k
            double u = (_nums.size() - 1) * quantile;
100
1.05k
            auto index = static_cast<uint32_t>(u);
101
1.05k
            return _nums[index] +
102
1.05k
                   (u - static_cast<double>(index)) * (_nums[index + 1] - _nums[index]);
103
2.05k
        } else {
104
231
            DCHECK(_nums.empty());
105
231
            size_t rows = 0;
106
622
            for (const auto& i : _sorted_nums_vec) {
107
622
                rows += i.size();
108
622
            }
109
231
            const bool reverse = quantile > 0.5 && rows > 2;
110
231
            double u = (rows - 1) * quantile;
111
231
            auto index = static_cast<uint32_t>(u);
112
            // if reverse, the step of target should start 0 like not reverse
113
            // so here rows need to minus index + 2
114
            // eg: rows = 10, index = 5
115
            // if not reverse, so the first number loc is 5, the second number loc is 6
116
            // if reverse, so the second number is 3, the first number is 4
117
            // 5 + 4 = 3 + 6 = 9 = rows - 1.
118
            // the rows must GE 2 beacuse `_sorted_nums_vec` size GE 2
119
231
            size_t target = reverse ? rows - index - 2 : index;
120
231
            if (quantile == 1) {
121
0
                target = 0;
122
0
            }
123
231
            auto [first_number, second_number] = _merge_sort_and_get_numbers(target, reverse);
124
231
            if (quantile == 1) {
125
0
                return second_number;
126
0
            }
127
231
            return first_number + (u - static_cast<double>(index)) * (second_number - first_number);
128
231
        }
129
2.28k
    }
Unexecuted instantiation: _ZN5doris6CountsIhE9terminateEd
_ZN5doris6CountsIaE9terminateEd
Line
Count
Source
79
116
    double terminate(double quantile) {
80
116
        if (_sorted_nums_vec.size() <= 1) {
81
116
            if (_sorted_nums_vec.size() == 1) {
82
0
                _nums = std::move(_sorted_nums_vec[0]);
83
0
            }
84
85
116
            if (_nums.empty()) {
86
                // Although set null here, but the value is 0.0 and the call method just
87
                // get val in aggregate_function_percentile_approx.h
88
0
                return 0.0;
89
0
            }
90
91
116
            if (UNLIKELY(!std::is_sorted(_nums.begin(), _nums.end()))) {
92
48
                pdqsort(_nums.begin(), _nums.end());
93
48
            }
94
95
116
            if (quantile == 1 || _nums.size() == 1) {
96
68
                return _nums.back();
97
68
            }
98
99
48
            double u = (_nums.size() - 1) * quantile;
100
48
            auto index = static_cast<uint32_t>(u);
101
48
            return _nums[index] +
102
48
                   (u - static_cast<double>(index)) * (_nums[index + 1] - _nums[index]);
103
116
        } else {
104
0
            DCHECK(_nums.empty());
105
0
            size_t rows = 0;
106
0
            for (const auto& i : _sorted_nums_vec) {
107
0
                rows += i.size();
108
0
            }
109
0
            const bool reverse = quantile > 0.5 && rows > 2;
110
0
            double u = (rows - 1) * quantile;
111
0
            auto index = static_cast<uint32_t>(u);
112
            // if reverse, the step of target should start 0 like not reverse
113
            // so here rows need to minus index + 2
114
            // eg: rows = 10, index = 5
115
            // if not reverse, so the first number loc is 5, the second number loc is 6
116
            // if reverse, so the second number is 3, the first number is 4
117
            // 5 + 4 = 3 + 6 = 9 = rows - 1.
118
            // the rows must GE 2 beacuse `_sorted_nums_vec` size GE 2
119
0
            size_t target = reverse ? rows - index - 2 : index;
120
0
            if (quantile == 1) {
121
0
                target = 0;
122
0
            }
123
0
            auto [first_number, second_number] = _merge_sort_and_get_numbers(target, reverse);
124
0
            if (quantile == 1) {
125
0
                return second_number;
126
0
            }
127
0
            return first_number + (u - static_cast<double>(index)) * (second_number - first_number);
128
0
        }
129
116
    }
_ZN5doris6CountsIsE9terminateEd
Line
Count
Source
79
594
    double terminate(double quantile) {
80
594
        if (_sorted_nums_vec.size() <= 1) {
81
588
            if (_sorted_nums_vec.size() == 1) {
82
0
                _nums = std::move(_sorted_nums_vec[0]);
83
0
            }
84
85
588
            if (_nums.empty()) {
86
                // Although set null here, but the value is 0.0 and the call method just
87
                // get val in aggregate_function_percentile_approx.h
88
0
                return 0.0;
89
0
            }
90
91
588
            if (UNLIKELY(!std::is_sorted(_nums.begin(), _nums.end()))) {
92
330
                pdqsort(_nums.begin(), _nums.end());
93
330
            }
94
95
588
            if (quantile == 1 || _nums.size() == 1) {
96
174
                return _nums.back();
97
174
            }
98
99
414
            double u = (_nums.size() - 1) * quantile;
100
414
            auto index = static_cast<uint32_t>(u);
101
414
            return _nums[index] +
102
414
                   (u - static_cast<double>(index)) * (_nums[index + 1] - _nums[index]);
103
588
        } else {
104
6
            DCHECK(_nums.empty());
105
6
            size_t rows = 0;
106
24
            for (const auto& i : _sorted_nums_vec) {
107
24
                rows += i.size();
108
24
            }
109
6
            const bool reverse = quantile > 0.5 && rows > 2;
110
6
            double u = (rows - 1) * quantile;
111
6
            auto index = static_cast<uint32_t>(u);
112
            // if reverse, the step of target should start 0 like not reverse
113
            // so here rows need to minus index + 2
114
            // eg: rows = 10, index = 5
115
            // if not reverse, so the first number loc is 5, the second number loc is 6
116
            // if reverse, so the second number is 3, the first number is 4
117
            // 5 + 4 = 3 + 6 = 9 = rows - 1.
118
            // the rows must GE 2 beacuse `_sorted_nums_vec` size GE 2
119
6
            size_t target = reverse ? rows - index - 2 : index;
120
6
            if (quantile == 1) {
121
0
                target = 0;
122
0
            }
123
6
            auto [first_number, second_number] = _merge_sort_and_get_numbers(target, reverse);
124
6
            if (quantile == 1) {
125
0
                return second_number;
126
0
            }
127
6
            return first_number + (u - static_cast<double>(index)) * (second_number - first_number);
128
6
        }
129
594
    }
_ZN5doris6CountsIiE9terminateEd
Line
Count
Source
79
990
    double terminate(double quantile) {
80
990
        if (_sorted_nums_vec.size() <= 1) {
81
818
            if (_sorted_nums_vec.size() == 1) {
82
630
                _nums = std::move(_sorted_nums_vec[0]);
83
630
            }
84
85
818
            if (_nums.empty()) {
86
                // Although set null here, but the value is 0.0 and the call method just
87
                // get val in aggregate_function_percentile_approx.h
88
0
                return 0.0;
89
0
            }
90
91
818
            if (UNLIKELY(!std::is_sorted(_nums.begin(), _nums.end()))) {
92
22
                pdqsort(_nums.begin(), _nums.end());
93
22
            }
94
95
818
            if (quantile == 1 || _nums.size() == 1) {
96
508
                return _nums.back();
97
508
            }
98
99
310
            double u = (_nums.size() - 1) * quantile;
100
310
            auto index = static_cast<uint32_t>(u);
101
310
            return _nums[index] +
102
310
                   (u - static_cast<double>(index)) * (_nums[index + 1] - _nums[index]);
103
818
        } else {
104
172
            DCHECK(_nums.empty());
105
172
            size_t rows = 0;
106
404
            for (const auto& i : _sorted_nums_vec) {
107
404
                rows += i.size();
108
404
            }
109
172
            const bool reverse = quantile > 0.5 && rows > 2;
110
172
            double u = (rows - 1) * quantile;
111
172
            auto index = static_cast<uint32_t>(u);
112
            // if reverse, the step of target should start 0 like not reverse
113
            // so here rows need to minus index + 2
114
            // eg: rows = 10, index = 5
115
            // if not reverse, so the first number loc is 5, the second number loc is 6
116
            // if reverse, so the second number is 3, the first number is 4
117
            // 5 + 4 = 3 + 6 = 9 = rows - 1.
118
            // the rows must GE 2 beacuse `_sorted_nums_vec` size GE 2
119
172
            size_t target = reverse ? rows - index - 2 : index;
120
172
            if (quantile == 1) {
121
0
                target = 0;
122
0
            }
123
172
            auto [first_number, second_number] = _merge_sort_and_get_numbers(target, reverse);
124
172
            if (quantile == 1) {
125
0
                return second_number;
126
0
            }
127
172
            return first_number + (u - static_cast<double>(index)) * (second_number - first_number);
128
172
        }
129
990
    }
_ZN5doris6CountsIlE9terminateEd
Line
Count
Source
79
233
    double terminate(double quantile) {
80
233
        if (_sorted_nums_vec.size() <= 1) {
81
192
            if (_sorted_nums_vec.size() == 1) {
82
80
                _nums = std::move(_sorted_nums_vec[0]);
83
80
            }
84
85
192
            if (_nums.empty()) {
86
                // Although set null here, but the value is 0.0 and the call method just
87
                // get val in aggregate_function_percentile_approx.h
88
0
                return 0.0;
89
0
            }
90
91
192
            if (UNLIKELY(!std::is_sorted(_nums.begin(), _nums.end()))) {
92
1
                pdqsort(_nums.begin(), _nums.end());
93
1
            }
94
95
192
            if (quantile == 1 || _nums.size() == 1) {
96
120
                return _nums.back();
97
120
            }
98
99
72
            double u = (_nums.size() - 1) * quantile;
100
72
            auto index = static_cast<uint32_t>(u);
101
72
            return _nums[index] +
102
72
                   (u - static_cast<double>(index)) * (_nums[index + 1] - _nums[index]);
103
192
        } else {
104
41
            DCHECK(_nums.empty());
105
41
            size_t rows = 0;
106
158
            for (const auto& i : _sorted_nums_vec) {
107
158
                rows += i.size();
108
158
            }
109
41
            const bool reverse = quantile > 0.5 && rows > 2;
110
41
            double u = (rows - 1) * quantile;
111
41
            auto index = static_cast<uint32_t>(u);
112
            // if reverse, the step of target should start 0 like not reverse
113
            // so here rows need to minus index + 2
114
            // eg: rows = 10, index = 5
115
            // if not reverse, so the first number loc is 5, the second number loc is 6
116
            // if reverse, so the second number is 3, the first number is 4
117
            // 5 + 4 = 3 + 6 = 9 = rows - 1.
118
            // the rows must GE 2 beacuse `_sorted_nums_vec` size GE 2
119
41
            size_t target = reverse ? rows - index - 2 : index;
120
41
            if (quantile == 1) {
121
0
                target = 0;
122
0
            }
123
41
            auto [first_number, second_number] = _merge_sort_and_get_numbers(target, reverse);
124
41
            if (quantile == 1) {
125
0
                return second_number;
126
0
            }
127
41
            return first_number + (u - static_cast<double>(index)) * (second_number - first_number);
128
41
        }
129
233
    }
_ZN5doris6CountsInE9terminateEd
Line
Count
Source
79
60
    double terminate(double quantile) {
80
60
        if (_sorted_nums_vec.size() <= 1) {
81
60
            if (_sorted_nums_vec.size() == 1) {
82
0
                _nums = std::move(_sorted_nums_vec[0]);
83
0
            }
84
85
60
            if (_nums.empty()) {
86
                // Although set null here, but the value is 0.0 and the call method just
87
                // get val in aggregate_function_percentile_approx.h
88
0
                return 0.0;
89
0
            }
90
91
60
            if (UNLIKELY(!std::is_sorted(_nums.begin(), _nums.end()))) {
92
0
                pdqsort(_nums.begin(), _nums.end());
93
0
            }
94
95
60
            if (quantile == 1 || _nums.size() == 1) {
96
48
                return _nums.back();
97
48
            }
98
99
12
            double u = (_nums.size() - 1) * quantile;
100
12
            auto index = static_cast<uint32_t>(u);
101
12
            return _nums[index] +
102
12
                   (u - static_cast<double>(index)) * (_nums[index + 1] - _nums[index]);
103
60
        } else {
104
0
            DCHECK(_nums.empty());
105
0
            size_t rows = 0;
106
0
            for (const auto& i : _sorted_nums_vec) {
107
0
                rows += i.size();
108
0
            }
109
0
            const bool reverse = quantile > 0.5 && rows > 2;
110
0
            double u = (rows - 1) * quantile;
111
0
            auto index = static_cast<uint32_t>(u);
112
            // if reverse, the step of target should start 0 like not reverse
113
            // so here rows need to minus index + 2
114
            // eg: rows = 10, index = 5
115
            // if not reverse, so the first number loc is 5, the second number loc is 6
116
            // if reverse, so the second number is 3, the first number is 4
117
            // 5 + 4 = 3 + 6 = 9 = rows - 1.
118
            // the rows must GE 2 beacuse `_sorted_nums_vec` size GE 2
119
0
            size_t target = reverse ? rows - index - 2 : index;
120
0
            if (quantile == 1) {
121
0
                target = 0;
122
0
            }
123
0
            auto [first_number, second_number] = _merge_sort_and_get_numbers(target, reverse);
124
0
            if (quantile == 1) {
125
0
                return second_number;
126
0
            }
127
0
            return first_number + (u - static_cast<double>(index)) * (second_number - first_number);
128
0
        }
129
60
    }
Unexecuted instantiation: _ZN5doris6CountsIfE9terminateEd
_ZN5doris6CountsIdE9terminateEd
Line
Count
Source
79
296
    double terminate(double quantile) {
80
296
        if (_sorted_nums_vec.size() <= 1) {
81
284
            if (_sorted_nums_vec.size() == 1) {
82
88
                _nums = std::move(_sorted_nums_vec[0]);
83
88
            }
84
85
284
            if (_nums.empty()) {
86
                // Although set null here, but the value is 0.0 and the call method just
87
                // get val in aggregate_function_percentile_approx.h
88
0
                return 0.0;
89
0
            }
90
91
284
            if (UNLIKELY(!std::is_sorted(_nums.begin(), _nums.end()))) {
92
0
                pdqsort(_nums.begin(), _nums.end());
93
0
            }
94
95
284
            if (quantile == 1 || _nums.size() == 1) {
96
84
                return _nums.back();
97
84
            }
98
99
200
            double u = (_nums.size() - 1) * quantile;
100
200
            auto index = static_cast<uint32_t>(u);
101
200
            return _nums[index] +
102
200
                   (u - static_cast<double>(index)) * (_nums[index + 1] - _nums[index]);
103
284
        } else {
104
12
            DCHECK(_nums.empty());
105
12
            size_t rows = 0;
106
36
            for (const auto& i : _sorted_nums_vec) {
107
36
                rows += i.size();
108
36
            }
109
12
            const bool reverse = quantile > 0.5 && rows > 2;
110
12
            double u = (rows - 1) * quantile;
111
12
            auto index = static_cast<uint32_t>(u);
112
            // if reverse, the step of target should start 0 like not reverse
113
            // so here rows need to minus index + 2
114
            // eg: rows = 10, index = 5
115
            // if not reverse, so the first number loc is 5, the second number loc is 6
116
            // if reverse, so the second number is 3, the first number is 4
117
            // 5 + 4 = 3 + 6 = 9 = rows - 1.
118
            // the rows must GE 2 beacuse `_sorted_nums_vec` size GE 2
119
12
            size_t target = reverse ? rows - index - 2 : index;
120
12
            if (quantile == 1) {
121
0
                target = 0;
122
0
            }
123
12
            auto [first_number, second_number] = _merge_sort_and_get_numbers(target, reverse);
124
12
            if (quantile == 1) {
125
0
                return second_number;
126
0
            }
127
12
            return first_number + (u - static_cast<double>(index)) * (second_number - first_number);
128
12
        }
129
296
    }
130
131
private:
132
    struct Node {
133
        Ty value;
134
        int array_index;
135
        int64_t element_index;
136
137
2.30k
        auto operator<=>(const Node& other) const { return value <=> other.value; }
Unexecuted instantiation: _ZNK5doris6CountsIhE4NodessERKS2_
Unexecuted instantiation: _ZNK5doris6CountsIaE4NodessERKS2_
_ZNK5doris6CountsIsE4NodessERKS2_
Line
Count
Source
137
136
        auto operator<=>(const Node& other) const { return value <=> other.value; }
_ZNK5doris6CountsIiE4NodessERKS2_
Line
Count
Source
137
1.29k
        auto operator<=>(const Node& other) const { return value <=> other.value; }
_ZNK5doris6CountsIlE4NodessERKS2_
Line
Count
Source
137
839
        auto operator<=>(const Node& other) const { return value <=> other.value; }
Unexecuted instantiation: _ZNK5doris6CountsInE4NodessERKS2_
Unexecuted instantiation: _ZNK5doris6CountsIfE4NodessERKS2_
_ZNK5doris6CountsIdE4NodessERKS2_
Line
Count
Source
137
36
        auto operator<=>(const Node& other) const { return value <=> other.value; }
138
    };
139
140
716
    void _convert_sorted_num_vec_to_nums() {
141
716
        size_t rows = 0;
142
1.26k
        for (const auto& i : _sorted_nums_vec) {
143
1.26k
            rows += i.size();
144
1.26k
        }
145
716
        _nums.resize(rows);
146
716
        size_t count = 0;
147
148
716
        std::priority_queue<Node, std::vector<Node>, std::greater<Node>> min_heap;
149
1.98k
        for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
150
1.26k
            if (!_sorted_nums_vec[i].empty()) {
151
1.26k
                min_heap.emplace(_sorted_nums_vec[i][0], i, 0);
152
1.26k
            }
153
1.26k
        }
154
155
2.16k
        while (!min_heap.empty()) {
156
1.45k
            Node node = min_heap.top();
157
1.45k
            min_heap.pop();
158
1.45k
            _nums[count++] = node.value;
159
1.45k
            if (++node.element_index < _sorted_nums_vec[node.array_index].size()) {
160
184
                node.value = _sorted_nums_vec[node.array_index][node.element_index];
161
184
                min_heap.push(node);
162
184
            }
163
1.45k
        }
164
716
        _sorted_nums_vec.clear();
165
716
    }
Unexecuted instantiation: _ZN5doris6CountsIhE31_convert_sorted_num_vec_to_numsEv
Unexecuted instantiation: _ZN5doris6CountsIaE31_convert_sorted_num_vec_to_numsEv
Unexecuted instantiation: _ZN5doris6CountsIsE31_convert_sorted_num_vec_to_numsEv
_ZN5doris6CountsIiE31_convert_sorted_num_vec_to_numsEv
Line
Count
Source
140
384
    void _convert_sorted_num_vec_to_nums() {
141
384
        size_t rows = 0;
142
750
        for (const auto& i : _sorted_nums_vec) {
143
750
            rows += i.size();
144
750
        }
145
384
        _nums.resize(rows);
146
384
        size_t count = 0;
147
148
384
        std::priority_queue<Node, std::vector<Node>, std::greater<Node>> min_heap;
149
1.13k
        for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
150
750
            if (!_sorted_nums_vec[i].empty()) {
151
750
                min_heap.emplace(_sorted_nums_vec[i][0], i, 0);
152
750
            }
153
750
        }
154
155
1.27k
        while (!min_heap.empty()) {
156
888
            Node node = min_heap.top();
157
888
            min_heap.pop();
158
888
            _nums[count++] = node.value;
159
888
            if (++node.element_index < _sorted_nums_vec[node.array_index].size()) {
160
138
                node.value = _sorted_nums_vec[node.array_index][node.element_index];
161
138
                min_heap.push(node);
162
138
            }
163
888
        }
164
384
        _sorted_nums_vec.clear();
165
384
    }
_ZN5doris6CountsIlE31_convert_sorted_num_vec_to_numsEv
Line
Count
Source
140
332
    void _convert_sorted_num_vec_to_nums() {
141
332
        size_t rows = 0;
142
518
        for (const auto& i : _sorted_nums_vec) {
143
518
            rows += i.size();
144
518
        }
145
332
        _nums.resize(rows);
146
332
        size_t count = 0;
147
148
332
        std::priority_queue<Node, std::vector<Node>, std::greater<Node>> min_heap;
149
850
        for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
150
518
            if (!_sorted_nums_vec[i].empty()) {
151
518
                min_heap.emplace(_sorted_nums_vec[i][0], i, 0);
152
518
            }
153
518
        }
154
155
896
        while (!min_heap.empty()) {
156
564
            Node node = min_heap.top();
157
564
            min_heap.pop();
158
564
            _nums[count++] = node.value;
159
564
            if (++node.element_index < _sorted_nums_vec[node.array_index].size()) {
160
46
                node.value = _sorted_nums_vec[node.array_index][node.element_index];
161
46
                min_heap.push(node);
162
46
            }
163
564
        }
164
332
        _sorted_nums_vec.clear();
165
332
    }
Unexecuted instantiation: _ZN5doris6CountsInE31_convert_sorted_num_vec_to_numsEv
Unexecuted instantiation: _ZN5doris6CountsIfE31_convert_sorted_num_vec_to_numsEv
Unexecuted instantiation: _ZN5doris6CountsIdE31_convert_sorted_num_vec_to_numsEv
166
167
231
    std::pair<Ty, Ty> _merge_sort_and_get_numbers(int64_t target, bool reverse) {
168
231
        Ty first_number = 0, second_number = 0;
169
231
        size_t count = 0;
170
231
        if (reverse) {
171
50
            std::priority_queue<Node> max_heap;
172
238
            for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
173
188
                if (!_sorted_nums_vec[i].empty()) {
174
188
                    max_heap.emplace(_sorted_nums_vec[i][_sorted_nums_vec[i].size() - 1], i,
175
188
                                     _sorted_nums_vec[i].size() - 1);
176
188
                }
177
188
            }
178
179
234
            while (!max_heap.empty()) {
180
234
                Node node = max_heap.top();
181
234
                max_heap.pop();
182
234
                if (count == target) {
183
50
                    second_number = node.value;
184
184
                } else if (count == target + 1) {
185
50
                    first_number = node.value;
186
50
                    break;
187
50
                }
188
184
                ++count;
189
184
                if (--node.element_index >= 0) {
190
152
                    node.value = _sorted_nums_vec[node.array_index][node.element_index];
191
152
                    max_heap.push(node);
192
152
                }
193
184
            }
194
195
181
        } else {
196
181
            std::priority_queue<Node, std::vector<Node>, std::greater<Node>> min_heap;
197
615
            for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
198
434
                if (!_sorted_nums_vec[i].empty()) {
199
434
                    min_heap.emplace(_sorted_nums_vec[i][0], i, 0);
200
434
                }
201
434
            }
202
203
535
            while (!min_heap.empty()) {
204
535
                Node node = min_heap.top();
205
535
                min_heap.pop();
206
535
                if (count == target) {
207
181
                    first_number = node.value;
208
354
                } else if (count == target + 1) {
209
181
                    second_number = node.value;
210
181
                    break;
211
181
                }
212
354
                ++count;
213
354
                if (++node.element_index < _sorted_nums_vec[node.array_index].size()) {
214
228
                    node.value = _sorted_nums_vec[node.array_index][node.element_index];
215
228
                    min_heap.push(node);
216
228
                }
217
354
            }
218
181
        }
219
220
231
        return {first_number, second_number};
221
231
    }
Unexecuted instantiation: _ZN5doris6CountsIhE27_merge_sort_and_get_numbersElb
Unexecuted instantiation: _ZN5doris6CountsIaE27_merge_sort_and_get_numbersElb
_ZN5doris6CountsIsE27_merge_sort_and_get_numbersElb
Line
Count
Source
167
6
    std::pair<Ty, Ty> _merge_sort_and_get_numbers(int64_t target, bool reverse) {
168
6
        Ty first_number = 0, second_number = 0;
169
6
        size_t count = 0;
170
6
        if (reverse) {
171
2
            std::priority_queue<Node> max_heap;
172
10
            for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
173
8
                if (!_sorted_nums_vec[i].empty()) {
174
8
                    max_heap.emplace(_sorted_nums_vec[i][_sorted_nums_vec[i].size() - 1], i,
175
8
                                     _sorted_nums_vec[i].size() - 1);
176
8
                }
177
8
            }
178
179
12
            while (!max_heap.empty()) {
180
12
                Node node = max_heap.top();
181
12
                max_heap.pop();
182
12
                if (count == target) {
183
2
                    second_number = node.value;
184
10
                } else if (count == target + 1) {
185
2
                    first_number = node.value;
186
2
                    break;
187
2
                }
188
10
                ++count;
189
10
                if (--node.element_index >= 0) {
190
6
                    node.value = _sorted_nums_vec[node.array_index][node.element_index];
191
6
                    max_heap.push(node);
192
6
                }
193
10
            }
194
195
4
        } else {
196
4
            std::priority_queue<Node, std::vector<Node>, std::greater<Node>> min_heap;
197
20
            for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
198
16
                if (!_sorted_nums_vec[i].empty()) {
199
16
                    min_heap.emplace(_sorted_nums_vec[i][0], i, 0);
200
16
                }
201
16
            }
202
203
26
            while (!min_heap.empty()) {
204
26
                Node node = min_heap.top();
205
26
                min_heap.pop();
206
26
                if (count == target) {
207
4
                    first_number = node.value;
208
22
                } else if (count == target + 1) {
209
4
                    second_number = node.value;
210
4
                    break;
211
4
                }
212
22
                ++count;
213
22
                if (++node.element_index < _sorted_nums_vec[node.array_index].size()) {
214
22
                    node.value = _sorted_nums_vec[node.array_index][node.element_index];
215
22
                    min_heap.push(node);
216
22
                }
217
22
            }
218
4
        }
219
220
6
        return {first_number, second_number};
221
6
    }
_ZN5doris6CountsIiE27_merge_sort_and_get_numbersElb
Line
Count
Source
167
172
    std::pair<Ty, Ty> _merge_sort_and_get_numbers(int64_t target, bool reverse) {
168
172
        Ty first_number = 0, second_number = 0;
169
172
        size_t count = 0;
170
172
        if (reverse) {
171
12
            std::priority_queue<Node> max_heap;
172
44
            for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
173
32
                if (!_sorted_nums_vec[i].empty()) {
174
32
                    max_heap.emplace(_sorted_nums_vec[i][_sorted_nums_vec[i].size() - 1], i,
175
32
                                     _sorted_nums_vec[i].size() - 1);
176
32
                }
177
32
            }
178
179
62
            while (!max_heap.empty()) {
180
62
                Node node = max_heap.top();
181
62
                max_heap.pop();
182
62
                if (count == target) {
183
12
                    second_number = node.value;
184
50
                } else if (count == target + 1) {
185
12
                    first_number = node.value;
186
12
                    break;
187
12
                }
188
50
                ++count;
189
50
                if (--node.element_index >= 0) {
190
50
                    node.value = _sorted_nums_vec[node.array_index][node.element_index];
191
50
                    max_heap.push(node);
192
50
                }
193
50
            }
194
195
160
        } else {
196
160
            std::priority_queue<Node, std::vector<Node>, std::greater<Node>> min_heap;
197
532
            for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
198
372
                if (!_sorted_nums_vec[i].empty()) {
199
372
                    min_heap.emplace(_sorted_nums_vec[i][0], i, 0);
200
372
                }
201
372
            }
202
203
456
            while (!min_heap.empty()) {
204
456
                Node node = min_heap.top();
205
456
                min_heap.pop();
206
456
                if (count == target) {
207
160
                    first_number = node.value;
208
296
                } else if (count == target + 1) {
209
160
                    second_number = node.value;
210
160
                    break;
211
160
                }
212
296
                ++count;
213
296
                if (++node.element_index < _sorted_nums_vec[node.array_index].size()) {
214
190
                    node.value = _sorted_nums_vec[node.array_index][node.element_index];
215
190
                    min_heap.push(node);
216
190
                }
217
296
            }
218
160
        }
219
220
172
        return {first_number, second_number};
221
172
    }
_ZN5doris6CountsIlE27_merge_sort_and_get_numbersElb
Line
Count
Source
167
41
    std::pair<Ty, Ty> _merge_sort_and_get_numbers(int64_t target, bool reverse) {
168
41
        Ty first_number = 0, second_number = 0;
169
41
        size_t count = 0;
170
41
        if (reverse) {
171
34
            std::priority_queue<Node> max_heap;
172
176
            for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
173
142
                if (!_sorted_nums_vec[i].empty()) {
174
142
                    max_heap.emplace(_sorted_nums_vec[i][_sorted_nums_vec[i].size() - 1], i,
175
142
                                     _sorted_nums_vec[i].size() - 1);
176
142
                }
177
142
            }
178
179
154
            while (!max_heap.empty()) {
180
154
                Node node = max_heap.top();
181
154
                max_heap.pop();
182
154
                if (count == target) {
183
34
                    second_number = node.value;
184
120
                } else if (count == target + 1) {
185
34
                    first_number = node.value;
186
34
                    break;
187
34
                }
188
120
                ++count;
189
120
                if (--node.element_index >= 0) {
190
96
                    node.value = _sorted_nums_vec[node.array_index][node.element_index];
191
96
                    max_heap.push(node);
192
96
                }
193
120
            }
194
195
34
        } else {
196
7
            std::priority_queue<Node, std::vector<Node>, std::greater<Node>> min_heap;
197
23
            for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
198
16
                if (!_sorted_nums_vec[i].empty()) {
199
16
                    min_heap.emplace(_sorted_nums_vec[i][0], i, 0);
200
16
                }
201
16
            }
202
203
21
            while (!min_heap.empty()) {
204
21
                Node node = min_heap.top();
205
21
                min_heap.pop();
206
21
                if (count == target) {
207
7
                    first_number = node.value;
208
14
                } else if (count == target + 1) {
209
7
                    second_number = node.value;
210
7
                    break;
211
7
                }
212
14
                ++count;
213
14
                if (++node.element_index < _sorted_nums_vec[node.array_index].size()) {
214
12
                    node.value = _sorted_nums_vec[node.array_index][node.element_index];
215
12
                    min_heap.push(node);
216
12
                }
217
14
            }
218
7
        }
219
220
41
        return {first_number, second_number};
221
41
    }
Unexecuted instantiation: _ZN5doris6CountsInE27_merge_sort_and_get_numbersElb
Unexecuted instantiation: _ZN5doris6CountsIfE27_merge_sort_and_get_numbersElb
_ZN5doris6CountsIdE27_merge_sort_and_get_numbersElb
Line
Count
Source
167
12
    std::pair<Ty, Ty> _merge_sort_and_get_numbers(int64_t target, bool reverse) {
168
12
        Ty first_number = 0, second_number = 0;
169
12
        size_t count = 0;
170
12
        if (reverse) {
171
2
            std::priority_queue<Node> max_heap;
172
8
            for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
173
6
                if (!_sorted_nums_vec[i].empty()) {
174
6
                    max_heap.emplace(_sorted_nums_vec[i][_sorted_nums_vec[i].size() - 1], i,
175
6
                                     _sorted_nums_vec[i].size() - 1);
176
6
                }
177
6
            }
178
179
6
            while (!max_heap.empty()) {
180
6
                Node node = max_heap.top();
181
6
                max_heap.pop();
182
6
                if (count == target) {
183
2
                    second_number = node.value;
184
4
                } else if (count == target + 1) {
185
2
                    first_number = node.value;
186
2
                    break;
187
2
                }
188
4
                ++count;
189
4
                if (--node.element_index >= 0) {
190
0
                    node.value = _sorted_nums_vec[node.array_index][node.element_index];
191
0
                    max_heap.push(node);
192
0
                }
193
4
            }
194
195
10
        } else {
196
10
            std::priority_queue<Node, std::vector<Node>, std::greater<Node>> min_heap;
197
40
            for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
198
30
                if (!_sorted_nums_vec[i].empty()) {
199
30
                    min_heap.emplace(_sorted_nums_vec[i][0], i, 0);
200
30
                }
201
30
            }
202
203
32
            while (!min_heap.empty()) {
204
32
                Node node = min_heap.top();
205
32
                min_heap.pop();
206
32
                if (count == target) {
207
10
                    first_number = node.value;
208
22
                } else if (count == target + 1) {
209
10
                    second_number = node.value;
210
10
                    break;
211
10
                }
212
22
                ++count;
213
22
                if (++node.element_index < _sorted_nums_vec[node.array_index].size()) {
214
4
                    node.value = _sorted_nums_vec[node.array_index][node.element_index];
215
4
                    min_heap.push(node);
216
4
                }
217
22
            }
218
10
        }
219
220
12
        return {first_number, second_number};
221
12
    }
222
223
    vectorized::PODArray<Ty> _nums;
224
    std::vector<vectorized::PODArray<Ty>> _sorted_nums_vec;
225
};
226
227
} // namespace doris