Coverage Report

Created: 2025-07-27 15:45

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/be/src/util/counts.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <pdqsort.h>
21
22
#include <algorithm>
23
#include <cmath>
24
#include <queue>
25
26
#include "udf/udf.h"
27
#include "vec/common/pod_array.h"
28
#include "vec/common/string_buffer.hpp"
29
#include "vec/io/io_helper.h"
30
31
namespace doris {
32
33
template <typename Ty>
34
class Counts {
35
public:
36
7.82k
    // Default constructor; the compiler-generated version is used as is.
    Counts() = default;
Unexecuted instantiation: _ZN5doris6CountsIhEC2Ev
_ZN5doris6CountsIaEC2Ev
Line
Count
Source
36
68
    Counts() = default;
_ZN5doris6CountsIsEC2Ev
Line
Count
Source
36
372
    Counts() = default;
_ZN5doris6CountsIiEC2Ev
Line
Count
Source
36
4.97k
    Counts() = default;
_ZN5doris6CountsIlEC2Ev
Line
Count
Source
36
1.79k
    Counts() = default;
_ZN5doris6CountsInEC2Ev
Line
Count
Source
36
60
    Counts() = default;
Unexecuted instantiation: _ZN5doris6CountsIfEC2Ev
_ZN5doris6CountsIdEC2Ev
Line
Count
Source
36
562
    Counts() = default;
37
38
2.87k
    // Takes over the values held in `other->_nums` by moving that buffer to the
    // back of `_sorted_nums_vec` as one more run. Nothing happens when `other`
    // is null or its `_nums` is empty.
    // NOTE(review): the run is stored without being sorted here; presumably
    // callers only merge states whose `_nums` is already in ascending order
    // (e.g. freshly unserialized ones) - confirm against the call sites.
    void merge(Counts* other) {
        if (other == nullptr || other->_nums.empty()) {
            return;
        }
        _sorted_nums_vec.emplace_back(std::move(other->_nums));
    }
Unexecuted instantiation: _ZN5doris6CountsIhE5mergeEPS1_
Unexecuted instantiation: _ZN5doris6CountsIaE5mergeEPS1_
_ZN5doris6CountsIsE5mergeEPS1_
Line
Count
Source
38
6
    void merge(Counts* other) {
39
6
        if (other != nullptr && !other->_nums.empty()) {
40
6
            _sorted_nums_vec.emplace_back(std::move(other->_nums));
41
6
        }
42
6
    }
_ZN5doris6CountsIiE5mergeEPS1_
Line
Count
Source
38
1.92k
    void merge(Counts* other) {
39
1.92k
        if (other != nullptr && !other->_nums.empty()) {
40
1.92k
            _sorted_nums_vec.emplace_back(std::move(other->_nums));
41
1.92k
        }
42
1.92k
    }
_ZN5doris6CountsIlE5mergeEPS1_
Line
Count
Source
38
740
    void merge(Counts* other) {
39
740
        if (other != nullptr && !other->_nums.empty()) {
40
740
            _sorted_nums_vec.emplace_back(std::move(other->_nums));
41
740
        }
42
740
    }
Unexecuted instantiation: _ZN5doris6CountsInE5mergeEPS1_
Unexecuted instantiation: _ZN5doris6CountsIfE5mergeEPS1_
_ZN5doris6CountsIdE5mergeEPS1_
Line
Count
Source
38
196
    void merge(Counts* other) {
39
196
        if (other != nullptr && !other->_nums.empty()) {
40
196
            _sorted_nums_vec.emplace_back(std::move(other->_nums));
41
196
        }
42
196
    }
43
44
    // Records `i` occurrences of `key`: `_nums` grows by `i` slots and every
    // new slot is set to `key`. With `i == 0` the container is left unchanged.
    void increment(Ty key, uint32_t i) {
        const auto first_new_slot = _nums.size();
        _nums.resize(first_new_slot + i);
        std::fill_n(_nums.begin() + first_new_slot, i, key);
    }
51
52
5.77k
    // Records a single occurrence of `key` by appending it to `_nums`.
    void increment(Ty key) {
        _nums.push_back(key);
    }
Unexecuted instantiation: _ZN5doris6CountsIhE9incrementEh
_ZN5doris6CountsIaE9incrementEa
Line
Count
Source
52
128
    void increment(Ty key) { _nums.push_back(key); }
_ZN5doris6CountsIsE9incrementEs
Line
Count
Source
52
1.08k
    void increment(Ty key) { _nums.push_back(key); }
_ZN5doris6CountsIiE9incrementEi
Line
Count
Source
52
3.16k
    void increment(Ty key) { _nums.push_back(key); }
_ZN5doris6CountsIlE9incrementEl
Line
Count
Source
52
824
    void increment(Ty key) { _nums.push_back(key); }
_ZN5doris6CountsInE9incrementEn
Line
Count
Source
52
72
    void increment(Ty key) { _nums.push_back(key); }
Unexecuted instantiation: _ZN5doris6CountsIfE9incrementEf
_ZN5doris6CountsIdE9incrementEd
Line
Count
Source
52
500
    void increment(Ty key) { _nums.push_back(key); }
53
54
10
    // Adds all elements of `keys` to `_nums` with a single range insert.
    // NOTE(review): presumably `_nums.insert(first, last)` appends the range at
    // the end of `_nums`; the container type is declared outside this view.
    void increment_batch(const vectorized::PaddedPODArray<Ty>& keys) {
        const auto first = keys.begin();
        const auto last = keys.end();
        _nums.insert(first, last);
    }
Unexecuted instantiation: _ZN5doris6CountsIhE15increment_batchERKNS_10vectorized8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEE
Unexecuted instantiation: _ZN5doris6CountsIaE15increment_batchERKNS_10vectorized8PODArrayIaLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEE
Unexecuted instantiation: _ZN5doris6CountsIsE15increment_batchERKNS_10vectorized8PODArrayIsLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEE
Unexecuted instantiation: _ZN5doris6CountsIiE15increment_batchERKNS_10vectorized8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEE
_ZN5doris6CountsIlE15increment_batchERKNS_10vectorized8PODArrayIlLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEE
Line
Count
Source
54
10
    void increment_batch(const vectorized::PaddedPODArray<Ty>& keys) {
55
10
        _nums.insert(keys.begin(), keys.end());
56
10
    }
Unexecuted instantiation: _ZN5doris6CountsInE15increment_batchERKNS_10vectorized8PODArrayInLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEE
Unexecuted instantiation: _ZN5doris6CountsIfE15increment_batchERKNS_10vectorized8PODArrayIfLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEE
Unexecuted instantiation: _ZN5doris6CountsIdE15increment_batchERKNS_10vectorized8PODArrayIdLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEE
57
58
3.87k
    // Writes the accumulated values to `buf` as a `size_t` element count
    // followed by the raw bytes of the elements in ascending order.
    //
    // When `_nums` holds data it is sorted in place and written. Otherwise the
    // runs in `_sorted_nums_vec` are merged into `_nums` first and the merged
    // result is written.
    //
    // When neither container holds data, only a count of 0 is written. The
    // previous implementation re-entered itself in that situation and never
    // terminated, because merging zero runs leaves `_nums` empty.
    //
    // NOTE(review): if `_nums` is non-empty, any runs still sitting in
    // `_sorted_nums_vec` are not written (same as before) - confirm callers
    // never mix both.
    void serialize(vectorized::BufferWritable& buf) {
        if (_nums.empty()) {
            // Merge the runs into `_nums`; leaves `_nums` empty if there are none.
            _convert_sorted_num_vec_to_nums();
        }

        size_t size = _nums.size();
        if (size != 0) {
            // Kept for the merged case too, exactly as the old recursive call did.
            pdqsort(_nums.begin(), _nums.end());
        }

        buf.write_binary(size);
        if (size != 0) {
            buf.write(reinterpret_cast<const char*>(_nums.data()), sizeof(Ty) * size);
        }
    }
Unexecuted instantiation: _ZN5doris6CountsIhE9serializeERNS_10vectorized14BufferWritableE
Unexecuted instantiation: _ZN5doris6CountsIaE9serializeERNS_10vectorized14BufferWritableE
_ZN5doris6CountsIsE9serializeERNS_10vectorized14BufferWritableE
Line
Count
Source
58
6
    void serialize(vectorized::BufferWritable& buf) {
59
6
        if (!_nums.empty()) {
60
6
            pdqsort(_nums.begin(), _nums.end());
61
6
            size_t size = _nums.size();
62
6
            buf.write_binary(size);
63
6
            buf.write(reinterpret_cast<const char*>(_nums.data()), sizeof(Ty) * size);
64
6
        } else {
65
            // convert _sorted_nums_vec to _nums and do seiralize again
66
0
            _convert_sorted_num_vec_to_nums();
67
0
            serialize(buf);
68
0
        }
69
6
    }
_ZN5doris6CountsIiE9serializeERNS_10vectorized14BufferWritableE
Line
Count
Source
58
2.52k
    void serialize(vectorized::BufferWritable& buf) {
59
2.52k
        if (!_nums.empty()) {
60
2.08k
            pdqsort(_nums.begin(), _nums.end());
61
2.08k
            size_t size = _nums.size();
62
2.08k
            buf.write_binary(size);
63
2.08k
            buf.write(reinterpret_cast<const char*>(_nums.data()), sizeof(Ty) * size);
64
2.08k
        } else {
65
            // convert _sorted_nums_vec to _nums and do seiralize again
66
444
            _convert_sorted_num_vec_to_nums();
67
444
            serialize(buf);
68
444
        }
69
2.52k
    }
_ZN5doris6CountsIlE9serializeERNS_10vectorized14BufferWritableE
Line
Count
Source
58
1.14k
    void serialize(vectorized::BufferWritable& buf) {
59
1.14k
        if (!_nums.empty()) {
60
802
            pdqsort(_nums.begin(), _nums.end());
61
802
            size_t size = _nums.size();
62
802
            buf.write_binary(size);
63
802
            buf.write(reinterpret_cast<const char*>(_nums.data()), sizeof(Ty) * size);
64
802
        } else {
65
            // convert _sorted_nums_vec to _nums and do seiralize again
66
340
            _convert_sorted_num_vec_to_nums();
67
340
            serialize(buf);
68
340
        }
69
1.14k
    }
Unexecuted instantiation: _ZN5doris6CountsInE9serializeERNS_10vectorized14BufferWritableE
Unexecuted instantiation: _ZN5doris6CountsIfE9serializeERNS_10vectorized14BufferWritableE
_ZN5doris6CountsIdE9serializeERNS_10vectorized14BufferWritableE
Line
Count
Source
58
196
    void serialize(vectorized::BufferWritable& buf) {
59
196
        if (!_nums.empty()) {
60
196
            pdqsort(_nums.begin(), _nums.end());
61
196
            size_t size = _nums.size();
62
196
            buf.write_binary(size);
63
196
            buf.write(reinterpret_cast<const char*>(_nums.data()), sizeof(Ty) * size);
64
196
        } else {
65
            // convert _sorted_nums_vec to _nums and do seiralize again
66
0
            _convert_sorted_num_vec_to_nums();
67
0
            serialize(buf);
68
0
        }
69
196
    }
70
71
2.86k
    // Restores `_nums` from `buf`: reads a `size_t` element count, resizes
    // `_nums` to that count, then copies `sizeof(Ty) * count` bytes of element
    // data into it. This is the layout `serialize` writes.
    //
    // The copy length is capped at the number of bytes `_nums` was resized to,
    // so a returned buffer larger than requested cannot overrun `_nums`. The
    // copy is skipped when there is nothing to copy.
    void unserialize(vectorized::BufferReadable& buf) {
        // Initialised so the value is defined even if `read_binary` does not assign it.
        size_t size = 0;
        buf.read_binary(size);
        _nums.resize(size);

        const size_t wanted_bytes = sizeof(Ty) * size;
        auto buff = buf.read(wanted_bytes);
        const size_t copy_bytes = std::min<size_t>(buff.size, wanted_bytes);
        if (copy_bytes != 0) {
            memcpy(_nums.data(), buff.data, copy_bytes);
        }
    }
Unexecuted instantiation: _ZN5doris6CountsIhE11unserializeERNS_10vectorized14BufferReadableE
Unexecuted instantiation: _ZN5doris6CountsIaE11unserializeERNS_10vectorized14BufferReadableE
_ZN5doris6CountsIsE11unserializeERNS_10vectorized14BufferReadableE
Line
Count
Source
71
6
    void unserialize(vectorized::BufferReadable& buf) {
72
6
        size_t size;
73
6
        buf.read_binary(size);
74
6
        _nums.resize(size);
75
6
        auto buff = buf.read(sizeof(Ty) * size);
76
6
        memcpy(_nums.data(), buff.data, buff.size);
77
6
    }
_ZN5doris6CountsIiE11unserializeERNS_10vectorized14BufferReadableE
Line
Count
Source
71
1.92k
    void unserialize(vectorized::BufferReadable& buf) {
72
1.92k
        size_t size;
73
1.92k
        buf.read_binary(size);
74
1.92k
        _nums.resize(size);
75
1.92k
        auto buff = buf.read(sizeof(Ty) * size);
76
1.92k
        memcpy(_nums.data(), buff.data, buff.size);
77
1.92k
    }
_ZN5doris6CountsIlE11unserializeERNS_10vectorized14BufferReadableE
Line
Count
Source
71
738
    void unserialize(vectorized::BufferReadable& buf) {
72
738
        size_t size;
73
738
        buf.read_binary(size);
74
738
        _nums.resize(size);
75
738
        auto buff = buf.read(sizeof(Ty) * size);
76
738
        memcpy(_nums.data(), buff.data, buff.size);
77
738
    }
Unexecuted instantiation: _ZN5doris6CountsInE11unserializeERNS_10vectorized14BufferReadableE
Unexecuted instantiation: _ZN5doris6CountsIfE11unserializeERNS_10vectorized14BufferReadableE
_ZN5doris6CountsIdE11unserializeERNS_10vectorized14BufferReadableE
Line
Count
Source
71
196
    void unserialize(vectorized::BufferReadable& buf) {
72
196
        size_t size;
73
196
        buf.read_binary(size);
74
196
        _nums.resize(size);
75
196
        auto buff = buf.read(sizeof(Ty) * size);
76
196
        memcpy(_nums.data(), buff.data, buff.size);
77
196
    }
78
79
2.22k
    double terminate(double quantile) {
80
2.22k
        if (_sorted_nums_vec.size() <= 1) {
81
1.95k
            if (_sorted_nums_vec.size() == 1) {
82
718
                _nums = std::move(_sorted_nums_vec[0]);
83
718
            }
84
85
1.95k
            if (_nums.empty()) {
86
                // Although set null here, but the value is 0.0 and the call method just
87
                // get val in aggregate_function_percentile_approx.h
88
0
                return 0.0;
89
0
            }
90
91
1.95k
            if (UNLIKELY(!std::is_sorted(_nums.begin(), _nums.end()))) {
92
359
                pdqsort(_nums.begin(), _nums.end());
93
359
            }
94
95
1.95k
            if (quantile == 1 || _nums.size() == 1) {
96
954
                return _nums.back();
97
954
            }
98
99
996
            double u = (_nums.size() - 1) * quantile;
100
996
            auto index = static_cast<uint32_t>(u);
101
996
            return _nums[index] +
102
996
                   (u - static_cast<double>(index)) * (_nums[index + 1] - _nums[index]);
103
1.95k
        } else {
104
279
            DCHECK(_nums.empty());
105
279
            size_t rows = 0;
106
816
            for (const auto& i : _sorted_nums_vec) {
107
816
                rows += i.size();
108
816
            }
109
279
            const bool reverse = quantile > 0.5 && rows > 2;
110
279
            double u = (rows - 1) * quantile;
111
279
            auto index = static_cast<uint32_t>(u);
112
            // if reverse, the step of target should start 0 like not reverse
113
            // so here rows need to minus index + 2
114
            // eg: rows = 10, index = 5
115
            // if not reverse, so the first number loc is 5, the second number loc is 6
116
            // if reverse, so the second number is 3, the first number is 4
117
            // 5 + 4 = 3 + 6 = 9 = rows - 1.
118
            // the rows must GE 2 beacuse `_sorted_nums_vec` size GE 2
119
279
            size_t target = reverse ? rows - index - 2 : index;
120
279
            if (quantile == 1) {
121
0
                target = 0;
122
0
            }
123
279
            auto [first_number, second_number] = _merge_sort_and_get_numbers(target, reverse);
124
279
            if (quantile == 1) {
125
0
                return second_number;
126
0
            }
127
279
            return first_number + (u - static_cast<double>(index)) * (second_number - first_number);
128
279
        }
129
2.22k
    }
Unexecuted instantiation: _ZN5doris6CountsIhE9terminateEd
_ZN5doris6CountsIaE9terminateEd
Line
Count
Source
79
116
    double terminate(double quantile) {
80
116
        if (_sorted_nums_vec.size() <= 1) {
81
116
            if (_sorted_nums_vec.size() == 1) {
82
0
                _nums = std::move(_sorted_nums_vec[0]);
83
0
            }
84
85
116
            if (_nums.empty()) {
86
                // Although set null here, but the value is 0.0 and the call method just
87
                // get val in aggregate_function_percentile_approx.h
88
0
                return 0.0;
89
0
            }
90
91
116
            if (UNLIKELY(!std::is_sorted(_nums.begin(), _nums.end()))) {
92
48
                pdqsort(_nums.begin(), _nums.end());
93
48
            }
94
95
116
            if (quantile == 1 || _nums.size() == 1) {
96
68
                return _nums.back();
97
68
            }
98
99
48
            double u = (_nums.size() - 1) * quantile;
100
48
            auto index = static_cast<uint32_t>(u);
101
48
            return _nums[index] +
102
48
                   (u - static_cast<double>(index)) * (_nums[index + 1] - _nums[index]);
103
116
        } else {
104
0
            DCHECK(_nums.empty());
105
0
            size_t rows = 0;
106
0
            for (const auto& i : _sorted_nums_vec) {
107
0
                rows += i.size();
108
0
            }
109
0
            const bool reverse = quantile > 0.5 && rows > 2;
110
0
            double u = (rows - 1) * quantile;
111
0
            auto index = static_cast<uint32_t>(u);
112
            // if reverse, the step of target should start 0 like not reverse
113
            // so here rows need to minus index + 2
114
            // eg: rows = 10, index = 5
115
            // if not reverse, so the first number loc is 5, the second number loc is 6
116
            // if reverse, so the second number is 3, the first number is 4
117
            // 5 + 4 = 3 + 6 = 9 = rows - 1.
118
            // the rows must GE 2 beacuse `_sorted_nums_vec` size GE 2
119
0
            size_t target = reverse ? rows - index - 2 : index;
120
0
            if (quantile == 1) {
121
0
                target = 0;
122
0
            }
123
0
            auto [first_number, second_number] = _merge_sort_and_get_numbers(target, reverse);
124
0
            if (quantile == 1) {
125
0
                return second_number;
126
0
            }
127
0
            return first_number + (u - static_cast<double>(index)) * (second_number - first_number);
128
0
        }
129
116
    }
_ZN5doris6CountsIsE9terminateEd
Line
Count
Source
79
594
    double terminate(double quantile) {
80
594
        if (_sorted_nums_vec.size() <= 1) {
81
594
            if (_sorted_nums_vec.size() == 1) {
82
6
                _nums = std::move(_sorted_nums_vec[0]);
83
6
            }
84
85
594
            if (_nums.empty()) {
86
                // Although set null here, but the value is 0.0 and the call method just
87
                // get val in aggregate_function_percentile_approx.h
88
0
                return 0.0;
89
0
            }
90
91
594
            if (UNLIKELY(!std::is_sorted(_nums.begin(), _nums.end()))) {
92
288
                pdqsort(_nums.begin(), _nums.end());
93
288
            }
94
95
594
            if (quantile == 1 || _nums.size() == 1) {
96
174
                return _nums.back();
97
174
            }
98
99
420
            double u = (_nums.size() - 1) * quantile;
100
420
            auto index = static_cast<uint32_t>(u);
101
420
            return _nums[index] +
102
420
                   (u - static_cast<double>(index)) * (_nums[index + 1] - _nums[index]);
103
594
        } else {
104
0
            DCHECK(_nums.empty());
105
0
            size_t rows = 0;
106
0
            for (const auto& i : _sorted_nums_vec) {
107
0
                rows += i.size();
108
0
            }
109
0
            const bool reverse = quantile > 0.5 && rows > 2;
110
0
            double u = (rows - 1) * quantile;
111
0
            auto index = static_cast<uint32_t>(u);
112
            // if reverse, the step of target should start 0 like not reverse
113
            // so here rows need to minus index + 2
114
            // eg: rows = 10, index = 5
115
            // if not reverse, so the first number loc is 5, the second number loc is 6
116
            // if reverse, so the second number is 3, the first number is 4
117
            // 5 + 4 = 3 + 6 = 9 = rows - 1.
118
            // the rows must GE 2 beacuse `_sorted_nums_vec` size GE 2
119
0
            size_t target = reverse ? rows - index - 2 : index;
120
0
            if (quantile == 1) {
121
0
                target = 0;
122
0
            }
123
0
            auto [first_number, second_number] = _merge_sort_and_get_numbers(target, reverse);
124
0
            if (quantile == 1) {
125
0
                return second_number;
126
0
            }
127
0
            return first_number + (u - static_cast<double>(index)) * (second_number - first_number);
128
0
        }
129
594
    }
_ZN5doris6CountsIiE9terminateEd
Line
Count
Source
79
960
    double terminate(double quantile) {
80
960
        if (_sorted_nums_vec.size() <= 1) {
81
766
            if (_sorted_nums_vec.size() == 1) {
82
584
                _nums = std::move(_sorted_nums_vec[0]);
83
584
            }
84
85
766
            if (_nums.empty()) {
86
                // Although set null here, but the value is 0.0 and the call method just
87
                // get val in aggregate_function_percentile_approx.h
88
0
                return 0.0;
89
0
            }
90
91
766
            if (UNLIKELY(!std::is_sorted(_nums.begin(), _nums.end()))) {
92
22
                pdqsort(_nums.begin(), _nums.end());
93
22
            }
94
95
766
            if (quantile == 1 || _nums.size() == 1) {
96
484
                return _nums.back();
97
484
            }
98
99
282
            double u = (_nums.size() - 1) * quantile;
100
282
            auto index = static_cast<uint32_t>(u);
101
282
            return _nums[index] +
102
282
                   (u - static_cast<double>(index)) * (_nums[index + 1] - _nums[index]);
103
766
        } else {
104
194
            DCHECK(_nums.empty());
105
194
            size_t rows = 0;
106
534
            for (const auto& i : _sorted_nums_vec) {
107
534
                rows += i.size();
108
534
            }
109
194
            const bool reverse = quantile > 0.5 && rows > 2;
110
194
            double u = (rows - 1) * quantile;
111
194
            auto index = static_cast<uint32_t>(u);
112
            // if reverse, the step of target should start 0 like not reverse
113
            // so here rows need to minus index + 2
114
            // eg: rows = 10, index = 5
115
            // if not reverse, so the first number loc is 5, the second number loc is 6
116
            // if reverse, so the second number is 3, the first number is 4
117
            // 5 + 4 = 3 + 6 = 9 = rows - 1.
118
            // the rows must GE 2 beacuse `_sorted_nums_vec` size GE 2
119
194
            size_t target = reverse ? rows - index - 2 : index;
120
194
            if (quantile == 1) {
121
0
                target = 0;
122
0
            }
123
194
            auto [first_number, second_number] = _merge_sort_and_get_numbers(target, reverse);
124
194
            if (quantile == 1) {
125
0
                return second_number;
126
0
            }
127
194
            return first_number + (u - static_cast<double>(index)) * (second_number - first_number);
128
194
        }
129
960
    }
_ZN5doris6CountsIlE9terminateEd
Line
Count
Source
79
233
    double terminate(double quantile) {
80
233
        if (_sorted_nums_vec.size() <= 1) {
81
196
            if (_sorted_nums_vec.size() == 1) {
82
76
                _nums = std::move(_sorted_nums_vec[0]);
83
76
            }
84
85
196
            if (_nums.empty()) {
86
                // Although set null here, but the value is 0.0 and the call method just
87
                // get val in aggregate_function_percentile_approx.h
88
0
                return 0.0;
89
0
            }
90
91
196
            if (UNLIKELY(!std::is_sorted(_nums.begin(), _nums.end()))) {
92
1
                pdqsort(_nums.begin(), _nums.end());
93
1
            }
94
95
196
            if (quantile == 1 || _nums.size() == 1) {
96
120
                return _nums.back();
97
120
            }
98
99
76
            double u = (_nums.size() - 1) * quantile;
100
76
            auto index = static_cast<uint32_t>(u);
101
76
            return _nums[index] +
102
76
                   (u - static_cast<double>(index)) * (_nums[index + 1] - _nums[index]);
103
196
        } else {
104
37
            DCHECK(_nums.empty());
105
37
            size_t rows = 0;
106
138
            for (const auto& i : _sorted_nums_vec) {
107
138
                rows += i.size();
108
138
            }
109
37
            const bool reverse = quantile > 0.5 && rows > 2;
110
37
            double u = (rows - 1) * quantile;
111
37
            auto index = static_cast<uint32_t>(u);
112
            // if reverse, the step of target should start 0 like not reverse
113
            // so here rows need to minus index + 2
114
            // eg: rows = 10, index = 5
115
            // if not reverse, so the first number loc is 5, the second number loc is 6
116
            // if reverse, so the second number is 3, the first number is 4
117
            // 5 + 4 = 3 + 6 = 9 = rows - 1.
118
            // the rows must GE 2 beacuse `_sorted_nums_vec` size GE 2
119
37
            size_t target = reverse ? rows - index - 2 : index;
120
37
            if (quantile == 1) {
121
0
                target = 0;
122
0
            }
123
37
            auto [first_number, second_number] = _merge_sort_and_get_numbers(target, reverse);
124
37
            if (quantile == 1) {
125
0
                return second_number;
126
0
            }
127
37
            return first_number + (u - static_cast<double>(index)) * (second_number - first_number);
128
37
        }
129
233
    }
_ZN5doris6CountsInE9terminateEd
Line
Count
Source
79
60
    double terminate(double quantile) {
80
60
        if (_sorted_nums_vec.size() <= 1) {
81
60
            if (_sorted_nums_vec.size() == 1) {
82
0
                _nums = std::move(_sorted_nums_vec[0]);
83
0
            }
84
85
60
            if (_nums.empty()) {
86
                // Although set null here, but the value is 0.0 and the call method just
87
                // get val in aggregate_function_percentile_approx.h
88
0
                return 0.0;
89
0
            }
90
91
60
            if (UNLIKELY(!std::is_sorted(_nums.begin(), _nums.end()))) {
92
0
                pdqsort(_nums.begin(), _nums.end());
93
0
            }
94
95
60
            if (quantile == 1 || _nums.size() == 1) {
96
48
                return _nums.back();
97
48
            }
98
99
12
            double u = (_nums.size() - 1) * quantile;
100
12
            auto index = static_cast<uint32_t>(u);
101
12
            return _nums[index] +
102
12
                   (u - static_cast<double>(index)) * (_nums[index + 1] - _nums[index]);
103
60
        } else {
104
0
            DCHECK(_nums.empty());
105
0
            size_t rows = 0;
106
0
            for (const auto& i : _sorted_nums_vec) {
107
0
                rows += i.size();
108
0
            }
109
0
            const bool reverse = quantile > 0.5 && rows > 2;
110
0
            double u = (rows - 1) * quantile;
111
0
            auto index = static_cast<uint32_t>(u);
112
            // if reverse, the step of target should start 0 like not reverse
113
            // so here rows need to minus index + 2
114
            // eg: rows = 10, index = 5
115
            // if not reverse, so the first number loc is 5, the second number loc is 6
116
            // if reverse, so the second number is 3, the first number is 4
117
            // 5 + 4 = 3 + 6 = 9 = rows - 1.
118
            // the rows must GE 2 beacuse `_sorted_nums_vec` size GE 2
119
0
            size_t target = reverse ? rows - index - 2 : index;
120
0
            if (quantile == 1) {
121
0
                target = 0;
122
0
            }
123
0
            auto [first_number, second_number] = _merge_sort_and_get_numbers(target, reverse);
124
0
            if (quantile == 1) {
125
0
                return second_number;
126
0
            }
127
0
            return first_number + (u - static_cast<double>(index)) * (second_number - first_number);
128
0
        }
129
60
    }
Unexecuted instantiation: _ZN5doris6CountsIfE9terminateEd
_ZN5doris6CountsIdE9terminateEd
Line
Count
Source
79
266
    double terminate(double quantile) {
80
266
        if (_sorted_nums_vec.size() <= 1) {
81
218
            if (_sorted_nums_vec.size() == 1) {
82
52
                _nums = std::move(_sorted_nums_vec[0]);
83
52
            }
84
85
218
            if (_nums.empty()) {
86
                // Although set null here, but the value is 0.0 and the call method just
87
                // get val in aggregate_function_percentile_approx.h
88
0
                return 0.0;
89
0
            }
90
91
218
            if (UNLIKELY(!std::is_sorted(_nums.begin(), _nums.end()))) {
92
0
                pdqsort(_nums.begin(), _nums.end());
93
0
            }
94
95
218
            if (quantile == 1 || _nums.size() == 1) {
96
60
                return _nums.back();
97
60
            }
98
99
158
            double u = (_nums.size() - 1) * quantile;
100
158
            auto index = static_cast<uint32_t>(u);
101
158
            return _nums[index] +
102
158
                   (u - static_cast<double>(index)) * (_nums[index + 1] - _nums[index]);
103
218
        } else {
104
48
            DCHECK(_nums.empty());
105
48
            size_t rows = 0;
106
144
            for (const auto& i : _sorted_nums_vec) {
107
144
                rows += i.size();
108
144
            }
109
48
            const bool reverse = quantile > 0.5 && rows > 2;
110
48
            double u = (rows - 1) * quantile;
111
48
            auto index = static_cast<uint32_t>(u);
112
            // if reverse, the step of target should start 0 like not reverse
113
            // so here rows need to minus index + 2
114
            // eg: rows = 10, index = 5
115
            // if not reverse, so the first number loc is 5, the second number loc is 6
116
            // if reverse, so the second number is 3, the first number is 4
117
            // 5 + 4 = 3 + 6 = 9 = rows - 1.
118
            // the rows must GE 2 beacuse `_sorted_nums_vec` size GE 2
119
48
            size_t target = reverse ? rows - index - 2 : index;
120
48
            if (quantile == 1) {
121
0
                target = 0;
122
0
            }
123
48
            auto [first_number, second_number] = _merge_sort_and_get_numbers(target, reverse);
124
48
            if (quantile == 1) {
125
0
                return second_number;
126
0
            }
127
48
            return first_number + (u - static_cast<double>(index)) * (second_number - first_number);
128
48
        }
129
266
    }
130
131
private:
132
    struct Node {
133
        Ty value;
134
        int array_index;
135
        int64_t element_index;
136
137
2.78k
        auto operator<=>(const Node& other) const { return value <=> other.value; }
Unexecuted instantiation: _ZNK5doris6CountsIhE4NodessERKS2_
Unexecuted instantiation: _ZNK5doris6CountsIaE4NodessERKS2_
Unexecuted instantiation: _ZNK5doris6CountsIsE4NodessERKS2_
_ZNK5doris6CountsIiE4NodessERKS2_
Line
Count
Source
137
1.85k
        auto operator<=>(const Node& other) const { return value <=> other.value; }
_ZNK5doris6CountsIlE4NodessERKS2_
Line
Count
Source
137
747
        auto operator<=>(const Node& other) const { return value <=> other.value; }
Unexecuted instantiation: _ZNK5doris6CountsInE4NodessERKS2_
Unexecuted instantiation: _ZNK5doris6CountsIfE4NodessERKS2_
_ZNK5doris6CountsIdE4NodessERKS2_
Line
Count
Source
137
182
        auto operator<=>(const Node& other) const { return value <=> other.value; }
138
    };
139
140
784
    void _convert_sorted_num_vec_to_nums() {
141
784
        size_t rows = 0;
142
1.33k
        for (const auto& i : _sorted_nums_vec) {
143
1.33k
            rows += i.size();
144
1.33k
        }
145
784
        _nums.resize(rows);
146
784
        size_t count = 0;
147
148
784
        std::priority_queue<Node, std::vector<Node>, std::greater<Node>> min_heap;
149
2.12k
        for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
150
1.33k
            if (!_sorted_nums_vec[i].empty()) {
151
1.33k
                min_heap.emplace(_sorted_nums_vec[i][0], i, 0);
152
1.33k
            }
153
1.33k
        }
154
155
2.38k
        while (!min_heap.empty()) {
156
1.60k
            Node node = min_heap.top();
157
1.60k
            min_heap.pop();
158
1.60k
            _nums[count++] = node.value;
159
1.60k
            if (++node.element_index < _sorted_nums_vec[node.array_index].size()) {
160
264
                node.value = _sorted_nums_vec[node.array_index][node.element_index];
161
264
                min_heap.push(node);
162
264
            }
163
1.60k
        }
164
784
        _sorted_nums_vec.clear();
165
784
    }
Unexecuted instantiation: _ZN5doris6CountsIhE31_convert_sorted_num_vec_to_numsEv
Unexecuted instantiation: _ZN5doris6CountsIaE31_convert_sorted_num_vec_to_numsEv
Unexecuted instantiation: _ZN5doris6CountsIsE31_convert_sorted_num_vec_to_numsEv
_ZN5doris6CountsIiE31_convert_sorted_num_vec_to_numsEv
Line
Count
Source
140
444
    void _convert_sorted_num_vec_to_nums() {
141
444
        size_t rows = 0;
142
810
        for (const auto& i : _sorted_nums_vec) {
143
810
            rows += i.size();
144
810
        }
145
444
        _nums.resize(rows);
146
444
        size_t count = 0;
147
148
444
        std::priority_queue<Node, std::vector<Node>, std::greater<Node>> min_heap;
149
1.25k
        for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
150
810
            if (!_sorted_nums_vec[i].empty()) {
151
810
                min_heap.emplace(_sorted_nums_vec[i][0], i, 0);
152
810
            }
153
810
        }
154
155
1.45k
        while (!min_heap.empty()) {
156
1.00k
            Node node = min_heap.top();
157
1.00k
            min_heap.pop();
158
1.00k
            _nums[count++] = node.value;
159
1.00k
            if (++node.element_index < _sorted_nums_vec[node.array_index].size()) {
160
198
                node.value = _sorted_nums_vec[node.array_index][node.element_index];
161
198
                min_heap.push(node);
162
198
            }
163
1.00k
        }
164
444
        _sorted_nums_vec.clear();
165
444
    }
_ZN5doris6CountsIlE31_convert_sorted_num_vec_to_numsEv
Line
Count
Source
140
340
    void _convert_sorted_num_vec_to_nums() {
141
340
        size_t rows = 0;
142
526
        for (const auto& i : _sorted_nums_vec) {
143
526
            rows += i.size();
144
526
        }
145
340
        _nums.resize(rows);
146
340
        size_t count = 0;
147
148
340
        std::priority_queue<Node, std::vector<Node>, std::greater<Node>> min_heap;
149
866
        for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
150
526
            if (!_sorted_nums_vec[i].empty()) {
151
526
                min_heap.emplace(_sorted_nums_vec[i][0], i, 0);
152
526
            }
153
526
        }
154
155
932
        while (!min_heap.empty()) {
156
592
            Node node = min_heap.top();
157
592
            min_heap.pop();
158
592
            _nums[count++] = node.value;
159
592
            if (++node.element_index < _sorted_nums_vec[node.array_index].size()) {
160
66
                node.value = _sorted_nums_vec[node.array_index][node.element_index];
161
66
                min_heap.push(node);
162
66
            }
163
592
        }
164
340
        _sorted_nums_vec.clear();
165
340
    }
Unexecuted instantiation: _ZN5doris6CountsInE31_convert_sorted_num_vec_to_numsEv
Unexecuted instantiation: _ZN5doris6CountsIfE31_convert_sorted_num_vec_to_numsEv
Unexecuted instantiation: _ZN5doris6CountsIdE31_convert_sorted_num_vec_to_numsEv
166
167
279
    std::pair<Ty, Ty> _merge_sort_and_get_numbers(int64_t target, bool reverse) {
168
279
        Ty first_number = 0, second_number = 0;
169
279
        size_t count = 0;
170
279
        if (reverse) {
171
52
            std::priority_queue<Node> max_heap;
172
240
            for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
173
188
                if (!_sorted_nums_vec[i].empty()) {
174
188
                    max_heap.emplace(_sorted_nums_vec[i][_sorted_nums_vec[i].size() - 1], i,
175
188
                                     _sorted_nums_vec[i].size() - 1);
176
188
                }
177
188
            }
178
179
220
            while (!max_heap.empty()) {
180
220
                Node node = max_heap.top();
181
220
                max_heap.pop();
182
220
                if (count == target) {
183
52
                    second_number = node.value;
184
168
                } else if (count == target + 1) {
185
52
                    first_number = node.value;
186
52
                    break;
187
52
                }
188
168
                ++count;
189
168
                if (--node.element_index >= 0) {
190
122
                    node.value = _sorted_nums_vec[node.array_index][node.element_index];
191
122
                    max_heap.push(node);
192
122
                }
193
168
            }
194
195
227
        } else {
196
227
            std::priority_queue<Node, std::vector<Node>, std::greater<Node>> min_heap;
197
855
            for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
198
628
                if (!_sorted_nums_vec[i].empty()) {
199
628
                    min_heap.emplace(_sorted_nums_vec[i][0], i, 0);
200
628
                }
201
628
            }
202
203
617
            while (!min_heap.empty()) {
204
617
                Node node = min_heap.top();
205
617
                min_heap.pop();
206
617
                if (count == target) {
207
227
                    first_number = node.value;
208
390
                } else if (count == target + 1) {
209
227
                    second_number = node.value;
210
227
                    break;
211
227
                }
212
390
                ++count;
213
390
                if (++node.element_index < _sorted_nums_vec[node.array_index].size()) {
214
216
                    node.value = _sorted_nums_vec[node.array_index][node.element_index];
215
216
                    min_heap.push(node);
216
216
                }
217
390
            }
218
227
        }
219
220
279
        return {first_number, second_number};
221
279
    }
Unexecuted instantiation: _ZN5doris6CountsIhE27_merge_sort_and_get_numbersElb
Unexecuted instantiation: _ZN5doris6CountsIaE27_merge_sort_and_get_numbersElb
Unexecuted instantiation: _ZN5doris6CountsIsE27_merge_sort_and_get_numbersElb
_ZN5doris6CountsIiE27_merge_sort_and_get_numbersElb
Line
Count
Source
167
194
    std::pair<Ty, Ty> _merge_sort_and_get_numbers(int64_t target, bool reverse) {
168
194
        Ty first_number = 0, second_number = 0;
169
194
        size_t count = 0;
170
194
        if (reverse) {
171
20
            std::priority_queue<Node> max_heap;
172
76
            for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
173
56
                if (!_sorted_nums_vec[i].empty()) {
174
56
                    max_heap.emplace(_sorted_nums_vec[i][_sorted_nums_vec[i].size() - 1], i,
175
56
                                     _sorted_nums_vec[i].size() - 1);
176
56
                }
177
56
            }
178
179
78
            while (!max_heap.empty()) {
180
78
                Node node = max_heap.top();
181
78
                max_heap.pop();
182
78
                if (count == target) {
183
20
                    second_number = node.value;
184
58
                } else if (count == target + 1) {
185
20
                    first_number = node.value;
186
20
                    break;
187
20
                }
188
58
                ++count;
189
58
                if (--node.element_index >= 0) {
190
44
                    node.value = _sorted_nums_vec[node.array_index][node.element_index];
191
44
                    max_heap.push(node);
192
44
                }
193
58
            }
194
195
174
        } else {
196
174
            std::priority_queue<Node, std::vector<Node>, std::greater<Node>> min_heap;
197
652
            for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
198
478
                if (!_sorted_nums_vec[i].empty()) {
199
478
                    min_heap.emplace(_sorted_nums_vec[i][0], i, 0);
200
478
                }
201
478
            }
202
203
488
            while (!min_heap.empty()) {
204
488
                Node node = min_heap.top();
205
488
                min_heap.pop();
206
488
                if (count == target) {
207
174
                    first_number = node.value;
208
314
                } else if (count == target + 1) {
209
174
                    second_number = node.value;
210
174
                    break;
211
174
                }
212
314
                ++count;
213
314
                if (++node.element_index < _sorted_nums_vec[node.array_index].size()) {
214
194
                    node.value = _sorted_nums_vec[node.array_index][node.element_index];
215
194
                    min_heap.push(node);
216
194
                }
217
314
            }
218
174
        }
219
220
194
        return {first_number, second_number};
221
194
    }
_ZN5doris6CountsIlE27_merge_sort_and_get_numbersElb
Line
Count
Source
167
37
    std::pair<Ty, Ty> _merge_sort_and_get_numbers(int64_t target, bool reverse) {
168
37
        Ty first_number = 0, second_number = 0;
169
37
        size_t count = 0;
170
37
        if (reverse) {
171
28
            std::priority_queue<Node> max_heap;
172
144
            for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
173
116
                if (!_sorted_nums_vec[i].empty()) {
174
116
                    max_heap.emplace(_sorted_nums_vec[i][_sorted_nums_vec[i].size() - 1], i,
175
116
                                     _sorted_nums_vec[i].size() - 1);
176
116
                }
177
116
            }
178
179
132
            while (!max_heap.empty()) {
180
132
                Node node = max_heap.top();
181
132
                max_heap.pop();
182
132
                if (count == target) {
183
28
                    second_number = node.value;
184
104
                } else if (count == target + 1) {
185
28
                    first_number = node.value;
186
28
                    break;
187
28
                }
188
104
                ++count;
189
104
                if (--node.element_index >= 0) {
190
78
                    node.value = _sorted_nums_vec[node.array_index][node.element_index];
191
78
                    max_heap.push(node);
192
78
                }
193
104
            }
194
195
28
        } else {
196
9
            std::priority_queue<Node, std::vector<Node>, std::greater<Node>> min_heap;
197
31
            for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
198
22
                if (!_sorted_nums_vec[i].empty()) {
199
22
                    min_heap.emplace(_sorted_nums_vec[i][0], i, 0);
200
22
                }
201
22
            }
202
203
25
            while (!min_heap.empty()) {
204
25
                Node node = min_heap.top();
205
25
                min_heap.pop();
206
25
                if (count == target) {
207
9
                    first_number = node.value;
208
16
                } else if (count == target + 1) {
209
9
                    second_number = node.value;
210
9
                    break;
211
9
                }
212
16
                ++count;
213
16
                if (++node.element_index < _sorted_nums_vec[node.array_index].size()) {
214
14
                    node.value = _sorted_nums_vec[node.array_index][node.element_index];
215
14
                    min_heap.push(node);
216
14
                }
217
16
            }
218
9
        }
219
220
37
        return {first_number, second_number};
221
37
    }
Unexecuted instantiation: _ZN5doris6CountsInE27_merge_sort_and_get_numbersElb
Unexecuted instantiation: _ZN5doris6CountsIfE27_merge_sort_and_get_numbersElb
_ZN5doris6CountsIdE27_merge_sort_and_get_numbersElb
Line
Count
Source
167
48
    std::pair<Ty, Ty> _merge_sort_and_get_numbers(int64_t target, bool reverse) {
168
48
        Ty first_number = 0, second_number = 0;
169
48
        size_t count = 0;
170
48
        if (reverse) {
171
4
            std::priority_queue<Node> max_heap;
172
20
            for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
173
16
                if (!_sorted_nums_vec[i].empty()) {
174
16
                    max_heap.emplace(_sorted_nums_vec[i][_sorted_nums_vec[i].size() - 1], i,
175
16
                                     _sorted_nums_vec[i].size() - 1);
176
16
                }
177
16
            }
178
179
10
            while (!max_heap.empty()) {
180
10
                Node node = max_heap.top();
181
10
                max_heap.pop();
182
10
                if (count == target) {
183
4
                    second_number = node.value;
184
6
                } else if (count == target + 1) {
185
4
                    first_number = node.value;
186
4
                    break;
187
4
                }
188
6
                ++count;
189
6
                if (--node.element_index >= 0) {
190
0
                    node.value = _sorted_nums_vec[node.array_index][node.element_index];
191
0
                    max_heap.push(node);
192
0
                }
193
6
            }
194
195
44
        } else {
196
44
            std::priority_queue<Node, std::vector<Node>, std::greater<Node>> min_heap;
197
172
            for (int i = 0; i < _sorted_nums_vec.size(); ++i) {
198
128
                if (!_sorted_nums_vec[i].empty()) {
199
128
                    min_heap.emplace(_sorted_nums_vec[i][0], i, 0);
200
128
                }
201
128
            }
202
203
104
            while (!min_heap.empty()) {
204
104
                Node node = min_heap.top();
205
104
                min_heap.pop();
206
104
                if (count == target) {
207
44
                    first_number = node.value;
208
60
                } else if (count == target + 1) {
209
44
                    second_number = node.value;
210
44
                    break;
211
44
                }
212
60
                ++count;
213
60
                if (++node.element_index < _sorted_nums_vec[node.array_index].size()) {
214
8
                    node.value = _sorted_nums_vec[node.array_index][node.element_index];
215
8
                    min_heap.push(node);
216
8
                }
217
60
            }
218
44
        }
219
220
48
        return {first_number, second_number};
221
48
    }
222
223
    vectorized::PODArray<Ty> _nums;
224
    std::vector<vectorized::PODArray<Ty>> _sorted_nums_vec;
225
};
226
227
} // namespace doris