Coverage Report

Created: 2024-11-18 10:37

/root/doris/be/src/util/counts.h
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <algorithm>
21
#include <cmath>
22
#include <unordered_map>
23
#include <vector>
24
25
#include "udf/udf.h"
26
27
namespace doris {
28
29
class Counts {
30
public:
31
3
    Counts() = default;
32
33
1
    inline void merge(const Counts* other) {
34
1
        if (other == nullptr || other->_counts.empty()) {
35
0
            return;
36
0
        }
37
38
5
        for (auto& cell : other->_counts) {
39
5
            increment(cell.first, cell.second);
40
5
        }
41
1
    }
42
43
17
    void increment(int64_t key, uint32_t i) {
44
17
        auto item = _counts.find(key);
45
17
        if (item != _counts.end()) {
46
2
            item->second += i;
47
15
        } else {
48
15
            _counts.emplace(std::make_pair(key, i));
49
15
        }
50
17
    }
51
52
1
    uint32_t serialized_size() const {
53
1
        return sizeof(uint32_t) + sizeof(int64_t) * _counts.size() +
54
1
               sizeof(uint32_t) * _counts.size();
55
1
    }
56
57
1
    void serialize(uint8_t* writer) const {
58
1
        uint32_t size = _counts.size();
59
1
        memcpy(writer, &size, sizeof(uint32_t));
60
1
        writer += sizeof(uint32_t);
61
6
        for (auto& cell : _counts) {
62
6
            memcpy(writer, &cell.first, sizeof(int64_t));
63
6
            writer += sizeof(int64_t);
64
6
            memcpy(writer, &cell.second, sizeof(uint32_t));
65
6
            writer += sizeof(uint32_t);
66
6
        }
67
1
    }
68
69
1
    void unserialize(const uint8_t* type_reader) {
70
1
        uint32_t size;
71
1
        memcpy(&size, type_reader, sizeof(uint32_t));
72
1
        type_reader += sizeof(uint32_t);
73
7
        for (uint32_t i = 0; i < size; ++i) {
74
6
            int64_t key;
75
6
            uint32_t count;
76
6
            memcpy(&key, type_reader, sizeof(int64_t));
77
6
            type_reader += sizeof(int64_t);
78
6
            memcpy(&count, type_reader, sizeof(uint32_t));
79
6
            type_reader += sizeof(uint32_t);
80
6
            _counts.emplace(std::make_pair(key, count));
81
6
        }
82
1
    }
83
84
    double get_percentile(std::vector<std::pair<int64_t, uint32_t>>& counts,
85
3
                          double position) const {
86
3
        long lower = std::floor(position);
87
3
        long higher = std::ceil(position);
88
89
3
        auto iter = counts.begin();
90
5
        for (; iter != counts.end() && iter->second < lower + 1; ++iter)
91
2
            ;
92
93
3
        int64_t lower_key = iter->first;
94
3
        if (higher == lower) {
95
0
            return lower_key;
96
0
        }
97
98
3
        if (iter->second < higher + 1) {
99
1
            iter++;
100
1
        }
101
102
3
        int64_t higher_key = iter->first;
103
3
        if (lower_key == higher_key) {
104
2
            return lower_key;
105
2
        }
106
107
1
        return (higher - position) * lower_key + (position - lower) * higher_key;
108
3
    }
109
110
3
    double terminate(double quantile) const {
111
3
        if (_counts.empty()) {
112
            // Although set null here, but the value is 0.0 and the call method just
113
            // get val in aggregate_function_percentile_approx.h
114
0
            return 0.0;
115
0
        }
116
117
3
        std::vector<std::pair<int64_t, uint32_t>> elems(_counts.begin(), _counts.end());
118
3
        sort(elems.begin(), elems.end(),
119
53
             [](const std::pair<int64_t, uint32_t> l, const std::pair<int64_t, uint32_t> r) {
120
53
                 return l.first < r.first;
121
53
             });
122
123
3
        long total = 0;
124
22
        for (auto& cell : elems) {
125
22
            total += cell.second;
126
22
            cell.second = total;
127
22
        }
128
129
3
        long max_position = total - 1;
130
3
        double position = max_position * quantile;
131
3
        return get_percentile(elems, position);
132
3
    }
133
134
private:
135
    std::unordered_map<int64_t, uint32_t> _counts;
136
};
137
138
} // namespace doris