Coverage Report

Created: 2026-03-13 19:41

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/util/frame_of_reference_coding.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <stdint.h>
21
22
#include <cstdlib>
23
#include <vector>
24
25
#include "common/cast_set.h"
26
#include "core/uint24.h"
27
#include "storage/olap_common.h"
28
#include "util/faststring.h"
29
30
namespace doris {
31
#include "common/compile_check_begin.h"
32
33
134k
inline uint8_t leading_zeroes(const uint64_t v) {
34
134k
    if (v == 0) {
35
2.12k
        return 64;
36
2.12k
    }
37
132k
    return cast_set<uint8_t>(__builtin_clzll(v));
38
134k
}
39
40
44.8k
inline uint8_t bits_less_than_64(const uint64_t v) {
41
44.8k
    return 64 - leading_zeroes(v);
42
44.8k
}
43
44
// See https://stackoverflow.com/questions/28423405/counting-the-number-of-leading-zeros-in-a-128-bit-integer
45
41.5k
inline uint8_t bits_may_more_than_64(const uint128_t v) {
46
    // See https://stackoverflow.com/questions/49580083/builtin-clz-returns-incorrect-value-for-input-zero
47
41.5k
    if (v == 0) {
48
0
        return 0;
49
0
    }
50
41.5k
    uint64_t hi = v >> 64;
51
41.5k
    auto lo = static_cast<uint64_t>(v); // Use static_cast to get low 64 bits without range check
52
41.5k
    int z[3] = {leading_zeroes(hi), leading_zeroes(lo) + 64, 128};
53
41.5k
    int idx = !hi + ((!lo) & (!hi));
54
41.5k
    return cast_set<uint8_t>(128 - z[idx]);
55
41.5k
}
56
57
template <typename T>
58
86.4k
uint8_t bits(const T v) {
59
86.4k
    if (sizeof(T) <= 8) {
60
44.8k
        return bits_less_than_64(static_cast<uint64_t>(v));
61
44.8k
    } else {
62
41.5k
        return bits_may_more_than_64(static_cast<uint128_t>(v));
63
41.5k
    }
64
86.4k
}
_ZN5doris4bitsIiEEhT_
Line
Count
Source
58
762
uint8_t bits(const T v) {
59
762
    if (sizeof(T) <= 8) {
60
762
        return bits_less_than_64(static_cast<uint64_t>(v));
61
762
    } else {
62
0
        return bits_may_more_than_64(static_cast<uint128_t>(v));
63
0
    }
64
762
}
Unexecuted instantiation: _ZN5doris4bitsIaEEhT_
Unexecuted instantiation: _ZN5doris4bitsIsEEhT_
_ZN5doris4bitsIlEEhT_
Line
Count
Source
58
43.3k
uint8_t bits(const T v) {
59
43.3k
    if (sizeof(T) <= 8) {
60
43.3k
        return bits_less_than_64(static_cast<uint64_t>(v));
61
43.3k
    } else {
62
0
        return bits_may_more_than_64(static_cast<uint128_t>(v));
63
0
    }
64
43.3k
}
_ZN5doris4bitsInEEhT_
Line
Count
Source
58
41.5k
uint8_t bits(const T v) {
59
41.5k
    if (sizeof(T) <= 8) {
60
0
        return bits_less_than_64(static_cast<uint64_t>(v));
61
41.5k
    } else {
62
41.5k
        return bits_may_more_than_64(static_cast<uint128_t>(v));
63
41.5k
    }
64
41.5k
}
Unexecuted instantiation: _ZN5doris4bitsIhEEhT_
Unexecuted instantiation: _ZN5doris4bitsItEEhT_
_ZN5doris4bitsIjEEhT_
Line
Count
Source
58
762
uint8_t bits(const T v) {
59
762
    if (sizeof(T) <= 8) {
60
762
        return bits_less_than_64(static_cast<uint64_t>(v));
61
762
    } else {
62
0
        return bits_may_more_than_64(static_cast<uint128_t>(v));
63
0
    }
64
762
}
Unexecuted instantiation: _ZN5doris4bitsImEEhT_
Unexecuted instantiation: _ZN5doris4bitsINS_8uint24_tEEEhT_
Unexecuted instantiation: _ZN5doris4bitsIoEEhT_
65
66
// The implementation for frame-of-reference coding
67
// The detail of frame-of-reference coding, please refer to
68
// https://lemire.me/blog/2012/02/08/effective-compression-using-frame-of-reference-and-delta-coding/
69
// and https://www.elastic.co/cn/blog/frame-of-reference-and-roaring-bitmaps
70
//
71
// The encoded data format is as follows:
72
//
73
// 1. Body:
74
//      BitPackingFrame * FrameCount
75
// 2. Footer:
76
//
77
//      (8 bit StorageFormat + 8 bit BitWidth) * FrameCount
78
//       8 bit FrameValueNum
79
//      32 bit ValuesNum
80
//
81
// There are currently three storage formats
82
// (1) if the StorageFormat == 0: When input data order is not ascending and the BitPackingFrame format is:
83
//          MinValue, (Value[i] - MinVale) * FrameValueNum
84
//
85
// (2) if the StorageFormat == 1: When input data order is ascending and the BitPackingFrame format is:
86
//      MinValue, (Value[i] - Value[i - 1]) * FrameValueNum
87
//
88
// (3) if the StorageFormat == 2:  When overflow occurs when using (1) or (2) and save original values:
89
//      MinValue, (Value[i]) * FrameValueNum
90
//
91
// len(MinValue) can be 32(uint32_t), 64(uint64_t), 128(uint128_t)
92
//
93
// The OrderFlag is 1 represents ascending order, 0 represents  not ascending order
94
// The last frame value num maybe less than 128
95
template <typename T>
96
class ForEncoder {
97
public:
98
32.6k
    explicit ForEncoder(faststring* buffer) : _buffer(buffer) {}
_ZN5doris10ForEncoderIiEC2EPNS_10faststringE
Line
Count
Source
98
7
    explicit ForEncoder(faststring* buffer) : _buffer(buffer) {}
_ZN5doris10ForEncoderIjEC2EPNS_10faststringE
Line
Count
Source
98
3
    explicit ForEncoder(faststring* buffer) : _buffer(buffer) {}
_ZN5doris10ForEncoderIlEC2EPNS_10faststringE
Line
Count
Source
98
16.3k
    explicit ForEncoder(faststring* buffer) : _buffer(buffer) {}
_ZN5doris10ForEncoderInEC2EPNS_10faststringE
Line
Count
Source
98
16.3k
    explicit ForEncoder(faststring* buffer) : _buffer(buffer) {}
Unexecuted instantiation: _ZN5doris10ForEncoderIaEC2EPNS_10faststringE
Unexecuted instantiation: _ZN5doris10ForEncoderIsEC2EPNS_10faststringE
Unexecuted instantiation: _ZN5doris10ForEncoderINS_8uint24_tEEC2EPNS_10faststringE
Unexecuted instantiation: _ZN5doris10ForEncoderImEC2EPNS_10faststringE
Unexecuted instantiation: _ZN5doris10ForEncoderIhEC2EPNS_10faststringE
Unexecuted instantiation: _ZN5doris10ForEncoderItEC2EPNS_10faststringE
Unexecuted instantiation: _ZN5doris10ForEncoderIoEC2EPNS_10faststringE
99
100
4.17M
    void put(const T value) { return put_batch(&value, 1); }
_ZN5doris10ForEncoderIiE3putEi
Line
Count
Source
100
3
    void put(const T value) { return put_batch(&value, 1); }
_ZN5doris10ForEncoderIlE3putEl
Line
Count
Source
100
2.08M
    void put(const T value) { return put_batch(&value, 1); }
_ZN5doris10ForEncoderInE3putEn
Line
Count
Source
100
2.08M
    void put(const T value) { return put_batch(&value, 1); }
Unexecuted instantiation: _ZN5doris10ForEncoderIaE3putEa
Unexecuted instantiation: _ZN5doris10ForEncoderIsE3putEs
Unexecuted instantiation: _ZN5doris10ForEncoderIhE3putEh
Unexecuted instantiation: _ZN5doris10ForEncoderItE3putEt
Unexecuted instantiation: _ZN5doris10ForEncoderIjE3putEj
Unexecuted instantiation: _ZN5doris10ForEncoderImE3putEm
Unexecuted instantiation: _ZN5doris10ForEncoderINS_8uint24_tEE3putES1_
Unexecuted instantiation: _ZN5doris10ForEncoderIoE3putEo
101
102
    void put_batch(const T* value, size_t count);
103
104
    // Flushes any pending values to the underlying buffer.
105
    // Returns the total number of bytes written
106
    uint32_t flush();
107
108
    // underlying buffer size + footer meta size.
109
    // Note: should call this method before flush.
110
0
    uint32_t len() {
111
0
        return cast_set<uint32_t>(_buffer->size() + _storage_formats.size() + _bit_widths.size() +
112
0
                                  5);
113
0
    }
Unexecuted instantiation: _ZN5doris10ForEncoderIaE3lenEv
Unexecuted instantiation: _ZN5doris10ForEncoderIsE3lenEv
Unexecuted instantiation: _ZN5doris10ForEncoderIiE3lenEv
Unexecuted instantiation: _ZN5doris10ForEncoderIlE3lenEv
Unexecuted instantiation: _ZN5doris10ForEncoderInE3lenEv
Unexecuted instantiation: _ZN5doris10ForEncoderINS_8uint24_tEE3lenEv
Unexecuted instantiation: _ZN5doris10ForEncoderIjE3lenEv
Unexecuted instantiation: _ZN5doris10ForEncoderImE3lenEv
Unexecuted instantiation: _ZN5doris10ForEncoderIhE3lenEv
Unexecuted instantiation: _ZN5doris10ForEncoderItE3lenEv
Unexecuted instantiation: _ZN5doris10ForEncoderIoE3lenEv
114
115
    // Resets all the state in the encoder.
116
0
    void clear() {
117
0
        _values_num = 0;
118
0
        _buffered_values_num = 0;
119
0
        _buffer->clear();
120
0
    }
Unexecuted instantiation: _ZN5doris10ForEncoderIaE5clearEv
Unexecuted instantiation: _ZN5doris10ForEncoderIsE5clearEv
Unexecuted instantiation: _ZN5doris10ForEncoderIiE5clearEv
Unexecuted instantiation: _ZN5doris10ForEncoderIlE5clearEv
Unexecuted instantiation: _ZN5doris10ForEncoderInE5clearEv
Unexecuted instantiation: _ZN5doris10ForEncoderINS_8uint24_tEE5clearEv
Unexecuted instantiation: _ZN5doris10ForEncoderIjE5clearEv
Unexecuted instantiation: _ZN5doris10ForEncoderImE5clearEv
Unexecuted instantiation: _ZN5doris10ForEncoderIhE5clearEv
Unexecuted instantiation: _ZN5doris10ForEncoderItE5clearEv
Unexecuted instantiation: _ZN5doris10ForEncoderIoE5clearEv
121
122
private:
123
    void bit_pack(const T* input, uint8_t in_num, int bit_width, uint8_t* output);
124
125
    void bit_pack_8(const T* input, uint8_t in_num, int bit_width, uint8_t* output);
126
127
    template <typename U>
128
    void bit_pack_4(const T* input, uint8_t in_num, int bit_width, uint8_t* output);
129
130
    void bit_pack_1(const T* input, uint8_t in_num, int bit_width, uint8_t* output);
131
132
    void bit_packing_one_frame_value(const T* input);
133
134
    const T* copy_value(const T* val, size_t count);
135
136
    const T numeric_limits_max();
137
138
    uint32_t _values_num = 0;
139
    uint8_t _buffered_values_num = 0;
140
    static const uint8_t FRAME_VALUE_NUM = 128;
141
    T _buffered_values[FRAME_VALUE_NUM];
142
143
    faststring* _buffer = nullptr;
144
    std::vector<uint8_t> _storage_formats;
145
    std::vector<uint8_t> _bit_widths;
146
};
147
148
template <typename T>
149
class ForDecoder {
150
public:
151
    explicit ForDecoder(const uint8_t* in_buffer, size_t buffer_len)
152
32.6k
            : _buffer(in_buffer), _buffer_len(buffer_len), _parsed(false) {}
_ZN5doris10ForDecoderIiEC2EPKhm
Line
Count
Source
152
7
            : _buffer(in_buffer), _buffer_len(buffer_len), _parsed(false) {}
_ZN5doris10ForDecoderIjEC2EPKhm
Line
Count
Source
152
3
            : _buffer(in_buffer), _buffer_len(buffer_len), _parsed(false) {}
_ZN5doris10ForDecoderIlEC2EPKhm
Line
Count
Source
152
16.3k
            : _buffer(in_buffer), _buffer_len(buffer_len), _parsed(false) {}
_ZN5doris10ForDecoderInEC2EPKhm
Line
Count
Source
152
16.3k
            : _buffer(in_buffer), _buffer_len(buffer_len), _parsed(false) {}
Unexecuted instantiation: _ZN5doris10ForDecoderIaEC2EPKhm
Unexecuted instantiation: _ZN5doris10ForDecoderIsEC2EPKhm
Unexecuted instantiation: _ZN5doris10ForDecoderINS_8uint24_tEEC2EPKhm
Unexecuted instantiation: _ZN5doris10ForDecoderImEC2EPKhm
Unexecuted instantiation: _ZN5doris10ForDecoderIhEC2EPKhm
Unexecuted instantiation: _ZN5doris10ForDecoderItEC2EPKhm
Unexecuted instantiation: _ZN5doris10ForDecoderIoEC2EPKhm
153
154
    // read footer metadata
155
    bool init();
156
157
4.17M
    bool get(T* val) { return get_batch(val, 1); }
_ZN5doris10ForDecoderIiE3getEPi
Line
Count
Source
157
4
    bool get(T* val) { return get_batch(val, 1); }
_ZN5doris10ForDecoderIlE3getEPl
Line
Count
Source
157
2.08M
    bool get(T* val) { return get_batch(val, 1); }
_ZN5doris10ForDecoderInE3getEPn
Line
Count
Source
157
2.08M
    bool get(T* val) { return get_batch(val, 1); }
Unexecuted instantiation: _ZN5doris10ForDecoderIaE3getEPa
Unexecuted instantiation: _ZN5doris10ForDecoderIsE3getEPs
Unexecuted instantiation: _ZN5doris10ForDecoderIhE3getEPh
Unexecuted instantiation: _ZN5doris10ForDecoderItE3getEPt
Unexecuted instantiation: _ZN5doris10ForDecoderIjE3getEPj
Unexecuted instantiation: _ZN5doris10ForDecoderImE3getEPm
Unexecuted instantiation: _ZN5doris10ForDecoderINS_8uint24_tEE3getEPS1_
Unexecuted instantiation: _ZN5doris10ForDecoderIoE3getEPo
158
159
    // Gets the next batch value.  Returns false if there are no more.
160
    bool get_batch(T* val, size_t count);
161
162
    // The skip_num is positive means move forwards
163
    // The skip_num is negative means move backwards
164
    bool skip(int32_t skip_num);
165
166
    bool seek_at_or_after_value(const void* value, bool* exact_match);
167
168
0
    uint32_t current_index() const { return _current_index; }
Unexecuted instantiation: _ZNK5doris10ForDecoderIaE13current_indexEv
Unexecuted instantiation: _ZNK5doris10ForDecoderIsE13current_indexEv
Unexecuted instantiation: _ZNK5doris10ForDecoderIiE13current_indexEv
Unexecuted instantiation: _ZNK5doris10ForDecoderIlE13current_indexEv
Unexecuted instantiation: _ZNK5doris10ForDecoderInE13current_indexEv
Unexecuted instantiation: _ZNK5doris10ForDecoderINS_8uint24_tEE13current_indexEv
Unexecuted instantiation: _ZNK5doris10ForDecoderIjE13current_indexEv
Unexecuted instantiation: _ZNK5doris10ForDecoderImE13current_indexEv
Unexecuted instantiation: _ZNK5doris10ForDecoderIhE13current_indexEv
Unexecuted instantiation: _ZNK5doris10ForDecoderItE13current_indexEv
Unexecuted instantiation: _ZNK5doris10ForDecoderIoE13current_indexEv
169
170
0
    uint32_t count() const { return _values_num; }
Unexecuted instantiation: _ZNK5doris10ForDecoderIaE5countEv
Unexecuted instantiation: _ZNK5doris10ForDecoderIsE5countEv
Unexecuted instantiation: _ZNK5doris10ForDecoderIiE5countEv
Unexecuted instantiation: _ZNK5doris10ForDecoderIlE5countEv
Unexecuted instantiation: _ZNK5doris10ForDecoderInE5countEv
Unexecuted instantiation: _ZNK5doris10ForDecoderINS_8uint24_tEE5countEv
Unexecuted instantiation: _ZNK5doris10ForDecoderIjE5countEv
Unexecuted instantiation: _ZNK5doris10ForDecoderImE5countEv
Unexecuted instantiation: _ZNK5doris10ForDecoderIhE5countEv
Unexecuted instantiation: _ZNK5doris10ForDecoderItE5countEv
Unexecuted instantiation: _ZNK5doris10ForDecoderIoE5countEv
171
172
private:
173
    void bit_unpack(const uint8_t* input, uint8_t in_num, int bit_width, T* output);
174
175
    template <typename U>
176
    void bit_unpack_optimize(const uint8_t* input, uint8_t in_num, int bit_width, T* output);
177
178
48.9k
    uint32_t frame_size(uint32_t frame_index) {
179
48.9k
        return (frame_index == _frame_count - 1) ? _last_frame_size : _max_frame_size;
180
48.9k
    }
Unexecuted instantiation: _ZN5doris10ForDecoderIaE10frame_sizeEj
Unexecuted instantiation: _ZN5doris10ForDecoderIsE10frame_sizeEj
_ZN5doris10ForDecoderIiE10frame_sizeEj
Line
Count
Source
178
9
    uint32_t frame_size(uint32_t frame_index) {
179
9
        return (frame_index == _frame_count - 1) ? _last_frame_size : _max_frame_size;
180
9
    }
_ZN5doris10ForDecoderIlE10frame_sizeEj
Line
Count
Source
178
24.4k
    uint32_t frame_size(uint32_t frame_index) {
179
24.4k
        return (frame_index == _frame_count - 1) ? _last_frame_size : _max_frame_size;
180
24.4k
    }
_ZN5doris10ForDecoderInE10frame_sizeEj
Line
Count
Source
178
24.4k
    uint32_t frame_size(uint32_t frame_index) {
179
24.4k
        return (frame_index == _frame_count - 1) ? _last_frame_size : _max_frame_size;
180
24.4k
    }
Unexecuted instantiation: _ZN5doris10ForDecoderIhE10frame_sizeEj
Unexecuted instantiation: _ZN5doris10ForDecoderItE10frame_sizeEj
_ZN5doris10ForDecoderIjE10frame_sizeEj
Line
Count
Source
178
5
    uint32_t frame_size(uint32_t frame_index) {
179
5
        return (frame_index == _frame_count - 1) ? _last_frame_size : _max_frame_size;
180
5
    }
Unexecuted instantiation: _ZN5doris10ForDecoderImE10frame_sizeEj
Unexecuted instantiation: _ZN5doris10ForDecoderINS_8uint24_tEE10frame_sizeEj
Unexecuted instantiation: _ZN5doris10ForDecoderIoE10frame_sizeEj
181
182
    void decode_current_frame(T* output);
183
184
    T decode_frame_min_value(uint32_t frame_index);
185
186
    // Return index of the last frame which contains value < target.
187
    // Return `_frame_count - 1` when all frames are < target.
188
    // Return `_frame_count` when not found (all frames are >= target).
189
    uint32_t seek_last_frame_before_value(T target);
190
191
    // Seek to the first value in frame that >= target.
192
    // Return true when found and update exact_match.
193
    // Return false otherwise.
194
    bool seek_lower_bound_inside_frame(uint32_t frame_index, T target, bool* exact_match);
195
196
    T* copy_value(T* val, size_t count);
197
198
    const uint8_t* _buffer = nullptr;
199
    size_t _buffer_len = 0;
200
    bool _parsed = false;
201
202
    uint8_t _max_frame_size = 0;
203
    uint8_t _last_frame_size = 0;
204
    uint32_t _values_num = 0;
205
    uint32_t _frame_count = 0;
206
    std::vector<uint32_t> _frame_offsets;
207
    std::vector<uint8_t> _bit_widths;
208
    std::vector<uint8_t> _storage_formats;
209
210
    uint32_t _current_index = 0;
211
    uint32_t _current_decoded_frame = -1;
212
    std::vector<T> _out_buffer; // store values of decoded frame
213
};
214
#include "common/compile_check_end.h"
215
} // namespace doris