Coverage Report

Created: 2026-04-10 16:11

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/util/frame_of_reference_coding.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <stdint.h>
21
22
#include <cstdlib>
23
#include <vector>
24
25
#include "common/cast_set.h"
26
#include "core/uint24.h"
27
#include "storage/olap_common.h"
28
#include "util/faststring.h"
29
30
namespace doris {
31
32
806k
inline uint8_t leading_zeroes(const uint64_t v) {
33
806k
    if (v == 0) {
34
2.12k
        return 64;
35
2.12k
    }
36
804k
    return cast_set<uint8_t>(__builtin_clzll(v));
37
806k
}
38
39
44.8k
inline uint8_t bits_less_than_64(const uint64_t v) {
40
44.8k
    return 64 - leading_zeroes(v);
41
44.8k
}
42
43
// See https://stackoverflow.com/questions/28423405/counting-the-number-of-leading-zeros-in-a-128-bit-integer
44
41.5k
inline uint8_t bits_may_more_than_64(const uint128_t v) {
45
    // See https://stackoverflow.com/questions/49580083/builtin-clz-returns-incorrect-value-for-input-zero
46
41.5k
    if (v == 0) {
47
0
        return 0;
48
0
    }
49
41.5k
    uint64_t hi = v >> 64;
50
41.5k
    auto lo = static_cast<uint64_t>(v); // Use static_cast to get low 64 bits without range check
51
41.5k
    int z[3] = {leading_zeroes(hi), leading_zeroes(lo) + 64, 128};
52
41.5k
    int idx = !hi + ((!lo) & (!hi));
53
41.5k
    return cast_set<uint8_t>(128 - z[idx]);
54
41.5k
}
55
56
template <typename T>
57
86.4k
uint8_t bits(const T v) {
58
86.4k
    if (sizeof(T) <= 8) {
59
44.8k
        return bits_less_than_64(static_cast<uint64_t>(v));
60
44.8k
    } else {
61
41.5k
        return bits_may_more_than_64(static_cast<uint128_t>(v));
62
41.5k
    }
63
86.4k
}
_ZN5doris4bitsIiEEhT_
Line
Count
Source
57
762
uint8_t bits(const T v) {
58
762
    if (sizeof(T) <= 8) {
59
762
        return bits_less_than_64(static_cast<uint64_t>(v));
60
762
    } else {
61
0
        return bits_may_more_than_64(static_cast<uint128_t>(v));
62
0
    }
63
762
}
Unexecuted instantiation: _ZN5doris4bitsIaEEhT_
Unexecuted instantiation: _ZN5doris4bitsIsEEhT_
_ZN5doris4bitsIlEEhT_
Line
Count
Source
57
43.3k
uint8_t bits(const T v) {
58
43.3k
    if (sizeof(T) <= 8) {
59
43.3k
        return bits_less_than_64(static_cast<uint64_t>(v));
60
43.3k
    } else {
61
0
        return bits_may_more_than_64(static_cast<uint128_t>(v));
62
0
    }
63
43.3k
}
_ZN5doris4bitsInEEhT_
Line
Count
Source
57
41.5k
uint8_t bits(const T v) {
58
41.5k
    if (sizeof(T) <= 8) {
59
0
        return bits_less_than_64(static_cast<uint64_t>(v));
60
41.5k
    } else {
61
41.5k
        return bits_may_more_than_64(static_cast<uint128_t>(v));
62
41.5k
    }
63
41.5k
}
Unexecuted instantiation: _ZN5doris4bitsIhEEhT_
Unexecuted instantiation: _ZN5doris4bitsItEEhT_
_ZN5doris4bitsIjEEhT_
Line
Count
Source
57
762
uint8_t bits(const T v) {
58
762
    if (sizeof(T) <= 8) {
59
762
        return bits_less_than_64(static_cast<uint64_t>(v));
60
762
    } else {
61
0
        return bits_may_more_than_64(static_cast<uint128_t>(v));
62
0
    }
63
762
}
Unexecuted instantiation: _ZN5doris4bitsImEEhT_
Unexecuted instantiation: _ZN5doris4bitsINS_8uint24_tEEEhT_
Unexecuted instantiation: _ZN5doris4bitsIoEEhT_
64
65
// The implementation for frame-of-reference coding
66
// The detail of frame-of-reference coding, please refer to
67
// https://lemire.me/blog/2012/02/08/effective-compression-using-frame-of-reference-and-delta-coding/
68
// and https://www.elastic.co/cn/blog/frame-of-reference-and-roaring-bitmaps
69
//
70
// The encoded data format is as follows:
71
//
72
// 1. Body:
73
//      BitPackingFrame * FrameCount
74
// 2. Footer:
75
//
76
//      (8 bit StorageFormat + 8 bit BitWidth) * FrameCount
77
//       8 bit FrameValueNum
78
//      32 bit ValuesNum
79
//
80
// There are currently three storage formats
81
// (1) if the StorageFormat == 0: When input data order is not ascending and the BitPackingFrame format is:
82
//          MinValue, (Value[i] - MinVale) * FrameValueNum
83
//
84
// (2) if the StorageFormat == 1: When input data order is ascending and the BitPackingFrame format is:
85
//      MinValue, (Value[i] - Value[i - 1]) * FrameValueNum
86
//
87
// (3) if the StorageFormat == 2:  When overflow occurs when using (1) or (2) and save original values:
88
//      MinValue, (Value[i]) * FrameValueNum
89
//
90
// len(MinValue) can be 32(uint32_t), 64(uint64_t), 128(uint128_t)
91
//
92
// The OrderFlag is 1 represents ascending order, 0 represents  not ascending order
93
// The last frame value num maybe less than 128
94
template <typename T>
95
class ForEncoder {
96
public:
97
32.6k
    explicit ForEncoder(faststring* buffer) : _buffer(buffer) {}
Unexecuted instantiation: _ZN5doris10ForEncoderIaEC2EPNS_10faststringE
Unexecuted instantiation: _ZN5doris10ForEncoderIsEC2EPNS_10faststringE
_ZN5doris10ForEncoderIiEC2EPNS_10faststringE
Line
Count
Source
97
7
    explicit ForEncoder(faststring* buffer) : _buffer(buffer) {}
_ZN5doris10ForEncoderIlEC2EPNS_10faststringE
Line
Count
Source
97
16.3k
    explicit ForEncoder(faststring* buffer) : _buffer(buffer) {}
_ZN5doris10ForEncoderInEC2EPNS_10faststringE
Line
Count
Source
97
16.3k
    explicit ForEncoder(faststring* buffer) : _buffer(buffer) {}
Unexecuted instantiation: _ZN5doris10ForEncoderINS_8uint24_tEEC2EPNS_10faststringE
_ZN5doris10ForEncoderIjEC2EPNS_10faststringE
Line
Count
Source
97
3
    explicit ForEncoder(faststring* buffer) : _buffer(buffer) {}
Unexecuted instantiation: _ZN5doris10ForEncoderImEC2EPNS_10faststringE
Unexecuted instantiation: _ZN5doris10ForEncoderIhEC2EPNS_10faststringE
Unexecuted instantiation: _ZN5doris10ForEncoderItEC2EPNS_10faststringE
Unexecuted instantiation: _ZN5doris10ForEncoderIoEC2EPNS_10faststringE
98
99
4.17M
    void put(const T value) { return put_batch(&value, 1); }
Unexecuted instantiation: _ZN5doris10ForEncoderIaE3putEa
Unexecuted instantiation: _ZN5doris10ForEncoderIsE3putEs
_ZN5doris10ForEncoderIiE3putEi
Line
Count
Source
99
3
    void put(const T value) { return put_batch(&value, 1); }
_ZN5doris10ForEncoderIlE3putEl
Line
Count
Source
99
2.08M
    void put(const T value) { return put_batch(&value, 1); }
_ZN5doris10ForEncoderInE3putEn
Line
Count
Source
99
2.08M
    void put(const T value) { return put_batch(&value, 1); }
Unexecuted instantiation: _ZN5doris10ForEncoderIhE3putEh
Unexecuted instantiation: _ZN5doris10ForEncoderItE3putEt
Unexecuted instantiation: _ZN5doris10ForEncoderIjE3putEj
Unexecuted instantiation: _ZN5doris10ForEncoderImE3putEm
Unexecuted instantiation: _ZN5doris10ForEncoderINS_8uint24_tEE3putES1_
Unexecuted instantiation: _ZN5doris10ForEncoderIoE3putEo
100
101
    void put_batch(const T* value, size_t count);
102
103
    // Flushes any pending values to the underlying buffer.
104
    // Returns the total number of bytes written
105
    uint32_t flush();
106
107
    // underlying buffer size + footer meta size.
108
    // Note: should call this method before flush.
109
0
    uint32_t len() {
110
0
        return cast_set<uint32_t>(_buffer->size() + _storage_formats.size() + _bit_widths.size() +
111
0
                                  5);
112
0
    }
Unexecuted instantiation: _ZN5doris10ForEncoderIaE3lenEv
Unexecuted instantiation: _ZN5doris10ForEncoderIsE3lenEv
Unexecuted instantiation: _ZN5doris10ForEncoderIiE3lenEv
Unexecuted instantiation: _ZN5doris10ForEncoderIlE3lenEv
Unexecuted instantiation: _ZN5doris10ForEncoderInE3lenEv
Unexecuted instantiation: _ZN5doris10ForEncoderINS_8uint24_tEE3lenEv
Unexecuted instantiation: _ZN5doris10ForEncoderIjE3lenEv
Unexecuted instantiation: _ZN5doris10ForEncoderImE3lenEv
Unexecuted instantiation: _ZN5doris10ForEncoderIhE3lenEv
Unexecuted instantiation: _ZN5doris10ForEncoderItE3lenEv
Unexecuted instantiation: _ZN5doris10ForEncoderIoE3lenEv
113
114
    // Resets all the state in the encoder.
115
0
    void clear() {
116
0
        _values_num = 0;
117
0
        _buffered_values_num = 0;
118
0
        _buffer->clear();
119
0
    }
Unexecuted instantiation: _ZN5doris10ForEncoderIaE5clearEv
Unexecuted instantiation: _ZN5doris10ForEncoderIsE5clearEv
Unexecuted instantiation: _ZN5doris10ForEncoderIiE5clearEv
Unexecuted instantiation: _ZN5doris10ForEncoderIlE5clearEv
Unexecuted instantiation: _ZN5doris10ForEncoderInE5clearEv
Unexecuted instantiation: _ZN5doris10ForEncoderINS_8uint24_tEE5clearEv
Unexecuted instantiation: _ZN5doris10ForEncoderIjE5clearEv
Unexecuted instantiation: _ZN5doris10ForEncoderImE5clearEv
Unexecuted instantiation: _ZN5doris10ForEncoderIhE5clearEv
Unexecuted instantiation: _ZN5doris10ForEncoderItE5clearEv
Unexecuted instantiation: _ZN5doris10ForEncoderIoE5clearEv
120
121
private:
122
    void bit_pack(const T* input, uint8_t in_num, int bit_width, uint8_t* output);
123
124
    void bit_pack_8(const T* input, uint8_t in_num, int bit_width, uint8_t* output);
125
126
    template <typename U>
127
    void bit_pack_4(const T* input, uint8_t in_num, int bit_width, uint8_t* output);
128
129
    void bit_pack_1(const T* input, uint8_t in_num, int bit_width, uint8_t* output);
130
131
    void bit_packing_one_frame_value(const T* input);
132
133
    const T* copy_value(const T* val, size_t count);
134
135
    const T numeric_limits_max();
136
137
    uint32_t _values_num = 0;
138
    uint8_t _buffered_values_num = 0;
139
    static const uint8_t FRAME_VALUE_NUM = 128;
140
    T _buffered_values[FRAME_VALUE_NUM];
141
142
    faststring* _buffer = nullptr;
143
    std::vector<uint8_t> _storage_formats;
144
    std::vector<uint8_t> _bit_widths;
145
};
146
147
template <typename T>
148
class ForDecoder {
149
public:
150
    explicit ForDecoder(const uint8_t* in_buffer, size_t buffer_len)
151
32.6k
            : _buffer(in_buffer), _buffer_len(buffer_len), _parsed(false) {}
Unexecuted instantiation: _ZN5doris10ForDecoderIaEC2EPKhm
Unexecuted instantiation: _ZN5doris10ForDecoderIsEC2EPKhm
_ZN5doris10ForDecoderIiEC2EPKhm
Line
Count
Source
151
7
            : _buffer(in_buffer), _buffer_len(buffer_len), _parsed(false) {}
_ZN5doris10ForDecoderIlEC2EPKhm
Line
Count
Source
151
16.3k
            : _buffer(in_buffer), _buffer_len(buffer_len), _parsed(false) {}
_ZN5doris10ForDecoderInEC2EPKhm
Line
Count
Source
151
16.3k
            : _buffer(in_buffer), _buffer_len(buffer_len), _parsed(false) {}
Unexecuted instantiation: _ZN5doris10ForDecoderINS_8uint24_tEEC2EPKhm
_ZN5doris10ForDecoderIjEC2EPKhm
Line
Count
Source
151
3
            : _buffer(in_buffer), _buffer_len(buffer_len), _parsed(false) {}
Unexecuted instantiation: _ZN5doris10ForDecoderImEC2EPKhm
Unexecuted instantiation: _ZN5doris10ForDecoderIhEC2EPKhm
Unexecuted instantiation: _ZN5doris10ForDecoderItEC2EPKhm
Unexecuted instantiation: _ZN5doris10ForDecoderIoEC2EPKhm
152
153
    // read footer metadata
154
    bool init();
155
156
4.17M
    bool get(T* val) { return get_batch(val, 1); }
Unexecuted instantiation: _ZN5doris10ForDecoderIaE3getEPa
Unexecuted instantiation: _ZN5doris10ForDecoderIsE3getEPs
_ZN5doris10ForDecoderIiE3getEPi
Line
Count
Source
156
4
    bool get(T* val) { return get_batch(val, 1); }
_ZN5doris10ForDecoderIlE3getEPl
Line
Count
Source
156
2.08M
    bool get(T* val) { return get_batch(val, 1); }
_ZN5doris10ForDecoderInE3getEPn
Line
Count
Source
156
2.08M
    bool get(T* val) { return get_batch(val, 1); }
Unexecuted instantiation: _ZN5doris10ForDecoderIhE3getEPh
Unexecuted instantiation: _ZN5doris10ForDecoderItE3getEPt
Unexecuted instantiation: _ZN5doris10ForDecoderIjE3getEPj
Unexecuted instantiation: _ZN5doris10ForDecoderImE3getEPm
Unexecuted instantiation: _ZN5doris10ForDecoderINS_8uint24_tEE3getEPS1_
Unexecuted instantiation: _ZN5doris10ForDecoderIoE3getEPo
157
158
    // Gets the next batch value.  Returns false if there are no more.
159
    bool get_batch(T* val, size_t count);
160
161
    // The skip_num is positive means move forwards
162
    // The skip_num is negative means move backwards
163
    bool skip(int32_t skip_num);
164
165
    bool seek_at_or_after_value(const void* value, bool* exact_match);
166
167
0
    uint32_t current_index() const { return _current_index; }
Unexecuted instantiation: _ZNK5doris10ForDecoderIaE13current_indexEv
Unexecuted instantiation: _ZNK5doris10ForDecoderIsE13current_indexEv
Unexecuted instantiation: _ZNK5doris10ForDecoderIiE13current_indexEv
Unexecuted instantiation: _ZNK5doris10ForDecoderIlE13current_indexEv
Unexecuted instantiation: _ZNK5doris10ForDecoderInE13current_indexEv
Unexecuted instantiation: _ZNK5doris10ForDecoderINS_8uint24_tEE13current_indexEv
Unexecuted instantiation: _ZNK5doris10ForDecoderIjE13current_indexEv
Unexecuted instantiation: _ZNK5doris10ForDecoderImE13current_indexEv
Unexecuted instantiation: _ZNK5doris10ForDecoderIhE13current_indexEv
Unexecuted instantiation: _ZNK5doris10ForDecoderItE13current_indexEv
Unexecuted instantiation: _ZNK5doris10ForDecoderIoE13current_indexEv
168
169
0
    uint32_t count() const { return _values_num; }
Unexecuted instantiation: _ZNK5doris10ForDecoderIaE5countEv
Unexecuted instantiation: _ZNK5doris10ForDecoderIsE5countEv
Unexecuted instantiation: _ZNK5doris10ForDecoderIiE5countEv
Unexecuted instantiation: _ZNK5doris10ForDecoderIlE5countEv
Unexecuted instantiation: _ZNK5doris10ForDecoderInE5countEv
Unexecuted instantiation: _ZNK5doris10ForDecoderINS_8uint24_tEE5countEv
Unexecuted instantiation: _ZNK5doris10ForDecoderIjE5countEv
Unexecuted instantiation: _ZNK5doris10ForDecoderImE5countEv
Unexecuted instantiation: _ZNK5doris10ForDecoderIhE5countEv
Unexecuted instantiation: _ZNK5doris10ForDecoderItE5countEv
Unexecuted instantiation: _ZNK5doris10ForDecoderIoE5countEv
170
171
private:
172
    void bit_unpack(const uint8_t* input, uint8_t in_num, int bit_width, T* output);
173
174
    template <typename U>
175
    void bit_unpack_optimize(const uint8_t* input, uint8_t in_num, int bit_width, T* output);
176
177
48.9k
    uint32_t frame_size(uint32_t frame_index) {
178
48.9k
        return (frame_index == _frame_count - 1) ? _last_frame_size : _max_frame_size;
179
48.9k
    }
Unexecuted instantiation: _ZN5doris10ForDecoderIaE10frame_sizeEj
Unexecuted instantiation: _ZN5doris10ForDecoderIsE10frame_sizeEj
_ZN5doris10ForDecoderIiE10frame_sizeEj
Line
Count
Source
177
9
    uint32_t frame_size(uint32_t frame_index) {
178
9
        return (frame_index == _frame_count - 1) ? _last_frame_size : _max_frame_size;
179
9
    }
_ZN5doris10ForDecoderIlE10frame_sizeEj
Line
Count
Source
177
24.4k
    uint32_t frame_size(uint32_t frame_index) {
178
24.4k
        return (frame_index == _frame_count - 1) ? _last_frame_size : _max_frame_size;
179
24.4k
    }
_ZN5doris10ForDecoderInE10frame_sizeEj
Line
Count
Source
177
24.4k
    uint32_t frame_size(uint32_t frame_index) {
178
24.4k
        return (frame_index == _frame_count - 1) ? _last_frame_size : _max_frame_size;
179
24.4k
    }
Unexecuted instantiation: _ZN5doris10ForDecoderIhE10frame_sizeEj
Unexecuted instantiation: _ZN5doris10ForDecoderItE10frame_sizeEj
_ZN5doris10ForDecoderIjE10frame_sizeEj
Line
Count
Source
177
5
    uint32_t frame_size(uint32_t frame_index) {
178
5
        return (frame_index == _frame_count - 1) ? _last_frame_size : _max_frame_size;
179
5
    }
Unexecuted instantiation: _ZN5doris10ForDecoderImE10frame_sizeEj
Unexecuted instantiation: _ZN5doris10ForDecoderINS_8uint24_tEE10frame_sizeEj
Unexecuted instantiation: _ZN5doris10ForDecoderIoE10frame_sizeEj
180
181
    void decode_current_frame(T* output);
182
183
    T decode_frame_min_value(uint32_t frame_index);
184
185
    // Return index of the last frame which contains value < target.
186
    // Return `_frame_count - 1` when all frames are < target.
187
    // Return `_frame_count` when not found (all frames are >= target).
188
    uint32_t seek_last_frame_before_value(T target);
189
190
    // Seek to the first value in frame that >= target.
191
    // Return true when found and update exact_match.
192
    // Return false otherwise.
193
    bool seek_lower_bound_inside_frame(uint32_t frame_index, T target, bool* exact_match);
194
195
    T* copy_value(T* val, size_t count);
196
197
    const uint8_t* _buffer = nullptr;
198
    size_t _buffer_len = 0;
199
    bool _parsed = false;
200
201
    uint8_t _max_frame_size = 0;
202
    uint8_t _last_frame_size = 0;
203
    uint32_t _values_num = 0;
204
    uint32_t _frame_count = 0;
205
    std::vector<uint32_t> _frame_offsets;
206
    std::vector<uint8_t> _bit_widths;
207
    std::vector<uint8_t> _storage_formats;
208
209
    uint32_t _current_index = 0;
210
    uint32_t _current_decoded_frame = -1;
211
    std::vector<T> _out_buffer; // store values of decoded frame
212
};
213
} // namespace doris