Coverage Report

Created: 2026-04-14 03:58

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/segment/rle_page.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include "common/cast_set.h"
21
#include "storage/segment/options.h"      // for PageBuilderOptions/PageDecoderOptions
22
#include "storage/segment/page_builder.h" // for PageBuilder
23
#include "storage/segment/page_decoder.h" // for PageDecoder
24
#include "util/coding.h"                  // for encode_fixed32_le/decode_fixed32_le
25
#include "util/rle_encoding.h"            // for RleEncoder/RleDecoder
26
#include "util/slice.h"                   // for OwnedSlice
27
28
namespace doris {
29
namespace segment_v2 {
30
31
enum { RLE_PAGE_HEADER_SIZE = 4 };
32
33
// RLE builder for generic integer and bool types. What is missing is some way
34
// to enforce that this can only be instantiated for INT and BOOL types.
35
//
36
// The page format is as follows:
37
//
38
// 1. Header: (4 bytes total)
39
//
40
//    <num_elements> [32-bit]
41
//      The number of elements encoded in the page.
42
//
43
//    NOTE: all on-disk ints are encoded little-endian
44
//
45
// 2. Element data
46
//
47
//    The header is followed by the rle-encoded element data.
48
//
49
// This Rle encoding algorithm is only effective for repeated INT type and bool type,
50
// It is not good for sequence number or random number. BitshufflePage is recommended
51
// for these case.
52
//
53
// TODO(hkp): optimize rle algorithm
54
template <FieldType Type>
55
class RlePageBuilder : public PageBuilderHelper<RlePageBuilder<Type> > {
56
public:
57
    using Self = RlePageBuilder<Type>;
58
    friend class PageBuilderHelper<Self>;
59
60
420
    Status init() override {
61
420
        switch (Type) {
62
420
        case FieldType::OLAP_FIELD_TYPE_BOOL: {
63
420
            _bit_width = 1;
64
420
            break;
65
0
        }
66
0
        default: {
67
0
            _bit_width = SIZE_OF_TYPE * 8;
68
0
            break;
69
0
        }
70
420
        }
71
420
        _rle_encoder = new RleEncoder<CppType>(&_buf, _bit_width);
72
420
        return reset();
73
420
    }
74
75
420
    ~RlePageBuilder() { delete _rle_encoder; }
76
77
412
    bool is_page_full() override { return _rle_encoder->len() >= _options.data_page_size; }
78
79
412
    Status add(const uint8_t* vals, size_t* count) override {
80
412
        DCHECK(!_finished);
81
412
        auto new_vals = reinterpret_cast<const CppType*>(vals);
82
3.91k
        for (int i = 0; i < *count; ++i) {
83
            // note: vals is not guaranteed to be aligned for now, thus memcpy here
84
3.50k
            CppType value;
85
3.50k
            memcpy(&value, &new_vals[i], SIZE_OF_TYPE);
86
3.50k
            _rle_encoder->Put(value);
87
3.50k
        }
88
89
412
        if (_count == 0) {
90
366
            memcpy(&_first_value, new_vals, SIZE_OF_TYPE);
91
366
        }
92
412
        memcpy(&_last_value, &new_vals[*count - 1], SIZE_OF_TYPE);
93
94
412
        _count += *count;
95
412
        _raw_data_size += *count * SIZE_OF_TYPE;
96
412
        return Status::OK();
97
412
    }
98
99
412
    Status finish(OwnedSlice* slice) override {
100
412
        DCHECK(!_finished);
101
412
        _finished = true;
102
        // here should Flush first and then encode the count header
103
        // or it will lead to a bug if the header is less than 8 byte and the data is small
104
412
        _rle_encoder->Flush();
105
412
        encode_fixed32_le(&_buf[0], cast_set<uint32_t>(_count));
106
412
        *slice = _buf.build();
107
412
        return Status::OK();
108
412
    }
109
110
832
    Status reset() override {
111
832
        RETURN_IF_CATCH_EXCEPTION({
112
832
            _count = 0;
113
832
            _finished = false;
114
832
            _raw_data_size = 0;
115
832
            _rle_encoder->Clear();
116
832
            _rle_encoder->Reserve(RLE_PAGE_HEADER_SIZE, 0);
117
832
        });
118
832
        return Status::OK();
119
832
    }
120
121
0
    size_t count() const override { return _count; }
122
123
12
    uint64_t size() const override { return _rle_encoder->len(); }
124
125
412
    uint64_t get_raw_data_size() const override { return _raw_data_size; }
126
127
0
    Status get_first_value(void* value) const override {
128
0
        DCHECK(_finished);
129
0
        if (_count == 0) {
130
0
            return Status::Error<ErrorCode::ENTRY_NOT_FOUND>("page is empty");
131
0
        }
132
0
        memcpy(value, &_first_value, SIZE_OF_TYPE);
133
0
        return Status::OK();
134
0
    }
135
136
0
    Status get_last_value(void* value) const override {
137
0
        DCHECK(_finished);
138
0
        if (_count == 0) {
139
0
            return Status::Error<ErrorCode::ENTRY_NOT_FOUND>("page is empty");
140
0
        }
141
0
        memcpy(value, &_last_value, SIZE_OF_TYPE);
142
0
        return Status::OK();
143
0
    }
144
145
private:
146
    RlePageBuilder(const PageBuilderOptions& options)
147
420
            : _options(options),
148
420
              _count(0),
149
420
              _finished(false),
150
420
              _bit_width(0),
151
420
              _rle_encoder(nullptr) {}
152
153
    typedef typename TypeTraits<Type>::CppType CppType;
154
    enum { SIZE_OF_TYPE = TypeTraits<Type>::size };
155
156
    PageBuilderOptions _options;
157
    size_t _count;
158
    bool _finished;
159
    int _bit_width;
160
    RleEncoder<CppType>* _rle_encoder = nullptr;
161
    faststring _buf;
162
    CppType _first_value;
163
    CppType _last_value;
164
    uint64_t _raw_data_size = 0;
165
};
166
167
template <FieldType Type>
168
class RlePageDecoder : public PageDecoder {
169
public:
170
    RlePageDecoder(Slice slice, const PageDecoderOptions& options)
171
672
            : _data(slice),
172
672
              _options(options),
173
672
              _parsed(false),
174
672
              _num_elements(0),
175
672
              _cur_index(0),
176
672
              _bit_width(0) {}
177
178
672
    Status init() override {
179
672
        CHECK(!_parsed);
180
181
672
        if (_data.size < RLE_PAGE_HEADER_SIZE) {
182
0
            return Status::Corruption("not enough bytes for header in RleBitMapBlockDecoder");
183
0
        }
184
672
        _num_elements = decode_fixed32_le((const uint8_t*)&_data[0]);
185
186
672
        _parsed = true;
187
188
672
        switch (Type) {
189
672
        case FieldType::OLAP_FIELD_TYPE_BOOL: {
190
672
            _bit_width = 1;
191
672
            break;
192
0
        }
193
0
        default: {
194
0
            _bit_width = SIZE_OF_TYPE * 8;
195
0
            break;
196
0
        }
197
672
        }
198
199
672
        _rle_decoder =
200
672
                RleDecoder<CppType>((uint8_t*)_data.data + RLE_PAGE_HEADER_SIZE,
201
672
                                    cast_set<int>(_data.size - RLE_PAGE_HEADER_SIZE), _bit_width);
202
203
672
        RETURN_IF_ERROR(seek_to_position_in_page(0));
204
672
        return Status::OK();
205
672
    }
206
207
1.20k
    Status seek_to_position_in_page(size_t pos) override {
208
1.20k
        DCHECK(_parsed) << "Must call init()";
209
1.20k
        DCHECK_LE(pos, _num_elements)
210
0
                << "Tried to seek to " << pos << " which is > number of elements (" << _num_elements
211
0
                << ") in the block!";
212
        // If the block is empty (e.g. the column is filled with nulls), there is no data to seek.
213
1.20k
        if (_num_elements == 0) [[unlikely]] {
214
132
            if (pos != 0) {
215
0
                return Status::Error<ErrorCode::INTERNAL_ERROR, false>(
216
0
                        "seek pos {} is larger than total elements  {}", pos, _num_elements);
217
132
            } else {
218
132
                return Status::OK();
219
132
            }
220
132
        }
221
1.07k
        if (_cur_index == pos) {
222
            // No need to seek.
223
590
            return Status::OK();
224
590
        } else if (_cur_index < pos) {
225
280
            size_t nskip = pos - _cur_index;
226
280
            _rle_decoder.Skip(nskip);
227
280
        } else {
228
206
            _rle_decoder = RleDecoder<CppType>((uint8_t*)_data.data + RLE_PAGE_HEADER_SIZE,
229
206
                                               cast_set<int>(_data.size - RLE_PAGE_HEADER_SIZE),
230
206
                                               _bit_width);
231
206
            _rle_decoder.Skip(pos);
232
206
        }
233
486
        _cur_index = pos;
234
486
        return Status::OK();
235
1.07k
    }
236
237
828
    Status next_batch(size_t* n, MutableColumnPtr& dst) override {
238
828
        DCHECK(_parsed);
239
828
        if (*n == 0 || _cur_index >= _num_elements) [[unlikely]] {
240
0
            *n = 0;
241
0
            return Status::OK();
242
0
        }
243
244
828
        size_t to_fetch = std::min(*n, static_cast<size_t>(_num_elements - _cur_index));
245
828
        size_t remaining = to_fetch;
246
828
        bool result = false;
247
828
        CppType value;
248
5.29k
        while (remaining > 0) {
249
4.46k
            result = _rle_decoder.Get(&value);
250
4.46k
            DCHECK(result);
251
4.46k
            dst->insert_data((char*)(&value), SIZE_OF_TYPE);
252
4.46k
            remaining--;
253
4.46k
        }
254
255
828
        _cur_index += to_fetch;
256
828
        *n = to_fetch;
257
828
        return Status::OK();
258
828
    }
259
260
    Status read_by_rowids(const rowid_t* rowids, ordinal_t page_first_ordinal, size_t* n,
261
214
                          MutableColumnPtr& dst) override {
262
214
        DCHECK(_parsed);
263
214
        if (*n == 0 || _cur_index >= _num_elements) [[unlikely]] {
264
0
            *n = 0;
265
0
            return Status::OK();
266
0
        }
267
268
214
        auto total = *n;
269
214
        bool result = false;
270
214
        size_t read_count = 0;
271
214
        CppType value;
272
446
        for (size_t i = 0; i < total; ++i) {
273
232
            ordinal_t ord = rowids[i] - page_first_ordinal;
274
232
            if (UNLIKELY(ord >= _num_elements)) {
275
0
                *n = read_count;
276
0
                return Status::OK();
277
0
            }
278
279
232
            _rle_decoder.Skip(ord - _cur_index);
280
232
            _cur_index = ord;
281
282
232
            result = _rle_decoder.Get(&value);
283
232
            _cur_index++;
284
232
            DCHECK(result);
285
232
            dst->insert_data((char*)(&value), SIZE_OF_TYPE);
286
232
            read_count++;
287
232
        }
288
214
        *n = read_count;
289
214
        return Status::OK();
290
214
    }
291
292
0
    size_t count() const override { return _num_elements; }
293
294
126
    size_t current_index() const override { return _cur_index; }
295
296
private:
297
    typedef typename TypeTraits<Type>::CppType CppType;
298
    enum { SIZE_OF_TYPE = TypeTraits<Type>::size };
299
300
    Slice _data;
301
    PageDecoderOptions _options;
302
    bool _parsed;
303
    uint32_t _num_elements;
304
    size_t _cur_index;
305
    int _bit_width;
306
    RleDecoder<CppType> _rle_decoder;
307
};
308
309
} // namespace segment_v2
310
} // namespace doris