Coverage Report

Created: 2026-06-04 13:50

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/segment/binary_dict_page.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "storage/segment/binary_dict_page.h"
19
20
#include <gen_cpp/olap_file.pb.h>
21
#include <gen_cpp/segment_v2.pb.h>
22
23
#include <algorithm>
24
#include <ostream>
25
#include <utility>
26
27
#include "common/compiler_util.h" // IWYU pragma: keep
28
#include "common/config.h"
29
#include "common/logging.h"
30
#include "common/status.h"
31
#include "core/column/column.h"
32
#include "core/column/column_string.h"
33
#include "storage/segment/binary_plain_page_v2.h"
34
#include "storage/segment/binary_plain_page_v3.h"
35
#include "storage/segment/bitshuffle_page.h"
36
#include "storage/segment/encoding_info.h"
37
#include "util/coding.h"
38
#include "util/slice.h" // for Slice
39
40
namespace doris {
41
struct StringRef;
42
43
namespace segment_v2 {
44
45
BinaryDictPageBuilder::BinaryDictPageBuilder(const PageBuilderOptions& options)
46
3.85k
        : _options(options),
47
3.85k
          _finished(false),
48
3.85k
          _data_page_builder(nullptr),
49
3.85k
          _dict_builder(nullptr),
50
3.85k
          _encoding_type(DICT_ENCODING),
51
3.85k
          _binary_plain_encoding_type(options.dict_binary_plain_encoding) {}
52
53
3.85k
Status BinaryDictPageBuilder::init() {
54
    // initially use DICT_ENCODING
55
    // TODO: the data page builder type can be created by Factory according to user config
56
3.85k
    PageBuilder* data_page_builder_ptr = nullptr;
57
3.85k
    RETURN_IF_ERROR(BitshufflePageBuilder<FieldType::OLAP_FIELD_TYPE_INT>::create(
58
3.85k
            &data_page_builder_ptr, _options));
59
3.85k
    _data_page_builder.reset(data_page_builder_ptr);
60
3.85k
    PageBuilderOptions dict_builder_options;
61
    // here the binary plain page is used to store the dictionary items so
62
    // the data page size is set to the same as the dict page size
63
3.85k
    dict_builder_options.data_page_size = _options.dict_page_size;
64
3.85k
    dict_builder_options.dict_page_size = _options.dict_page_size;
65
3.85k
    dict_builder_options.is_dict_page = true;
66
67
3.85k
    const EncodingInfo* encoding_info;
68
3.85k
    RETURN_IF_ERROR(EncodingInfo::get(FieldType::OLAP_FIELD_TYPE_VARCHAR,
69
3.85k
                                      _binary_plain_encoding_type, &encoding_info));
70
3.85k
    RETURN_IF_ERROR(encoding_info->create_page_builder(dict_builder_options, _dict_builder));
71
3.85k
    return reset();
72
3.85k
}
73
74
1.16M
bool BinaryDictPageBuilder::is_page_full() {
75
1.16M
    if (_data_page_builder->is_page_full()) {
76
2.04k
        return true;
77
2.04k
    }
78
1.16M
    if (_encoding_type == DICT_ENCODING && _dict_builder->is_page_full()) {
79
3.13k
        return true;
80
3.13k
    }
81
1.15M
    return false;
82
1.16M
}
83
84
319k
Status BinaryDictPageBuilder::add(const uint8_t* vals, size_t* count) {
85
319k
    if (_encoding_type == DICT_ENCODING) {
86
16.5k
        DCHECK(!_finished);
87
16.5k
        DCHECK_GT(*count, 0);
88
16.5k
        const Slice* src = reinterpret_cast<const Slice*>(vals);
89
16.5k
        size_t num_added = 0;
90
16.5k
        uint32_t value_code = -1;
91
16.5k
        auto* actual_builder = dynamic_cast<BitshufflePageBuilder<FieldType::OLAP_FIELD_TYPE_INT>*>(
92
16.5k
                _data_page_builder.get());
93
94
859k
        for (int i = 0; i < *count; ++i, ++src) {
95
844k
            if (is_page_full()) {
96
1.56k
                break;
97
1.56k
            }
98
99
842k
            if (src->empty() && _has_empty) {
100
87.8k
                value_code = _empty_code;
101
754k
            } else if (auto iter = _dictionary.find(*src); iter != _dictionary.end()) {
102
631k
                value_code = iter->second;
103
631k
            } else {
104
123k
                Slice dict_item(src->data, src->size);
105
123k
                if (src->size > 0) {
106
123k
                    char* item_mem = _arena.alloc(src->size);
107
123k
                    if (item_mem == nullptr) {
108
0
                        return Status::MemoryAllocFailed("memory allocate failed, size:{}",
109
0
                                                         src->size);
110
0
                    }
111
123k
                    dict_item.relocate(item_mem);
112
123k
                }
113
123k
                value_code = cast_set<uint32_t>(_dictionary.size());
114
123k
                size_t add_count = 1;
115
123k
                RETURN_IF_ERROR(_dict_builder->add(reinterpret_cast<const uint8_t*>(&dict_item),
116
123k
                                                   &add_count));
117
123k
                if (add_count == 0) {
118
                    // current dict page is full, stop processing remaining inputs
119
0
                    break;
120
0
                }
121
123k
                _dictionary.emplace(dict_item, value_code);
122
123k
                if (src->empty()) {
123
319
                    _has_empty = true;
124
319
                    _empty_code = value_code;
125
319
                }
126
123k
            }
127
842k
            size_t add_count = 1;
128
842k
            RETURN_IF_ERROR(actual_builder->single_add(
129
842k
                    reinterpret_cast<const uint8_t*>(&value_code), &add_count));
130
842k
            if (add_count == 0) {
131
                // current data page is full, stop processing remaining inputs
132
0
                break;
133
0
            }
134
            // Track raw data size: the original string size
135
842k
            _raw_data_size += src->size;
136
842k
            num_added += 1;
137
842k
        }
138
16.5k
        *count = num_added;
139
16.5k
        return Status::OK();
140
303k
    } else {
141
303k
        DCHECK(_encoding_type == PLAIN_ENCODING || _encoding_type == PLAIN_ENCODING_V2 ||
142
303k
               _encoding_type == PLAIN_ENCODING_V3);
143
303k
        RETURN_IF_ERROR(_data_page_builder->add(vals, count));
144
        // For plain encoding, track raw data size from the input
145
303k
        const Slice* src = reinterpret_cast<const Slice*>(vals);
146
643k
        for (size_t i = 0; i < *count; ++i) {
147
340k
            _raw_data_size += src[i].size;
148
340k
        }
149
303k
        return Status::OK();
150
303k
    }
151
319k
}
152
153
7.39k
Status BinaryDictPageBuilder::finish(OwnedSlice* slice) {
154
7.39k
    if (VLOG_DEBUG_IS_ON && _encoding_type == DICT_ENCODING) {
155
0
        VLOG_DEBUG << "dict page size:" << _dict_builder->size();
156
0
    }
157
158
7.39k
    DCHECK(!_finished);
159
7.39k
    _finished = true;
160
161
7.39k
    OwnedSlice data_slice;
162
7.39k
    RETURN_IF_ERROR(_data_page_builder->finish(&data_slice));
163
    // TODO(gaodayue) separate page header and content to avoid this copy
164
7.39k
    RETURN_IF_CATCH_EXCEPTION(
165
7.39k
            { _buffer.append(data_slice.slice().data, data_slice.slice().size); });
166
7.39k
    encode_fixed32_le(&_buffer[0], _encoding_type);
167
7.39k
    *slice = _buffer.build();
168
7.39k
    return Status::OK();
169
7.39k
}
170
171
11.2k
Status BinaryDictPageBuilder::reset() {
172
11.2k
    RETURN_IF_CATCH_EXCEPTION({
173
11.2k
        _finished = false;
174
11.2k
        _raw_data_size = 0;
175
11.2k
        _buffer.reserve(_options.data_page_size + BINARY_DICT_PAGE_HEADER_SIZE);
176
11.2k
        _buffer.resize(BINARY_DICT_PAGE_HEADER_SIZE);
177
178
11.2k
        if (_encoding_type == DICT_ENCODING && _dict_builder->is_page_full()) {
179
11.2k
            const EncodingInfo* encoding_info;
180
11.2k
            RETURN_IF_ERROR(EncodingInfo::get(FieldType::OLAP_FIELD_TYPE_VARCHAR,
181
11.2k
                                              _binary_plain_encoding_type, &encoding_info));
182
11.2k
            RETURN_IF_ERROR(encoding_info->create_page_builder(_options, _data_page_builder));
183
11.2k
            _encoding_type = _binary_plain_encoding_type;
184
11.2k
        } else {
185
11.2k
            RETURN_IF_ERROR(_data_page_builder->reset());
186
11.2k
        }
187
11.2k
    });
188
11.2k
    return Status::OK();
189
11.2k
}
190
191
4
size_t BinaryDictPageBuilder::count() const {
192
4
    return _data_page_builder->count();
193
4
}
194
195
3.47k
uint64_t BinaryDictPageBuilder::size() const {
196
3.47k
    return _arena.used_size() + _data_page_builder->size();
197
3.47k
}
198
199
3.82k
Status BinaryDictPageBuilder::get_dictionary_page(OwnedSlice* dictionary_page) {
200
3.82k
    return _dict_builder->finish(dictionary_page);
201
3.82k
}
202
203
3.82k
Status BinaryDictPageBuilder::get_dictionary_page_encoding(EncodingTypePB* encoding) const {
204
3.82k
    *encoding = _binary_plain_encoding_type;
205
3.82k
    return Status::OK();
206
3.82k
}
207
208
7.23k
uint64_t BinaryDictPageBuilder::get_raw_data_size() const {
209
7.23k
    return _raw_data_size;
210
7.23k
}
211
212
BinaryDictPageDecoder::BinaryDictPageDecoder(Slice data, const PageDecoderOptions& options)
213
4.86k
        : _data(data),
214
4.86k
          _options(options),
215
4.86k
          _data_page_decoder(nullptr),
216
4.86k
          _parsed(false),
217
4.86k
          _encoding_type(UNKNOWN_ENCODING) {}
218
219
4.86k
Status BinaryDictPageDecoder::init() {
220
4.86k
    CHECK(!_parsed);
221
4.86k
    if (_data.size < BINARY_DICT_PAGE_HEADER_SIZE) {
222
0
        return Status::Corruption("invalid data size:{}, header size:{}", _data.size,
223
0
                                  BINARY_DICT_PAGE_HEADER_SIZE);
224
0
    }
225
4.86k
    size_t type = decode_fixed32_le((const uint8_t*)&_data.data[0]);
226
4.86k
    _encoding_type = static_cast<EncodingTypePB>(type);
227
4.86k
    _data.remove_prefix(BINARY_DICT_PAGE_HEADER_SIZE);
228
4.86k
    if (_encoding_type == DICT_ENCODING) {
229
2.79k
        _data_page_decoder.reset(
230
2.79k
                _bit_shuffle_ptr =
231
2.79k
                        new BitShufflePageDecoder<FieldType::OLAP_FIELD_TYPE_INT>(_data, _options));
232
2.79k
    } else if (_encoding_type == PLAIN_ENCODING) {
233
1.78k
        _data_page_decoder.reset(
234
1.78k
                new BinaryPlainPageDecoder<FieldType::OLAP_FIELD_TYPE_VARCHAR>(_data, _options));
235
1.78k
    } else if (_encoding_type == PLAIN_ENCODING_V2) {
236
141
        _data_page_decoder.reset(
237
141
                new BinaryPlainPageV2Decoder<FieldType::OLAP_FIELD_TYPE_VARCHAR>(_data, _options));
238
141
    } else if (_encoding_type == PLAIN_ENCODING_V3) {
239
        // The V3 pre-decoder has already rewritten the inner page into the V1 layout, so the
240
        // V3 decoder (a BinaryPlainPageDecoder subclass) reads it like V1.
241
141
        _data_page_decoder.reset(
242
141
                new BinaryPlainPageV3Decoder<FieldType::OLAP_FIELD_TYPE_VARCHAR>(_data, _options));
243
141
    } else {
244
0
        LOG(WARNING) << "invalid encoding type:" << _encoding_type;
245
0
        return Status::Corruption("invalid encoding type:{}", _encoding_type);
246
0
    }
247
248
4.86k
    RETURN_IF_ERROR(_data_page_decoder->init());
249
4.86k
    _parsed = true;
250
4.86k
    return Status::OK();
251
4.86k
}
252
253
4.86k
BinaryDictPageDecoder::~BinaryDictPageDecoder() {}
254
255
877
Status BinaryDictPageDecoder::seek_to_position_in_page(size_t pos) {
256
877
    return _data_page_decoder->seek_to_position_in_page(pos);
257
877
}
258
259
18.9k
bool BinaryDictPageDecoder::is_dict_encoding() const {
260
18.9k
    return _encoding_type == DICT_ENCODING;
261
18.9k
}
262
263
2.79k
void BinaryDictPageDecoder::set_dict_decoder(uint32_t num_dict_items, StringRef* dict_word_info) {
264
2.79k
    _num_dict_items = num_dict_items;
265
2.79k
    _dict_word_info = dict_word_info;
266
2.79k
};
267
268
8.61k
Status BinaryDictPageDecoder::next_batch(size_t* n, MutableColumnPtr& dst) {
269
8.61k
    if (!is_dict_encoding()) {
270
1.51k
        dst = dst->convert_to_predicate_column_if_dictionary();
271
1.51k
        return _data_page_decoder->next_batch(n, dst);
272
1.51k
    }
273
    // dictionary encoding
274
8.61k
    DCHECK(_parsed);
275
7.10k
    DCHECK(_dict_word_info != nullptr) << "_dict_word_info is nullptr";
276
277
7.10k
    if (*n == 0 || _bit_shuffle_ptr->_cur_index >= _bit_shuffle_ptr->_num_elements) [[unlikely]] {
278
0
        *n = 0;
279
0
        return Status::OK();
280
0
    }
281
282
7.10k
    size_t max_fetch = std::min(*n, static_cast<size_t>(_bit_shuffle_ptr->_num_elements -
283
7.10k
                                                        _bit_shuffle_ptr->_cur_index));
284
7.10k
    *n = max_fetch;
285
286
7.10k
    if (_options.only_read_offsets) {
287
        // OFFSET_ONLY mode: resolve dict codes to get real string lengths
288
        // without copying actual char data. This allows length() to work.
289
        // ColumnDictI32 does not implement insert_offsets_from_lengths, so convert
290
        // it to a predicate column (ColumnString) first. This is a no-op for
291
        // non-dictionary columns and for ColumnNullable it converts the nested column.
292
0
        dst = dst->convert_to_predicate_column_if_dictionary();
293
0
        const auto* data_array = reinterpret_cast<const int32_t*>(_bit_shuffle_ptr->get_data(0));
294
0
        size_t start_index = _bit_shuffle_ptr->_cur_index;
295
        // Reuse _buffer (int32_t vector) to store uint32_t lengths.
296
        // int32_t and uint32_t have the same size/alignment, and string
297
        // lengths are always non-negative, so the bit patterns are identical.
298
0
        _buffer.resize(max_fetch);
299
0
        for (size_t i = 0; i < max_fetch; ++i) {
300
0
            int32_t codeword = data_array[start_index + i];
301
0
            _buffer[i] = static_cast<int32_t>(_dict_word_info[codeword].size);
302
0
        }
303
0
        dst->insert_offsets_from_lengths(reinterpret_cast<const uint32_t*>(_buffer.data()),
304
0
                                         max_fetch);
305
7.10k
    } else {
306
7.10k
        const auto* data_array = reinterpret_cast<const int32_t*>(_bit_shuffle_ptr->get_data(0));
307
7.10k
        size_t start_index = _bit_shuffle_ptr->_cur_index;
308
309
7.10k
        dst->insert_many_dict_data(data_array, start_index, _dict_word_info, max_fetch,
310
7.10k
                                   _num_dict_items);
311
7.10k
    }
312
313
7.10k
    _bit_shuffle_ptr->_cur_index += max_fetch;
314
315
7.10k
    return Status::OK();
316
7.10k
}
317
318
Status BinaryDictPageDecoder::read_by_rowids(const rowid_t* rowids, ordinal_t page_first_ordinal,
319
1.09k
                                             size_t* n, MutableColumnPtr& dst) {
320
1.09k
    if (!is_dict_encoding()) {
321
554
        dst = dst->convert_to_predicate_column_if_dictionary();
322
554
        return _data_page_decoder->read_by_rowids(rowids, page_first_ordinal, n, dst);
323
554
    }
324
1.09k
    DCHECK(_parsed);
325
536
    DCHECK(_dict_word_info != nullptr) << "_dict_word_info is nullptr";
326
327
536
    if (*n == 0) [[unlikely]] {
328
0
        *n = 0;
329
0
        return Status::OK();
330
0
    }
331
332
536
    auto total = *n;
333
334
536
    if (_options.only_read_offsets) {
335
        // OFFSET_ONLY mode: resolve dict codes to get real string lengths
336
        // without copying actual char data. This allows length() to work correctly.
337
        // ColumnDictI32 does not implement insert_offsets_from_lengths, so convert
338
        // it to a predicate column (ColumnString) first.
339
0
        dst = dst->convert_to_predicate_column_if_dictionary();
340
0
        const auto* data_array = reinterpret_cast<const int32_t*>(_bit_shuffle_ptr->get_data(0));
341
0
        size_t read_count = 0;
342
0
        _buffer.resize(total);
343
0
        for (size_t i = 0; i < total; ++i) {
344
0
            ordinal_t ord = rowids[i] - page_first_ordinal;
345
0
            if (ord >= _bit_shuffle_ptr->_num_elements) [[unlikely]] {
346
0
                break;
347
0
            }
348
0
            int32_t codeword = data_array[ord];
349
0
            _buffer[read_count] = static_cast<int32_t>(_dict_word_info[codeword].size);
350
0
            read_count++;
351
0
        }
352
0
        if (read_count > 0) {
353
0
            dst->insert_offsets_from_lengths(reinterpret_cast<const uint32_t*>(_buffer.data()),
354
0
                                             read_count);
355
0
        }
356
0
        *n = read_count;
357
0
        return Status::OK();
358
0
    }
359
360
536
    const auto* data_array = reinterpret_cast<const int32_t*>(_bit_shuffle_ptr->get_data(0));
361
536
    size_t read_count = 0;
362
536
    _buffer.resize(total);
363
24.6k
    for (size_t i = 0; i < total; ++i) {
364
24.1k
        ordinal_t ord = rowids[i] - page_first_ordinal;
365
24.1k
        if (ord >= _bit_shuffle_ptr->_num_elements) [[unlikely]] {
366
0
            break;
367
0
        }
368
369
24.1k
        _buffer[read_count++] = data_array[ord];
370
24.1k
    }
371
372
536
    if (LIKELY(read_count > 0)) {
373
536
        dst->insert_many_dict_data(_buffer.data(), 0, _dict_word_info, read_count, _num_dict_items);
374
536
    }
375
536
    *n = read_count;
376
536
    return Status::OK();
377
536
}
378
379
} // namespace segment_v2
380
} // namespace doris