Coverage Report

Created: 2026-03-14 04:23

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/segment/binary_prefix_page.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <glog/logging.h>
21
#include <stddef.h>
22
#include <stdint.h>
23
24
#include <vector>
25
26
#include "common/status.h"
27
#include "core/column/column.h"
28
#include "storage/segment/options.h"
29
#include "storage/segment/page_builder.h"
30
#include "storage/segment/page_decoder.h"
31
#include "util/coding.h"
32
#include "util/faststring.h"
33
#include "util/slice.h"
34
35
namespace doris {
36
namespace segment_v2 {
37
38
// prefix encoding for string dictionary
39
//
40
// BinaryPrefixPage := Entry^EntryNum, Trailer
41
// Entry := SharedPrefixLength(vint), UnsharedLength(vint), Byte^UnsharedLength
42
// Trailer := NumEntry(uint32_t), RESTART_POINT_INTERVAL(uint8_t)
43
//            RestartPointStartOffset(uint32_t)^NumRestartPoints,NumRestartPoints(uint32_t)
44
class BinaryPrefixPageBuilder : public PageBuilderHelper<BinaryPrefixPageBuilder> {
45
public:
46
    using Self = BinaryPrefixPageBuilder;
47
    friend class PageBuilderHelper<Self>;
48
49
206
    Status init() override { return reset(); }
50
51
269k
    bool is_page_full() override { return size() >= _options.data_page_size; }
52
53
    Status add(const uint8_t* vals, size_t* add_count) override;
54
55
    Status finish(OwnedSlice* slice) override;
56
57
445
    Status reset() override {
58
445
        _restart_points_offset.clear();
59
445
        _last_entry.clear();
60
445
        _count = 0;
61
445
        _buffer.clear();
62
445
        _finished = false;
63
445
        _raw_data_size = 0;
64
445
        return Status::OK();
65
445
    }
66
67
269k
    uint64_t size() const override {
68
269k
        if (_finished) {
69
0
            return _buffer.size();
70
269k
        } else {
71
269k
            return _buffer.size() + (_restart_points_offset.size() + 2) * sizeof(uint32_t);
72
269k
        }
73
269k
    }
74
75
134k
    size_t count() const override { return _count; }
76
77
0
    uint64_t get_raw_data_size() const override { return _raw_data_size; }
78
79
0
    Status get_first_value(void* value) const override {
80
0
        DCHECK(_finished);
81
0
        if (_count == 0) {
82
0
            return Status::Error<ErrorCode::ENTRY_NOT_FOUND>("page is empty");
83
0
        }
84
0
        *reinterpret_cast<Slice*>(value) = Slice(_first_entry);
85
0
        return Status::OK();
86
0
    }
87
88
0
    Status get_last_value(void* value) const override {
89
0
        DCHECK(_finished);
90
0
        if (_count == 0) {
91
0
            return Status::Error<ErrorCode::ENTRY_NOT_FOUND>("page is empty");
92
0
        }
93
0
        *reinterpret_cast<Slice*>(value) = Slice(_last_entry);
94
0
        return Status::OK();
95
0
    }
96
97
private:
98
206
    BinaryPrefixPageBuilder(const PageBuilderOptions& options) : _options(options) {}
99
100
    PageBuilderOptions _options;
101
    std::vector<uint32_t> _restart_points_offset;
102
    faststring _first_entry;
103
    faststring _last_entry;
104
    size_t _count = 0;
105
    bool _finished = false;
106
    faststring _buffer;
107
    uint64_t _raw_data_size = 0;
108
    // This is a empirical value, Kudu and LevelDB use this default value
109
    static const uint8_t RESTART_POINT_INTERVAL = 16;
110
};
111
112
class BinaryPrefixPageDecoder : public PageDecoder {
113
public:
114
    BinaryPrefixPageDecoder(Slice data, const PageDecoderOptions& options)
115
199
            : _data(data), _parsed(false) {}
116
117
    Status init() override;
118
119
    Status seek_to_position_in_page(size_t pos) override;
120
121
    Status seek_at_or_after_value(const void* value, bool* exact_match) override;
122
123
    Status next_batch(size_t* n, MutableColumnPtr& dst) override;
124
125
0
    size_t count() const override {
126
0
        DCHECK(_parsed);
127
0
        return _num_values;
128
0
    }
129
130
5.45k
    size_t current_index() const override {
131
5.45k
        DCHECK(_parsed);
132
5.45k
        return _cur_pos;
133
5.45k
    }
134
135
private:
136
    // decode shared and non-shared entry length from `ptr`.
137
    // return ptr past the parsed value when success.
138
    // return nullptr on failure
139
    const uint8_t* _decode_value_lengths(const uint8_t* ptr, uint32_t* shared,
140
                                         uint32_t* non_shared);
141
142
    // return start pointer of the restart point at index `restart_point_index`
143
46.0k
    const uint8_t* _get_restart_point(size_t restart_point_index) const {
144
46.0k
        return reinterpret_cast<const uint8_t*>(_data.get_data()) +
145
46.0k
               decode_fixed32_le(_restarts_ptr + restart_point_index * sizeof(uint32_t));
146
46.0k
    }
147
148
    // read next value at `_cur_pos` and `_next_ptr` into `_current_value`.
149
    // return OK and advance `_next_ptr` on success. `_cur_pos` is not modified.
150
    // return EndOfFile when no more entry can be read.
151
    // return other error status otherwise.
152
    Status _read_next_value();
153
154
    // seek to the first value at the given restart point
155
    Status _seek_to_restart_point(size_t restart_point_index);
156
157
    Slice _data;
158
    bool _parsed = false;
159
    size_t _num_values = 0;
160
    uint8_t _restart_point_internal = 0;
161
    uint32_t _num_restarts = 0;
162
    // pointer to _footer start
163
    const uint8_t* _footer_start = nullptr;
164
    // pointer to restart offsets array
165
    const uint8_t* _restarts_ptr = nullptr;
166
    // ordinal of the first value to return in next_batch()
167
    uint32_t _cur_pos = 0;
168
    // first value to return in next_batch()
169
    faststring _current_value;
170
    // pointer to the start of next value to read, advanced by `_read_next_value`
171
    const uint8_t* _next_ptr = nullptr;
172
};
173
174
} // namespace segment_v2
175
} // namespace doris