Coverage Report

Created: 2026-04-22 18:57

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/segment/binary_prefix_page.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <glog/logging.h>
21
#include <stddef.h>
22
#include <stdint.h>
23
24
#include <vector>
25
26
#include "common/status.h"
27
#include "core/column/column.h"
28
#include "storage/segment/options.h"
29
#include "storage/segment/page_builder.h"
30
#include "storage/segment/page_decoder.h"
31
#include "util/coding.h"
32
#include "util/faststring.h"
33
#include "util/slice.h"
34
35
namespace doris {
36
namespace segment_v2 {
37
38
// prefix encoding for string dictionary
39
//
40
// BinaryPrefixPage := Entry^EntryNum, Trailer
41
// Entry := SharedPrefixLength(vint), UnsharedLength(vint), Byte^UnsharedLength
42
// Trailer := NumEntry(uint32_t), RESTART_POINT_INTERVAL(uint8_t)
43
//            RestartPointStartOffset(uint32_t)^NumRestartPoints,NumRestartPoints(uint32_t)
44
class BinaryPrefixPageBuilder : public PageBuilderHelper<BinaryPrefixPageBuilder> {
45
public:
46
    using Self = BinaryPrefixPageBuilder;
47
    friend class PageBuilderHelper<Self>;
48
49
210
    Status init() override { return reset(); }
50
51
269k
    bool is_page_full() override { return size() >= _options.data_page_size; }
52
53
    Status add(const uint8_t* vals, size_t* add_count) override;
54
55
    Status finish(OwnedSlice* slice) override;
56
57
453
    Status reset() override {
58
453
        _restart_points_offset.clear();
59
453
        _last_entry.clear();
60
453
        _count = 0;
61
453
        _buffer.clear();
62
453
        _finished = false;
63
453
        _raw_data_size = 0;
64
453
        return Status::OK();
65
453
    }
66
67
269k
    uint64_t size() const override {
68
269k
        if (_finished) {
69
0
            return _buffer.size();
70
269k
        } else {
71
269k
            return _buffer.size() + (_restart_points_offset.size() + 2) * sizeof(uint32_t);
72
269k
        }
73
269k
    }
74
75
134k
    size_t count() const override { return _count; }
76
77
0
    uint64_t get_raw_data_size() const override { return _raw_data_size; }
78
79
private:
80
210
    BinaryPrefixPageBuilder(const PageBuilderOptions& options) : _options(options) {}
81
82
    PageBuilderOptions _options;
83
    std::vector<uint32_t> _restart_points_offset;
84
    // Holds the most recently added entry; used by add() to compute the
85
    // shared-prefix length against the next entry.
86
    faststring _last_entry;
87
    size_t _count = 0;
88
    bool _finished = false;
89
    faststring _buffer;
90
    uint64_t _raw_data_size = 0;
91
    // This is a empirical value, Kudu and LevelDB use this default value
92
    static const uint8_t RESTART_POINT_INTERVAL = 16;
93
};
94
95
class BinaryPrefixPageDecoder : public PageDecoder {
96
public:
97
    BinaryPrefixPageDecoder(Slice data, const PageDecoderOptions& options)
98
203
            : _data(data), _parsed(false) {}
99
100
    Status init() override;
101
102
    Status seek_to_position_in_page(size_t pos) override;
103
104
    Status seek_at_or_after_value(const void* value, bool* exact_match) override;
105
106
    Status next_batch(size_t* n, MutableColumnPtr& dst) override;
107
108
0
    size_t count() const override {
109
0
        DCHECK(_parsed);
110
0
        return _num_values;
111
0
    }
112
113
5.45k
    size_t current_index() const override {
114
5.45k
        DCHECK(_parsed);
115
5.45k
        return _cur_pos;
116
5.45k
    }
117
118
private:
119
    // decode shared and non-shared entry length from `ptr`.
120
    // return ptr past the parsed value when success.
121
    // return nullptr on failure
122
    const uint8_t* _decode_value_lengths(const uint8_t* ptr, uint32_t* shared,
123
                                         uint32_t* non_shared);
124
125
    // return start pointer of the restart point at index `restart_point_index`
126
46.0k
    const uint8_t* _get_restart_point(size_t restart_point_index) const {
127
46.0k
        return reinterpret_cast<const uint8_t*>(_data.get_data()) +
128
46.0k
               decode_fixed32_le(_restarts_ptr + restart_point_index * sizeof(uint32_t));
129
46.0k
    }
130
131
    // read next value at `_cur_pos` and `_next_ptr` into `_current_value`.
132
    // return OK and advance `_next_ptr` on success. `_cur_pos` is not modified.
133
    // return EndOfFile when no more entry can be read.
134
    // return other error status otherwise.
135
    Status _read_next_value();
136
137
    // seek to the first value at the given restart point
138
    Status _seek_to_restart_point(size_t restart_point_index);
139
140
    Slice _data;
141
    bool _parsed = false;
142
    size_t _num_values = 0;
143
    uint8_t _restart_point_internal = 0;
144
    uint32_t _num_restarts = 0;
145
    // pointer to _footer start
146
    const uint8_t* _footer_start = nullptr;
147
    // pointer to restart offsets array
148
    const uint8_t* _restarts_ptr = nullptr;
149
    // ordinal of the first value to return in next_batch()
150
    uint32_t _cur_pos = 0;
151
    // first value to return in next_batch()
152
    faststring _current_value;
153
    // pointer to the start of next value to read, advanced by `_read_next_value`
154
    const uint8_t* _next_ptr = nullptr;
155
};
156
157
} // namespace segment_v2
158
} // namespace doris