Coverage Report

Created: 2026-05-28 14:31

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/index/indexed_column_writer.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "storage/index/indexed_column_writer.h"
19
20
#include <gen_cpp/segment_v2.pb.h>
21
22
#include <ostream>
23
#include <string>
24
25
#include "common/logging.h"
26
#include "io/fs/file_writer.h"
27
#include "storage/index/index_page.h"
28
#include "storage/key_coder.h"
29
#include "storage/olap_common.h"
30
#include "storage/segment/encoding_info.h"
31
#include "storage/segment/options.h"
32
#include "storage/segment/page_builder.h"
33
#include "storage/segment/page_io.h"
34
#include "storage/segment/page_pointer.h"
35
#include "storage/types.h"
36
#include "util/block_compression.h"
37
#include "util/slice.h"
38
39
namespace doris {
40
namespace segment_v2 {
41
42
IndexedColumnWriter::IndexedColumnWriter(const IndexedColumnWriterOptions& options, FieldType type,
43
                                         io::FileWriter* file_writer)
44
10.7k
        : _options(options),
45
10.7k
          _type(type),
46
10.7k
          _file_writer(file_writer),
47
10.7k
          _num_values(0),
48
10.7k
          _num_data_pages(0),
49
10.7k
          _disk_size(0),
50
10.7k
          _value_key_coder(nullptr),
51
10.7k
          _compress_codec(nullptr) {}
52
53
10.7k
IndexedColumnWriter::~IndexedColumnWriter() = default;
54
55
10.7k
Status IndexedColumnWriter::init() {
56
    // Caller must set _options.encoding to a concrete value before calling init.
57
10.7k
    if (_options.encoding == DEFAULT_ENCODING) {
58
0
        return Status::InternalError(
59
0
                "IndexedColumnWriterOptions::encoding is DEFAULT_ENCODING for type={}; caller must "
60
0
                "resolve to a concrete encoding before IndexedColumnWriter::init",
61
0
                _type);
62
0
    }
63
10.7k
    const EncodingInfo* encoding_info;
64
10.7k
    RETURN_IF_ERROR(EncodingInfo::get(_type, _options.encoding, &encoding_info));
65
66
10.7k
    PageBuilder* data_page_builder = nullptr;
67
10.7k
    PageBuilderOptions builder_option;
68
10.7k
    builder_option.need_check_bitmap = false;
69
10.7k
    builder_option.data_page_size = _options.data_page_size;
70
10.7k
    RETURN_IF_ERROR(encoding_info->create_page_builder(builder_option, &data_page_builder));
71
10.7k
    _data_page_builder.reset(data_page_builder);
72
73
10.7k
    if (_options.write_ordinal_index) {
74
10.7k
        _ordinal_index_builder.reset(new IndexPageBuilder(_options.index_page_size, true));
75
10.7k
    }
76
10.7k
    if (_options.write_value_index) {
77
212
        _value_index_builder.reset(new IndexPageBuilder(_options.index_page_size, true));
78
212
        _value_key_coder = get_key_coder(_type);
79
212
    }
80
81
10.7k
    if (_options.compression != NO_COMPRESSION) {
82
212
        RETURN_IF_ERROR(get_block_compression_codec(_options.compression, &_compress_codec));
83
212
    }
84
10.7k
    return Status::OK();
85
10.7k
}
86
87
149k
Status IndexedColumnWriter::add(const void* value) {
88
149k
    if (_options.write_value_index && _data_page_builder->count() == 0) {
89
        // remember page's first value encoded key because it's used to build value index
90
245
        _first_value_string.clear();
91
245
        _value_key_coder->full_encode_ascending(value, &_first_value_string);
92
245
    }
93
149k
    size_t num_to_write = 1;
94
149k
    RETURN_IF_ERROR(
95
149k
            _data_page_builder->add(reinterpret_cast<const uint8_t*>(value), &num_to_write));
96
149k
    CHECK(num_to_write == 1 || num_to_write == 0);
97
149k
    if (num_to_write == 0) {
98
0
        CHECK(_data_page_builder->is_page_full());
99
        // current page is already full, we need to first flush the current page,
100
        // and then add the value to the new page
101
0
        size_t num_val;
102
0
        RETURN_IF_ERROR(_finish_current_data_page(num_val));
103
0
        return add(value);
104
0
    }
105
149k
    _num_values++;
106
149k
    size_t num_val;
107
149k
    if (_data_page_builder->is_page_full()) {
108
33
        RETURN_IF_ERROR(_finish_current_data_page(num_val));
109
33
    }
110
149k
    return Status::OK();
111
149k
}
112
113
10.7k
Status IndexedColumnWriter::_finish_current_data_page(size_t& num_val) {
114
10.7k
    auto num_values_in_page = _data_page_builder->count();
115
10.7k
    num_val = num_values_in_page;
116
10.7k
    if (num_values_in_page == 0) {
117
10
        return Status::OK();
118
10
    }
119
10.7k
    ordinal_t first_ordinal = _num_values - num_values_in_page;
120
121
    // IndexedColumn doesn't have NULLs, thus data page body only contains encoded values
122
10.7k
    OwnedSlice page_body;
123
10.7k
    RETURN_IF_ERROR(_data_page_builder->finish(&page_body));
124
10.7k
    RETURN_IF_ERROR(_data_page_builder->reset());
125
126
10.7k
    PageFooterPB footer;
127
10.7k
    footer.set_type(DATA_PAGE);
128
10.7k
    footer.set_uncompressed_size(static_cast<uint32_t>(page_body.slice().get_size()));
129
10.7k
    footer.mutable_data_page_footer()->set_first_ordinal(first_ordinal);
130
10.7k
    footer.mutable_data_page_footer()->set_num_values(num_values_in_page);
131
10.7k
    footer.mutable_data_page_footer()->set_nullmap_size(0);
132
133
10.7k
    uint64_t start_size = _file_writer->bytes_appended();
134
10.7k
    RETURN_IF_ERROR(PageIO::compress_and_write_page(
135
10.7k
            _compress_codec, _options.compression_min_space_saving, _file_writer,
136
10.7k
            {page_body.slice()}, footer, &_last_data_page));
137
10.7k
    _num_data_pages++;
138
10.7k
    _disk_size += (_file_writer->bytes_appended() - start_size);
139
140
10.7k
    if (_options.write_ordinal_index) {
141
10.7k
        std::string key;
142
10.7k
        KeyCoderTraits<FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT>::full_encode_ascending(
143
10.7k
                &first_ordinal, &key);
144
10.7k
        _ordinal_index_builder->add(key, _last_data_page);
145
10.7k
    }
146
147
10.7k
    if (_options.write_value_index) {
148
        // TODO short separate key optimize
149
245
        _value_index_builder->add(_first_value_string, _last_data_page);
150
        // TODO record last key in short separate key optimize
151
245
    }
152
10.7k
    return Status::OK();
153
10.7k
}
154
155
10.7k
Status IndexedColumnWriter::finish(IndexedColumnMetaPB* meta) {
156
10.7k
    size_t num_val_in_page;
157
10.7k
    RETURN_IF_ERROR(_finish_current_data_page(num_val_in_page));
158
10.7k
    if (_options.write_ordinal_index) {
159
10.7k
        RETURN_IF_ERROR(
160
10.7k
                _flush_index(_ordinal_index_builder.get(), meta->mutable_ordinal_index_meta()));
161
10.7k
    }
162
10.7k
    if (_options.write_value_index) {
163
212
        RETURN_IF_ERROR(_flush_index(_value_index_builder.get(), meta->mutable_value_index_meta()));
164
212
    }
165
10.7k
    meta->set_data_type(int(_type));
166
10.7k
    meta->set_encoding(_options.encoding);
167
10.7k
    meta->set_num_values(_num_values);
168
10.7k
    meta->set_compression(_options.compression);
169
    // `_finish_current_data_page` will be called in `add` function when page is full,
170
    // so num_val_in_page will be zero in this case.
171
10.7k
    if (_num_data_pages <= 1 && num_val_in_page != 0) {
172
10.7k
        DCHECK(num_val_in_page == _num_values)
173
0
                << "num_val_in_page: " << num_val_in_page << ", _num_values: " << _num_values;
174
10.7k
    }
175
10.7k
    return Status::OK();
176
10.7k
}
177
178
10.9k
Status IndexedColumnWriter::_flush_index(IndexPageBuilder* index_builder, BTreeMetaPB* meta) {
179
10.9k
    if (_num_data_pages <= 1) {
180
10.9k
        meta->set_is_root_data_page(true);
181
10.9k
        _last_data_page.to_proto(meta->mutable_root_page());
182
10.9k
    } else {
183
42
        OwnedSlice page_body;
184
42
        PageFooterPB page_footer;
185
42
        index_builder->finish(&page_body, &page_footer);
186
187
42
        PagePointer pp;
188
42
        uint64_t start_size = _file_writer->bytes_appended();
189
42
        RETURN_IF_ERROR(PageIO::compress_and_write_page(
190
42
                _compress_codec, _options.compression_min_space_saving, _file_writer,
191
42
                {page_body.slice()}, page_footer, &pp));
192
42
        _disk_size += (_file_writer->bytes_appended() - start_size);
193
194
42
        meta->set_is_root_data_page(false);
195
42
        pp.to_proto(meta->mutable_root_page());
196
42
    }
197
10.9k
    return Status::OK();
198
10.9k
}
199
200
} // namespace segment_v2
201
} // namespace doris