Coverage Report

Created: 2026-04-14 17:06

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/index/indexed_column_writer.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "storage/index/indexed_column_writer.h"
19
20
#include <gen_cpp/segment_v2.pb.h>
21
22
#include <ostream>
23
#include <string>
24
25
#include "common/logging.h"
26
#include "io/fs/file_writer.h"
27
#include "storage/index/index_page.h"
28
#include "storage/key_coder.h"
29
#include "storage/olap_common.h"
30
#include "storage/segment/encoding_info.h"
31
#include "storage/segment/options.h"
32
#include "storage/segment/page_builder.h"
33
#include "storage/segment/page_io.h"
34
#include "storage/segment/page_pointer.h"
35
#include "storage/types.h"
36
#include "util/block_compression.h"
37
#include "util/slice.h"
38
39
namespace doris {
40
namespace segment_v2 {
41
42
IndexedColumnWriter::IndexedColumnWriter(const IndexedColumnWriterOptions& options,
43
                                         const TypeInfo* type_info, io::FileWriter* file_writer)
44
16.1k
        : _options(options),
45
16.1k
          _type_info(type_info),
46
16.1k
          _file_writer(file_writer),
47
16.1k
          _num_values(0),
48
16.1k
          _num_data_pages(0),
49
16.1k
          _disk_size(0),
50
16.1k
          _value_key_coder(nullptr),
51
16.1k
          _compress_codec(nullptr) {}
52
53
16.1k
// Defaulted destructor, defined out-of-line in this translation unit; no
// explicit teardown logic is written here.
IndexedColumnWriter::~IndexedColumnWriter() = default;
54
55
16.1k
// Prepares the writer for add()/finish():
//   1. resolves the encoding for the column type and stores it back into
//      `_options.encoding`,
//   2. creates the data page builder for that encoding,
//   3. creates the ordinal and/or value index builders that the options ask for
//      (the value index also needs a key coder for the column type),
//   4. looks up the block compression codec unless compression is disabled.
// Returns the first error produced by any step, otherwise Status::OK().
Status IndexedColumnWriter::init() {
    const EncodingInfo* resolved_encoding = nullptr;
    RETURN_IF_ERROR(
            EncodingInfo::get(_type_info->type(), _options.encoding, {}, &resolved_encoding));
    // Persist the concrete encoding rather than DEFAULT_ENCODING: the default
    // chosen for a data type may change in a later version.
    _options.encoding = resolved_encoding->encoding();
    DCHECK_NE(_options.encoding, DEFAULT_ENCODING);

    PageBuilderOptions page_opts;
    page_opts.need_check_bitmap = false;
    page_opts.data_page_size = _options.data_page_size;

    PageBuilder* raw_builder = nullptr;
    RETURN_IF_ERROR(resolved_encoding->create_page_builder(page_opts, &raw_builder));
    // Hand the raw pointer over to the owning member.
    _data_page_builder.reset(raw_builder);

    if (_options.write_ordinal_index) {
        _ordinal_index_builder.reset(new IndexPageBuilder(_options.index_page_size, true));
    }

    if (_options.write_value_index) {
        _value_index_builder.reset(new IndexPageBuilder(_options.index_page_size, true));
        _value_key_coder = get_key_coder(_type_info->type());
    }

    if (_options.compression != NO_COMPRESSION) {
        RETURN_IF_ERROR(get_block_compression_codec(_options.compression, &_compress_codec));
    }

    return Status::OK();
}
83
84
154k
// Appends a single value to the column.
//
// `value` points at one value of the column's type; it is handed to the data
// page builder as raw bytes. If the builder accepts the value, the value count
// is bumped and the page is flushed once the builder reports it is full. If
// the builder accepts nothing, the current page must already be full: it is
// flushed and the value is added again to the fresh page.
//
// Returns the first error from the page builder or from flushing a page,
// otherwise Status::OK().
Status IndexedColumnWriter::add(const void* value) {
    // The encoded key of a page's first value is what the value index stores
    // for that page, so capture it whenever a page is about to get its first entry.
    if (_options.write_value_index && _data_page_builder->count() == 0) {
        _first_value_string.clear();
        _value_key_coder->full_encode_ascending(value, &_first_value_string);
    }

    size_t num_to_write = 1;
    RETURN_IF_ERROR(
            _data_page_builder->add(reinterpret_cast<const uint8_t*>(value), &num_to_write));
    CHECK(num_to_write == 1 || num_to_write == 0);

    if (num_to_write == 1) {
        // Common path: the value landed in the current page.
        ++_num_values;
        if (_data_page_builder->is_page_full()) {
            size_t flushed_count;
            RETURN_IF_ERROR(_finish_current_data_page(flushed_count));
        }
        return Status::OK();
    }

    // Nothing was written: the page was already full. Flush it, then retry the
    // same value against the new, empty page.
    CHECK(_data_page_builder->is_page_full());
    size_t flushed_count;
    RETURN_IF_ERROR(_finish_current_data_page(flushed_count));
    return add(value);
}
109
110
16.1k
// Flushes the data page currently being built, if it holds any values.
//
// `num_val` (out) receives the number of values that were in the page, which
// is zero when there was nothing to flush. For a non-empty page this:
//   - finishes and resets the page builder,
//   - writes the page (compressed when a codec is set) with a DATA_PAGE footer,
//   - updates the page count and on-disk size,
//   - adds an entry for the page to each enabled index builder.
//
// Returns the first error from the builder or from page I/O, else Status::OK().
Status IndexedColumnWriter::_finish_current_data_page(size_t& num_val) {
    const auto num_values_in_page = _data_page_builder->count();
    num_val = num_values_in_page;
    if (num_values_in_page == 0) {
        // Empty page: nothing to write, no index entries to add.
        return Status::OK();
    }

    // `_num_values` already counts the values in this page, so the page's
    // first ordinal is the total minus the page's own size.
    ordinal_t first_ordinal = _num_values - num_values_in_page;

    // An IndexedColumn has no NULLs, so the page body is just the encoded values.
    OwnedSlice encoded_body;
    RETURN_IF_ERROR(_data_page_builder->finish(&encoded_body));
    RETURN_IF_ERROR(_data_page_builder->reset());

    PageFooterPB data_footer;
    data_footer.set_type(DATA_PAGE);
    data_footer.set_uncompressed_size(static_cast<uint32_t>(encoded_body.slice().get_size()));
    auto* page_info = data_footer.mutable_data_page_footer();
    page_info->set_first_ordinal(first_ordinal);
    page_info->set_num_values(num_values_in_page);
    page_info->set_nullmap_size(0);

    // Measure how many bytes this page adds to the file.
    const uint64_t bytes_before = _file_writer->bytes_appended();
    RETURN_IF_ERROR(PageIO::compress_and_write_page(
            _compress_codec, _options.compression_min_space_saving, _file_writer,
            {encoded_body.slice()}, data_footer, &_last_data_page));
    ++_num_data_pages;
    _disk_size += (_file_writer->bytes_appended() - bytes_before);

    if (_options.write_ordinal_index) {
        // Ordinal index entry: encoded first ordinal -> page pointer.
        std::string ordinal_key;
        KeyCoderTraits<FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT>::full_encode_ascending(
                &first_ordinal, &ordinal_key);
        _ordinal_index_builder->add(ordinal_key, _last_data_page);
    }

    if (_options.write_value_index) {
        // Value index entry: encoded first value of the page -> page pointer.
        // TODO short separate key optimize
        _value_index_builder->add(_first_value_string, _last_data_page);
        // TODO record last key in short separate key optimize
    }

    return Status::OK();
}
151
152
16.1k
// Completes the column: flushes the pending data page, flushes every enabled
// index into `meta`, then records the data type, encoding, value count and
// compression in `meta`.
//
// Returns the first error from flushing the page or an index, otherwise
// Status::OK(). On an early error return the scalar fields of `meta` have
// not been set yet.
Status IndexedColumnWriter::finish(IndexedColumnMetaPB* meta) {
    size_t num_val_in_page;
    RETURN_IF_ERROR(_finish_current_data_page(num_val_in_page));

    if (_options.write_ordinal_index) {
        BTreeMetaPB* ordinal_meta = meta->mutable_ordinal_index_meta();
        RETURN_IF_ERROR(_flush_index(_ordinal_index_builder.get(), ordinal_meta));
    }

    if (_options.write_value_index) {
        BTreeMetaPB* value_meta = meta->mutable_value_index_meta();
        RETURN_IF_ERROR(_flush_index(_value_index_builder.get(), value_meta));
    }

    meta->set_data_type(static_cast<int>(_type_info->type()));
    meta->set_encoding(_options.encoding);
    meta->set_num_values(_num_values);
    meta->set_compression(_options.compression);

    // add() already flushes a page the moment it becomes full, in which case
    // the flush above found an empty page and num_val_in_page is zero. Only
    // when this call flushed the one and only page must that page contain
    // every value written.
    if (_num_data_pages <= 1 && num_val_in_page != 0) {
        DCHECK(num_val_in_page == _num_values)
                << "num_val_in_page: " << num_val_in_page << ", _num_values: " << _num_values;
    }

    return Status::OK();
}
174
175
16.3k
// Fills `meta` with the root page of one index.
//
// With at most one data page written, no index page is emitted: `meta` is
// flagged as "root is a data page" and points at `_last_data_page`. Otherwise
// the contents of `index_builder` are finished into a page, that page is
// written to the file (compressed when a codec is set), and `meta` points at it.
//
// Returns the page I/O error if writing the index page fails, else Status::OK().
Status IndexedColumnWriter::_flush_index(IndexPageBuilder* index_builder, BTreeMetaPB* meta) {
    if (_num_data_pages <= 1) {
        // Zero or one data page: the last data page pointer serves as the root.
        meta->set_is_root_data_page(true);
        _last_data_page.to_proto(meta->mutable_root_page());
        return Status::OK();
    }

    OwnedSlice index_body;
    PageFooterPB index_footer;
    // NOTE(review): any result of finish() is not inspected here — confirm it returns void.
    index_builder->finish(&index_body, &index_footer);

    PagePointer root_pointer;
    // Measure how many bytes the index page adds to the file.
    const uint64_t bytes_before = _file_writer->bytes_appended();
    RETURN_IF_ERROR(PageIO::compress_and_write_page(
            _compress_codec, _options.compression_min_space_saving, _file_writer,
            {index_body.slice()}, index_footer, &root_pointer));
    _disk_size += (_file_writer->bytes_appended() - bytes_before);

    meta->set_is_root_data_page(false);
    root_pointer.to_proto(meta->mutable_root_page());
    return Status::OK();
}
196
197
} // namespace segment_v2
198
} // namespace doris