Coverage Report

Created: 2026-03-31 18:42

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/index/indexed_column_writer.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "storage/index/indexed_column_writer.h"
19
20
#include <gen_cpp/segment_v2.pb.h>
21
22
#include <ostream>
23
#include <string>
24
25
#include "common/logging.h"
26
#include "io/fs/file_writer.h"
27
#include "storage/index/index_page.h"
28
#include "storage/key_coder.h"
29
#include "storage/olap_common.h"
30
#include "storage/segment/encoding_info.h"
31
#include "storage/segment/options.h"
32
#include "storage/segment/page_builder.h"
33
#include "storage/segment/page_io.h"
34
#include "storage/segment/page_pointer.h"
35
#include "storage/types.h"
36
#include "util/block_compression.h"
37
#include "util/slice.h"
38
39
namespace doris {
40
namespace segment_v2 {
41
#include "common/compile_check_begin.h"
42
43
IndexedColumnWriter::IndexedColumnWriter(const IndexedColumnWriterOptions& options,
44
                                         const TypeInfo* type_info, io::FileWriter* file_writer)
45
16.1k
        : _options(options),
46
16.1k
          _type_info(type_info),
47
16.1k
          _file_writer(file_writer),
48
16.1k
          _num_values(0),
49
16.1k
          _num_data_pages(0),
50
16.1k
          _disk_size(0),
51
16.1k
          _value_key_coder(nullptr),
52
16.1k
          _compress_codec(nullptr) {}
53
54
16.1k
IndexedColumnWriter::~IndexedColumnWriter() = default;
55
56
16.1k
Status IndexedColumnWriter::init() {
57
16.1k
    const EncodingInfo* encoding_info;
58
16.1k
    RETURN_IF_ERROR(EncodingInfo::get(_type_info->type(), _options.encoding, {}, &encoding_info));
59
16.1k
    _options.encoding = encoding_info->encoding();
60
    // should store more concrete encoding type instead of DEFAULT_ENCODING
61
    // because the default encoding of a data type can be changed in the future
62
16.1k
    DCHECK_NE(_options.encoding, DEFAULT_ENCODING);
63
64
16.1k
    PageBuilder* data_page_builder = nullptr;
65
16.1k
    PageBuilderOptions builder_option;
66
16.1k
    builder_option.need_check_bitmap = false;
67
16.1k
    builder_option.data_page_size = _options.data_page_size;
68
16.1k
    RETURN_IF_ERROR(encoding_info->create_page_builder(builder_option, &data_page_builder));
69
16.1k
    _data_page_builder.reset(data_page_builder);
70
71
16.1k
    if (_options.write_ordinal_index) {
72
16.1k
        _ordinal_index_builder.reset(new IndexPageBuilder(_options.index_page_size, true));
73
16.1k
    }
74
16.1k
    if (_options.write_value_index) {
75
210
        _value_index_builder.reset(new IndexPageBuilder(_options.index_page_size, true));
76
210
        _value_key_coder = get_key_coder(_type_info->type());
77
210
    }
78
79
16.1k
    if (_options.compression != NO_COMPRESSION) {
80
210
        RETURN_IF_ERROR(get_block_compression_codec(_options.compression, &_compress_codec));
81
210
    }
82
16.1k
    return Status::OK();
83
16.1k
}
84
85
154k
Status IndexedColumnWriter::add(const void* value) {
86
154k
    if (_options.write_value_index && _data_page_builder->count() == 0) {
87
        // remember page's first value encoded key because it's used to build value index
88
243
        _first_value_string.clear();
89
243
        _value_key_coder->full_encode_ascending(value, &_first_value_string);
90
243
    }
91
154k
    size_t num_to_write = 1;
92
154k
    RETURN_IF_ERROR(
93
154k
            _data_page_builder->add(reinterpret_cast<const uint8_t*>(value), &num_to_write));
94
154k
    CHECK(num_to_write == 1 || num_to_write == 0);
95
154k
    if (num_to_write == 0) {
96
0
        CHECK(_data_page_builder->is_page_full());
97
        // current page is already full, we need to first flush the current page,
98
        // and then add the value to the new page
99
0
        size_t num_val;
100
0
        RETURN_IF_ERROR(_finish_current_data_page(num_val));
101
0
        return add(value);
102
0
    }
103
154k
    _num_values++;
104
154k
    size_t num_val;
105
154k
    if (_data_page_builder->is_page_full()) {
106
33
        RETURN_IF_ERROR(_finish_current_data_page(num_val));
107
33
    }
108
154k
    return Status::OK();
109
154k
}
110
111
16.1k
Status IndexedColumnWriter::_finish_current_data_page(size_t& num_val) {
112
16.1k
    auto num_values_in_page = _data_page_builder->count();
113
16.1k
    num_val = num_values_in_page;
114
16.1k
    if (num_values_in_page == 0) {
115
10
        return Status::OK();
116
10
    }
117
16.1k
    ordinal_t first_ordinal = _num_values - num_values_in_page;
118
119
    // IndexedColumn doesn't have NULLs, thus data page body only contains encoded values
120
16.1k
    OwnedSlice page_body;
121
16.1k
    RETURN_IF_ERROR(_data_page_builder->finish(&page_body));
122
16.1k
    RETURN_IF_ERROR(_data_page_builder->reset());
123
124
16.1k
    PageFooterPB footer;
125
16.1k
    footer.set_type(DATA_PAGE);
126
16.1k
    footer.set_uncompressed_size(static_cast<uint32_t>(page_body.slice().get_size()));
127
16.1k
    footer.mutable_data_page_footer()->set_first_ordinal(first_ordinal);
128
16.1k
    footer.mutable_data_page_footer()->set_num_values(num_values_in_page);
129
16.1k
    footer.mutable_data_page_footer()->set_nullmap_size(0);
130
131
16.1k
    uint64_t start_size = _file_writer->bytes_appended();
132
16.1k
    RETURN_IF_ERROR(PageIO::compress_and_write_page(
133
16.1k
            _compress_codec, _options.compression_min_space_saving, _file_writer,
134
16.1k
            {page_body.slice()}, footer, &_last_data_page));
135
16.1k
    _num_data_pages++;
136
16.1k
    _disk_size += (_file_writer->bytes_appended() - start_size);
137
138
16.1k
    if (_options.write_ordinal_index) {
139
16.1k
        std::string key;
140
16.1k
        KeyCoderTraits<FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT>::full_encode_ascending(
141
16.1k
                &first_ordinal, &key);
142
16.1k
        _ordinal_index_builder->add(key, _last_data_page);
143
16.1k
    }
144
145
16.1k
    if (_options.write_value_index) {
146
        // TODO short separate key optimize
147
243
        _value_index_builder->add(_first_value_string, _last_data_page);
148
        // TODO record last key in short separate key optimize
149
243
    }
150
16.1k
    return Status::OK();
151
16.1k
}
152
153
16.1k
Status IndexedColumnWriter::finish(IndexedColumnMetaPB* meta) {
154
16.1k
    size_t num_val_in_page;
155
16.1k
    RETURN_IF_ERROR(_finish_current_data_page(num_val_in_page));
156
16.1k
    if (_options.write_ordinal_index) {
157
16.1k
        RETURN_IF_ERROR(
158
16.1k
                _flush_index(_ordinal_index_builder.get(), meta->mutable_ordinal_index_meta()));
159
16.1k
    }
160
16.1k
    if (_options.write_value_index) {
161
210
        RETURN_IF_ERROR(_flush_index(_value_index_builder.get(), meta->mutable_value_index_meta()));
162
210
    }
163
16.1k
    meta->set_data_type(int(_type_info->type()));
164
16.1k
    meta->set_encoding(_options.encoding);
165
16.1k
    meta->set_num_values(_num_values);
166
16.1k
    meta->set_compression(_options.compression);
167
    // `_finish_current_data_page` will be called in `add` function when page is full,
168
    // so num_val_in_page will be zero in this case.
169
16.1k
    if (_num_data_pages <= 1 && num_val_in_page != 0) {
170
16.0k
        DCHECK(num_val_in_page == _num_values)
171
0
                << "num_val_in_page: " << num_val_in_page << ", _num_values: " << _num_values;
172
16.0k
    }
173
16.1k
    return Status::OK();
174
16.1k
}
175
176
16.3k
Status IndexedColumnWriter::_flush_index(IndexPageBuilder* index_builder, BTreeMetaPB* meta) {
177
16.3k
    if (_num_data_pages <= 1) {
178
16.2k
        meta->set_is_root_data_page(true);
179
16.2k
        _last_data_page.to_proto(meta->mutable_root_page());
180
16.2k
    } else {
181
42
        OwnedSlice page_body;
182
42
        PageFooterPB page_footer;
183
42
        index_builder->finish(&page_body, &page_footer);
184
185
42
        PagePointer pp;
186
42
        uint64_t start_size = _file_writer->bytes_appended();
187
42
        RETURN_IF_ERROR(PageIO::compress_and_write_page(
188
42
                _compress_codec, _options.compression_min_space_saving, _file_writer,
189
42
                {page_body.slice()}, page_footer, &pp));
190
42
        _disk_size += (_file_writer->bytes_appended() - start_size);
191
192
42
        meta->set_is_root_data_page(false);
193
42
        pp.to_proto(meta->mutable_root_page());
194
42
    }
195
16.3k
    return Status::OK();
196
16.3k
}
197
198
#include "common/compile_check_end.h"
199
} // namespace segment_v2
200
} // namespace doris