Coverage Report

Created: 2026-03-16 19:58

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/segment/segment.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <butil/macros.h>
21
#include <gen_cpp/olap_file.pb.h>
22
#include <gen_cpp/segment_v2.pb.h>
23
#include <glog/logging.h>
24
25
#include <cstdint>
26
#include <map>
27
#include <memory> // for unique_ptr
28
#include <string>
29
#include <unordered_map>
30
31
#include "agent/be_exec_version_manager.h"
32
#include "common/status.h" // Status
33
#include "core/column/column.h"
34
#include "core/data_type/data_type.h"
35
#include "io/fs/file_reader.h"
36
#include "io/fs/file_reader_writer_fwd.h"
37
#include "io/fs/file_system.h"
38
#include "runtime/descriptors.h"
39
#include "storage/cache/page_cache.h"
40
#include "storage/field.h"
41
#include "storage/olap_common.h"
42
#include "storage/schema.h"
43
#include "storage/segment/page_handle.h"
44
#include "storage/tablet/tablet_schema.h"
45
#include "util/once.h"
46
#include "util/slice.h"
47
namespace doris {
48
class IDataType;
49
50
class ShortKeyIndexDecoder;
51
class Schema;
52
class StorageReadOptions;
53
class PrimaryKeyIndexReader;
54
class RowwiseIterator;
55
struct RowLocation;
56
57
namespace segment_v2 {
58
59
class Segment;
60
class InvertedIndexIterator;
61
class IndexFileReader;
62
class IndexIterator;
63
class ColumnReader;
64
class ColumnIterator;
65
class ColumnReaderCache;
66
class ColumnMetaAccessor;
67
68
using SegmentSharedPtr = std::shared_ptr<Segment>;
69
70
struct SparseColumnCache;
71
using SparseColumnCacheSPtr = std::shared_ptr<SparseColumnCache>;
72
73
// key is column path, value is the sparse column cache
74
// now column path is only SPARSE_COLUMN_PATH, in the future, we can add more sparse column paths
75
using PathToSparseColumnCache = std::unordered_map<std::string, SparseColumnCacheSPtr>;
76
using PathToSparseColumnCacheUPtr = std::unique_ptr<PathToSparseColumnCache>;
77
78
struct BinaryColumnCache;
79
using BinaryColumnCacheSPtr = std::shared_ptr<BinaryColumnCache>;
80
using PathToBinaryColumnCache = std::unordered_map<std::string, BinaryColumnCacheSPtr>;
81
using PathToBinaryColumnCacheUPtr = std::unique_ptr<PathToBinaryColumnCache>;
82
83
// A Segment is used to represent a segment in memory format. When segment is
84
// generated, it won't be modified, so this class is aimed at helping read operations.
85
// It will prepare all ColumnReader to create ColumnIterator as needed.
86
// And user can create a RowwiseIterator through new_iterator function.
87
//
88
// NOTE: This segment is bound to a specific TabletSchema; when that TabletSchema
89
// is changed, this segment can not be used any more. For example, after a schema
90
// change finished, client should disable all cached Segment for old TabletSchema.
91
class Segment : public std::enable_shared_from_this<Segment>, public MetadataAdder<Segment> {
92
public:
93
    static Status open(io::FileSystemSPtr fs, const std::string& path, int64_t tablet_id,
94
                       uint32_t segment_id, RowsetId rowset_id, TabletSchemaSPtr tablet_schema,
95
                       const io::FileReaderOptions& reader_options,
96
                       std::shared_ptr<Segment>* output, InvertedIndexFileInfo idx_file_info = {},
97
                       OlapReaderStatistics* stats = nullptr);
98
99
    static io::UInt128Wrapper file_cache_key(std::string_view rowset_id, uint32_t seg_id);
100
0
    // Instance convenience overload: forwards this segment's rowset id (stringified)
    // and its segment id to the static file_cache_key().
    io::UInt128Wrapper file_cache_key() const {
        const auto rowset_id_str = _rowset_id.to_string();
        return file_cache_key(rowset_id_str, _segment_id);
    }
103
104
    ~Segment() override;
105
106
    int64_t get_metadata_size() const override;
107
    void update_metadata_size();
108
109
    Status new_iterator(SchemaSPtr schema, const StorageReadOptions& read_options,
110
                        std::unique_ptr<RowwiseIterator>* iter);
111
112
    static Status new_default_iterator(const TabletColumn& tablet_column,
113
                                       std::unique_ptr<ColumnIterator>* iter);
114
115
13.6k
    // Returns the stored segment id.
    uint32_t id() const {
        return _segment_id;
    }
116
117
467
    // Returns a copy of the stored rowset id.
    RowsetId rowset_id() const {
        return _rowset_id;
    }
118
119
24.3k
    // Returns the stored row count of this segment.
    uint32_t num_rows() const {
        return _num_rows;
    }
120
121
    // If variant_sparse_column_cache is nullptr, the sparse column cache is not used.
122
    Status new_column_iterator(const TabletColumn& tablet_column,
123
                               std::unique_ptr<ColumnIterator>* iter, const StorageReadOptions* opt,
124
                               const std::unordered_map<int32_t, PathToBinaryColumnCacheUPtr>*
125
                                       variant_sparse_column_cache = nullptr);
126
127
    Status new_index_iterator(const TabletColumn& tablet_column, const TabletIndex* index_meta,
128
                              const StorageReadOptions& read_options,
129
                              std::unique_ptr<IndexIterator>* iter);
130
131
1
    // Returns a non-owning pointer to the short key index decoder held by this segment.
    // The DCHECK asserts that the index load has already been invoked and that its
    // stored result is OK.
    const ShortKeyIndexDecoder* get_short_key_index() const {
        DCHECK(_load_index_once.has_called() && _load_index_once.stored_result().ok());
        const ShortKeyIndexDecoder* decoder = _sk_index_decoder.get();
        return decoder;
    }
135
136
54
    // Returns a non-owning pointer to the primary key index reader held by this segment.
    // The DCHECK asserts that the index load has already been invoked and that its
    // stored result is OK.
    const PrimaryKeyIndexReader* get_primary_key_index() const {
        DCHECK(_load_index_once.has_called() && _load_index_once.stored_result().ok());
        const PrimaryKeyIndexReader* reader = _pk_index_reader.get();
        return reader;
    }
140
141
    Status lookup_row_key(const Slice& key, const TabletSchema* latest_schema, bool with_seq_col,
142
                          bool with_rowid, RowLocation* row_location, OlapReaderStatistics* stats,
143
                          std::string* encoded_seq_value = nullptr);
144
145
    Status read_key_by_rowid(uint32_t row_id, std::string* key);
146
147
    Status seek_and_read_by_rowid(const TabletSchema& schema, SlotDescriptor* slot, uint32_t row_id,
148
                                  MutableColumnPtr& result,
149
                                  StorageReadOptions& storage_read_options,
150
                                  std::unique_ptr<ColumnIterator>& iterator_hint);
151
152
    Status load_index(OlapReaderStatistics* stats);
153
154
    Status load_pk_index_and_bf(OlapReaderStatistics* stats);
155
156
0
    // Forwards new_status to the segment's AtomicStatus (_healthy_status).
    void update_healthy_status(Status new_status) {
        _healthy_status.update(new_status);
    }
157
    // The segment is loaded into SegmentCache and then will load indices. If something goes wrong
158
    // during loading indices, it should be removed from SegmentCache. If not, it will always report an error during
159
    // query. So we add a healthy status API; the caller should check the healthy status before using the segment.
160
    Status healthy_status();
161
162
0
    std::string min_key() {
163
0
        DCHECK(_tablet_schema->keys_type() == UNIQUE_KEYS && _pk_index_meta != nullptr);
164
0
        return _pk_index_meta->min_key();
165
0
    }
166
0
    std::string max_key() {
167
0
        DCHECK(_tablet_schema->keys_type() == UNIQUE_KEYS && _pk_index_meta != nullptr);
168
0
        return _pk_index_meta->max_key();
169
0
    }
170
171
140
    // Returns a copy of the shared pointer to this segment's file reader.
    // Declared const because it only reads segment state, so it is callable on a const Segment.
    io::FileReaderSPtr file_reader() const {
        return _file_reader;
    }
172
173
    // Including the column reader memory.
174
    // The other method, `get_metadata_size`, does not include the column readers, only the segment object itself.
175
18.6k
    // Returns the tracked metadata memory usage counter (_meta_mem_usage).
    int64_t meta_mem_usage() const {
        return _meta_mem_usage;
    }
176
177
    // Get the inner file column's data type.
178
    // When `read_options` is provided, the decision (e.g. flat-leaf vs hierarchical) can depend
179
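    // on the reader type and tablet schema.
    // NOTE(review): the original wording said a nullptr `read_options` is treated as a query
    // reader, but the parameter below is a const reference and cannot be null — confirm and update.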
180
    // nullptr will be returned if storage type does not contain such column.
181
    std::shared_ptr<const IDataType> get_data_type_of(const TabletColumn& column,
182
                                                      const StorageReadOptions& read_options);
183
184
    // If column in segment is the same type in schema, then it is safe to apply predicate.
185
    bool can_apply_predicate_safely(
186
            int cid, const Schema& schema,
187
            const std::map<std::string, DataTypePtr>& target_cast_type_for_variants,
188
0
            const StorageReadOptions& read_options) {
189
0
        const doris::StorageField* col = schema.column(cid);
190
0
        DCHECK(col != nullptr) << "Column not found in schema for cid=" << cid;
191
0
        DataTypePtr storage_column_type = get_data_type_of(col->get_desc(), read_options);
192
0
        if (storage_column_type == nullptr || col->type() != FieldType::OLAP_FIELD_TYPE_VARIANT ||
193
0
            !target_cast_type_for_variants.contains(col->name())) {
194
            // Default column iterator or not variant column
195
0
            return true;
196
0
        }
197
0
        if (storage_column_type->equals(*target_cast_type_for_variants.at(col->name()))) {
198
0
            return true;
199
0
        } else {
200
0
            return false;
201
0
        }
202
0
    }
203
204
12
    // Returns a const reference to the tablet schema shared pointer held by this segment.
    // The reference is to a member, so it must not outlive the Segment.
    // Declared const because it only reads segment state, so it is callable on a const Segment.
    const TabletSchemaSPtr& tablet_schema() const {
        return _tablet_schema;
    }
205
206
    // Get the column reader by tablet column; returns NOT_FOUND if no reader is found in this segment.
207
    Status get_column_reader(const TabletColumn& col, std::shared_ptr<ColumnReader>* column_reader,
208
                             OlapReaderStatistics* stats);
209
210
    // Get the column reader by column unique id; returns NOT_FOUND if no reader is found in this segment.
211
    Status get_column_reader(int32_t col_uid, std::shared_ptr<ColumnReader>* column_reader,
212
                             OlapReaderStatistics* stats);
213
214
    Status traverse_column_meta_pbs(const std::function<void(const ColumnMetaPB&)>& visitor);
215
216
    static StoragePageCache::CacheKey get_segment_footer_cache_key(
217
            const io::FileReaderSPtr& file_reader);
218
219
private:
220
    DISALLOW_COPY_AND_ASSIGN(Segment);
221
    Segment(uint32_t segment_id, RowsetId rowset_id, TabletSchemaSPtr tablet_schema,
222
            InvertedIndexFileInfo idx_file_info = InvertedIndexFileInfo());
223
    static Status _open(io::FileSystemSPtr fs, const std::string& path, uint32_t segment_id,
224
                        RowsetId rowset_id, TabletSchemaSPtr tablet_schema,
225
                        const io::FileReaderOptions& reader_options,
226
                        std::shared_ptr<Segment>* output, InvertedIndexFileInfo idx_file_info,
227
                        OlapReaderStatistics* stats);
228
    // open segment file and read the minimum amount of necessary information (footer)
229
    Status _open(OlapReaderStatistics* stats);
230
    Status _parse_footer(std::shared_ptr<SegmentFooterPB>& footer,
231
                         OlapReaderStatistics* stats = nullptr);
232
    Status _create_column_meta(const SegmentFooterPB& footer);
233
    Status _load_pk_bloom_filter(OlapReaderStatistics* stats);
234
    // Must ensure _create_column_readers_once has been called before calling this function.
235
    ColumnReader* _get_column_reader(const TabletColumn& col);
236
237
    Status _write_error_file(size_t file_size, size_t offset, size_t bytes_read, char* data,
238
                             io::IOContext& io_ctx);
239
240
    Status _open_index_file_reader();
241
242
    Status _create_column_meta_once(OlapReaderStatistics* stats);
243
244
    virtual Status _get_segment_footer(std::shared_ptr<SegmentFooterPB>&,
245
                                       OlapReaderStatistics* stats);
246
247
    StoragePageCache::CacheKey get_segment_footer_cache_key() const;
248
249
    friend class SegmentIterator;
250
    friend class ColumnReaderCache;
251
    friend class MockSegment;
252
253
    io::FileSystemSPtr _fs;
254
    io::FileReaderSPtr _file_reader;
255
    uint32_t _segment_id;
256
    uint32_t _num_rows;
257
    AtomicStatus _healthy_status;
258
259
    // 1. Tracking memory use by segment meta data such as footer or index page.
260
    // 2. Tracking memory use by segment column reader
261
    // The memory consumed by querying is tracked in segment iterator.
262
    int64_t _meta_mem_usage;
263
    int64_t _tracked_meta_mem_usage = 0;
264
265
    RowsetId _rowset_id;
266
    TabletSchemaSPtr _tablet_schema;
267
268
    std::unique_ptr<PrimaryKeyIndexMetaPB> _pk_index_meta;
269
    PagePointerPB _sk_index_page;
270
271
    // Limited cache for column readers
272
    std::unique_ptr<ColumnReaderCache> _column_reader_cache;
273
274
    // Centralized accessor for column metadata layout and uid->column_ordinal mapping.
275
    std::unique_ptr<ColumnMetaAccessor> _column_meta_accessor;
276
277
    // Init from ColumnMetaPB in SegmentFooterPB
278
    // map column unique id ---> it's inner data type
279
    std::map<int32_t, std::shared_ptr<const IDataType>> _file_column_types;
280
281
    // used to guarantee that short key index will be loaded at most once in a thread-safe way
282
    DorisCallOnce<Status> _load_index_once;
283
    // used to guarantee that primary key bloom filter will be loaded at most once in a thread-safe way
284
    DorisCallOnce<Status> _load_pk_bf_once;
285
286
    DorisCallOnce<Status> _create_column_meta_once_call;
287
288
    std::weak_ptr<SegmentFooterPB> _footer_pb;
289
290
    // used to hold short key index page in memory
291
    PageHandle _sk_index_handle;
292
    // short key index decoder
293
    // all content is in memory
294
    std::unique_ptr<ShortKeyIndexDecoder> _sk_index_decoder;
295
    // primary key index reader
296
    std::unique_ptr<PrimaryKeyIndexReader> _pk_index_reader;
297
    std::mutex _open_lock;
298
    // inverted index file reader
299
    std::shared_ptr<IndexFileReader> _index_file_reader;
300
    DorisCallOnce<Status> _index_file_reader_open;
301
302
    InvertedIndexFileInfo _idx_file_info;
303
304
    int _be_exec_version = BeExecVersionManager::get_newest_version();
305
};
306
307
} // namespace segment_v2
308
} // namespace doris