Coverage Report

Created: 2026-03-16 05:10

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/index/indexed_column_reader.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <gen_cpp/segment_v2.pb.h>
21
#include <glog/logging.h>
22
#include <stddef.h>
23
#include <stdint.h>
24
25
#include <string>
26
#include <utility>
27
28
#include "common/status.h"
29
#include "core/data_type/data_type.h"
30
#include "io/fs/file_reader_writer_fwd.h"
31
#include "storage/index/index_page.h"
32
#include "storage/olap_common.h"
33
#include "storage/segment/common.h"
34
#include "storage/segment/page_handle.h"
35
#include "storage/segment/page_pointer.h"
36
#include "storage/segment/parsed_page.h"
37
#include "util/slice.h"
38
39
namespace doris {
40
41
class KeyCoder;
42
class TypeInfo;
43
class BlockCompressionCodec;
44
45
namespace segment_v2 {
46
47
class EncodingInfo;
48
49
// thread-safe reader for IndexedColumn (see comments of `IndexedColumnWriter` to understand what IndexedColumn is)
50
class IndexedColumnReader : public MetadataAdder<IndexedColumnReader> {
51
public:
52
    explicit IndexedColumnReader(io::FileReaderSPtr file_reader, const IndexedColumnMetaPB& meta)
53
181
            : _file_reader(std::move(file_reader)), _meta(meta) { // `meta` is copied into `_meta`
54
181
        _ordinal_index_reader = std::make_unique<IndexPageReader>();
55
181
        _value_index_reader = std::make_unique<IndexPageReader>();
56
181
    }
57
58
    ~IndexedColumnReader() override;
59
60
    Status load(bool use_page_cache, bool kept_in_memory,
61
                OlapReaderStatistics* index_load_stats = nullptr);
62
63
    // Read the page specified by `pp` from the file into `handle`.
64
    Status read_page(const PagePointer& pp, PageHandle* handle, Slice* body, PageFooterPB* footer,
65
                     PageTypePB type, BlockCompressionCodec* codec, bool pre_decode,
66
                     OlapReaderStatistics* stats = nullptr) const;
67
68
6.50k
    int64_t num_values() const { return _num_values; }
69
258
    const EncodingInfo* encoding_info() const { return _encoding_info; }
70
60
    const TypeInfo* type_info() const { return _type_info; }
71
354
    bool support_ordinal_seek() const { return _meta.has_ordinal_index_meta(); } // true iff the meta has an ordinal index
72
5.23k
    bool support_value_seek() const { return _meta.has_value_index_meta(); } // true iff the meta has a value index
73
74
187
    CompressionTypePB get_compression() const { return _meta.compression(); }
75
0
    uint64_t get_memory_size() const { return _mem_size; }
76
60
    void set_is_pk_index(bool is_pk) { _is_pk_index = is_pk; }
77
78
private:
79
    Status load_index_page(const PagePointerPB& pp, PageHandle* handle, IndexPageReader* reader,
80
                           OlapReaderStatistics* index_load_stats);
81
82
    int64_t get_metadata_size() const override;
83
84
    friend class IndexedColumnIterator; // the iterator reads the private index readers directly
85
86
    io::FileReaderSPtr _file_reader;
87
    IndexedColumnMetaPB _meta;
88
89
    bool _use_page_cache = false; // default-initialized so it never holds an indeterminate value
90
    bool _kept_in_memory = false; // default-initialized so it never holds an indeterminate value
91
    int64_t _num_values = 0;
92
    // Whether this column contains any index page.
93
    // Could be false when the column contains only one data page.
94
    bool _has_index_page = false;
95
    // Valid only when the column contains only one data page.
96
    PagePointer _sole_data_page;
97
    std::unique_ptr<IndexPageReader> _ordinal_index_reader;
98
    std::unique_ptr<IndexPageReader> _value_index_reader;
99
    PageHandle _ordinal_index_page_handle;
100
    PageHandle _value_index_page_handle;
101
102
    const TypeInfo* _type_info = nullptr;
103
    const EncodingInfo* _encoding_info = nullptr;
104
    const KeyCoder* _value_key_coder = nullptr;
105
    uint64_t _mem_size = 0;
106
    bool _is_pk_index = false;
107
};
108
109
class IndexedColumnIterator {
110
public:
111
    explicit IndexedColumnIterator(const IndexedColumnReader* reader,
112
                                   OlapReaderStatistics* stats = nullptr)
113
187
            : _reader(reader),
114
187
              _ordinal_iter(reader->_ordinal_index_reader.get()), // `reader` is dereferenced here, so it must be non-null
115
187
              _value_iter(reader->_value_index_reader.get()),
116
187
              _stats(stats) {}
117
118
    // Seek to the given ordinal entry. Entry 0 is the first entry.
119
    // Returns Status::Error<ENTRY_NOT_FOUND> if the provided seek point is past the end.
120
    // Returns NotSupported for a column without an ordinal index.
121
    Status seek_to_ordinal(ordinal_t idx);
122
123
    // Seek the index to the given key, or to the index entry immediately
124
    // before it. Then seek the data block to the value matching the key, or to
125
    // the value immediately after it.
126
    //
127
    // Sets *exact_match to indicate whether the seek found the exact
128
    // key requested.
129
    //
130
    // Returns Status::Error<ENTRY_NOT_FOUND> if the given key is greater than all keys in this column.
131
    // Returns NotSupported for a column without a value index.
132
    Status seek_at_or_after(const void* key, bool* exact_match);
133
4.52k
    Status seek_at_or_after(const std::string* key, bool* exact_match) { // convenience overload for string keys
134
4.52k
        Slice slice(key->data(), key->size()); // wraps the string's bytes; `key` must be non-null
135
4.52k
        return seek_at_or_after(static_cast<const void*>(&slice), exact_match); // forwards the Slice as an opaque key
136
4.52k
    }
137
138
    // Get the ordinal that the iterator currently points to.
139
5.32k
    ordinal_t get_current_ordinal() const {
140
        DCHECK(_seeked); // debug-only check that a seek has been performed
141
5.32k
        return _current_ordinal;
142
5.32k
    }
143
144
    // After one seek, this function may be called only once to read data.
145
    Status next_batch(size_t* n, MutableColumnPtr& dst);
146
147
private:
148
    Status _read_data_page(const PagePointer& pp);
149
150
    const IndexedColumnReader* _reader = nullptr;
151
    // Iterator for the ordinal index page.
152
    IndexPageIterator _ordinal_iter;
153
    // Iterator for the value index page.
154
    IndexPageIterator _value_iter;
155
156
    bool _seeked = false;
157
    // Index iterator currently in use: `&_ordinal_iter`, `&_value_iter`, or null.
158
    IndexPageIterator* _current_iter = nullptr;
159
    // Seeked data page, containing the value at `_current_ordinal`.
160
    ParsedPage _data_page;
161
    // next_batch() will read from this position.
162
    ordinal_t _current_ordinal = 0;
163
    // Iterator-owned compression codec; should NOT be shared between threads; initialized before use.
164
    BlockCompressionCodec* _compress_codec = nullptr;
165
    OlapReaderStatistics* _stats = nullptr;
166
};
167
168
} // namespace segment_v2
169
} // namespace doris