Coverage Report

Created: 2026-05-13 20:40

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/index/indexed_column_reader.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <gen_cpp/segment_v2.pb.h>
21
#include <glog/logging.h>
22
#include <stddef.h>
23
#include <stdint.h>
24
25
#include <string>
26
#include <utility>
27
28
#include "common/status.h"
29
#include "core/data_type/data_type.h"
30
#include "io/fs/file_reader_writer_fwd.h"
31
#include "storage/index/index_page.h"
32
#include "storage/olap_common.h"
33
#include "storage/segment/common.h"
34
#include "storage/segment/page_handle.h"
35
#include "storage/segment/page_pointer.h"
36
#include "storage/segment/parsed_page.h"
37
#include "util/slice.h"
38
39
namespace doris {
40
41
class KeyCoder;
42
class BlockCompressionCodec;
43
44
namespace segment_v2 {
45
46
class EncodingInfo;
47
48
// thread-safe reader for IndexedColumn (see comments of `IndexedColumnWriter` to understand what IndexedColumn is)
// Holds the column's meta plus one IndexPageReader for the ordinal index and one for the value index.
49
class IndexedColumnReader : public MetadataAdder<IndexedColumnReader> {
50
public:
51
    // Stores `file_reader` (moved into the member) and a copy of `meta`, then allocates the two
    // index page readers. No loading is performed here; see load().
    explicit IndexedColumnReader(io::FileReaderSPtr file_reader, const IndexedColumnMetaPB& meta)
52
185
            : _file_reader(std::move(file_reader)), _meta(meta) {
53
185
        _ordinal_index_reader = std::make_unique<IndexPageReader>();
54
185
        _value_index_reader = std::make_unique<IndexPageReader>();
55
185
    }
56
57
    ~IndexedColumnReader() override;
58
59
    // Loads the column with the given caching options; defined out of line.
    // `index_load_stats` is optional and may be null.
    // NOTE(review): presumably this must succeed before the accessors below return meaningful
    // values -- confirm against the implementation file.
    Status load(bool use_page_cache, bool kept_in_memory,
60
                OlapReaderStatistics* index_load_stats = nullptr);
61
62
    // read a page specified by `pp' from `file' into `handle'
    // `stats` is optional and may be null.
63
    Status read_page(const PagePointer& pp, PageHandle* handle, Slice* body, PageFooterPB* footer,
64
                     PageTypePB type, BlockCompressionCodec* codec, bool pre_decode,
65
                     OlapReaderStatistics* stats = nullptr) const;
66
67
6.51k
    int64_t num_values() const { return _num_values; }
68
262
    const EncodingInfo* encoding_info() const { return _encoding_info; }
69
60
    FieldType type() const { return _type; }
70
358
    bool support_ordinal_seek() const { return _meta.has_ordinal_index_meta(); } // meta carries an ordinal index
71
5.23k
    bool support_value_seek() const { return _meta.has_value_index_meta(); } // meta carries a value index
72
73
191
    CompressionTypePB get_compression() const { return _meta.compression(); }
74
0
    uint64_t get_memory_size() const { return _mem_size; }
75
60
    void set_is_pk_index(bool is_pk) { _is_pk_index = is_pk; }
76
77
private:
78
    // Helper that loads one index page located by `pp` into `handle` / `reader`; defined out of line.
    Status load_index_page(const PagePointerPB& pp, PageHandle* handle, IndexPageReader* reader,
79
                           OlapReaderStatistics* index_load_stats);
80
81
    int64_t get_metadata_size() const override;
82
83
    // The iterator reads the private index readers directly.
    friend class IndexedColumnIterator;
84
85
    io::FileReaderSPtr _file_reader;
86
    IndexedColumnMetaPB _meta;
87
88
    // Caching options. NOTE(review): presumably assigned from load()'s arguments -- confirm.
    // Default-initialized so they never hold an indeterminate value before that happens.
    bool _use_page_cache = false;
89
    bool _kept_in_memory = false;
90
    int64_t _num_values = 0;
91
    // whether this column contains any index page.
92
    // could be false when the column contains only one data page.
93
    bool _has_index_page = false;
94
    // valid only when the column contains only one data page
95
    PagePointer _sole_data_page;
96
    std::unique_ptr<IndexPageReader> _ordinal_index_reader;
97
    std::unique_ptr<IndexPageReader> _value_index_reader;
98
    PageHandle _ordinal_index_page_handle;
99
    PageHandle _value_index_page_handle;
100
101
    FieldType _type = FieldType::OLAP_FIELD_TYPE_NONE;
102
    const EncodingInfo* _encoding_info = nullptr;
103
    const KeyCoder* _value_key_coder = nullptr;
104
    uint64_t _mem_size = 0;
105
    bool _is_pk_index = false;
106
};
107
108
// Iterator over an IndexedColumnReader: seeks by ordinal or by key, then reads values
// with next_batch(). Stores only a raw pointer to the reader.
// NOTE(review): presumably the reader must outlive the iterator -- confirm with callers.
class IndexedColumnIterator {
109
public:
110
    // Binds the two index page iterators to the index readers owned by `reader`.
    // `reader` is dereferenced unconditionally here, so it must be non-null.
    // `stats` is optional and may be null.
    explicit IndexedColumnIterator(const IndexedColumnReader* reader,
111
                                   OlapReaderStatistics* stats = nullptr)
112
191
            : _reader(reader),
113
191
              _ordinal_iter(reader->_ordinal_index_reader.get()),
114
191
              _value_iter(reader->_value_index_reader.get()),
115
191
              _stats(stats) {}
116
117
    // Seek to the given ordinal entry. Entry 0 is the first entry.
118
    // Return Status::Error<ENTRY_NOT_FOUND> if provided seek point is past the end.
119
    // Return NotSupported for column without ordinal index.
120
    Status seek_to_ordinal(ordinal_t idx);
121
122
    // Seek the index to the given key, or to the index entry immediately
123
    // before it. Then seek the data block to the value matching value or to
124
    // the value immediately after it.
125
    //
126
    // Sets *exact_match to indicate whether the seek found the exact
127
    // key requested.
128
    //
129
    // Return Status::Error<ENTRY_NOT_FOUND> if the given key is greater than all keys in this column.
130
    // Return NotSupported for column without value index.
131
    Status seek_at_or_after(const void* key, bool* exact_match);
132
4.52k
    // Convenience overload for string keys: wraps `key`'s data pointer and size in a Slice and
    // passes the Slice's address to the `const void*` overload. `key` must be non-null.
    Status seek_at_or_after(const std::string* key, bool* exact_match) {
133
4.52k
        Slice slice(key->data(), key->size());
134
4.52k
        return seek_at_or_after(static_cast<const void*>(&slice), exact_match);
135
4.52k
    }
136
137
    // Get the ordinal index that the iterator is currently pointed to.
    // Debug builds assert (DCHECK) that a seek has been performed first.
138
5.32k
    ordinal_t get_current_ordinal() const {
139
        DCHECK(_seeked);
140
5.32k
        return _current_ordinal;
141
5.32k
    }
142
143
    // After one seek, we can only call this function once to read data
144
    Status next_batch(size_t* n, MutableColumnPtr& dst);
145
146
private:
147
    // Helper that reads the data page located by `pp`; defined out of line.
    Status _read_data_page(const PagePointer& pp);
148
149
    // Not owned. Declared before the two iterators below, which the constructor
    // initializes from the constructor argument rather than from this member.
    const IndexedColumnReader* _reader = nullptr;
150
    // iterator for ordinal index page
151
    IndexPageIterator _ordinal_iter;
152
    // iterator for value index page
153
    IndexPageIterator _value_iter;
154
155
    // checked by get_current_ordinal(); starts out false
    bool _seeked = false;
156
    // current in-use index iterator, could be `&_ordinal_iter` or `&_value_iter` or null
157
    IndexPageIterator* _current_iter = nullptr;
158
    // seeked data page, containing value at `_current_ordinal`
159
    ParsedPage _data_page;
160
    // next_batch() will read from this position
161
    ordinal_t _current_ordinal = 0;
162
    // iterator owned compress codec, should NOT be shared by threads, initialized before used
163
    BlockCompressionCodec* _compress_codec = nullptr;
164
    // optional statistics sink supplied at construction; may be null
    OlapReaderStatistics* _stats = nullptr;
165
};
166
167
} // namespace segment_v2
168
} // namespace doris