be/src/storage/segment/segment.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <butil/macros.h> |
21 | | #include <gen_cpp/olap_file.pb.h> |
22 | | #include <gen_cpp/segment_v2.pb.h> |
23 | | #include <glog/logging.h> |
24 | | |
25 | | #include <cstdint> |
26 | | #include <map> |
27 | | #include <memory> // for unique_ptr |
28 | | #include <string> |
29 | | #include <unordered_map> |
30 | | |
31 | | #include "agent/be_exec_version_manager.h" |
32 | | #include "common/be_mock_util.h" |
33 | | #include "common/status.h" // Status |
34 | | #include "core/column/column.h" |
35 | | #include "core/data_type/data_type.h" |
36 | | #include "io/fs/file_reader.h" |
37 | | #include "io/fs/file_reader_writer_fwd.h" |
38 | | #include "io/fs/file_system.h" |
39 | | #include "runtime/descriptors.h" |
40 | | #include "storage/cache/page_cache.h" |
41 | | #include "storage/field.h" |
42 | | #include "storage/olap_common.h" |
43 | | #include "storage/schema.h" |
44 | | #include "storage/segment/page_handle.h" |
45 | | #include "storage/tablet/tablet_schema.h" |
46 | | #include "util/once.h" |
47 | | #include "util/slice.h" |
48 | | namespace doris { |
49 | | class IDataType; |
50 | | |
51 | | class ShortKeyIndexDecoder; |
52 | | class Schema; |
53 | | class StorageReadOptions; |
54 | | class PrimaryKeyIndexReader; |
55 | | class RowwiseIterator; |
56 | | struct RowLocation; |
57 | | |
58 | | namespace segment_v2 { |
59 | | |
60 | | class Segment; |
61 | | class InvertedIndexIterator; |
62 | | class IndexFileReader; |
63 | | class IndexIterator; |
64 | | class ColumnReader; |
65 | | class ColumnIterator; |
66 | | class ColumnReaderCache; |
67 | | class ColumnMetaAccessor; |
68 | | |
69 | | using SegmentSharedPtr = std::shared_ptr<Segment>; |
70 | | |
71 | | struct SparseColumnCache; |
72 | | using SparseColumnCacheSPtr = std::shared_ptr<SparseColumnCache>; |
73 | | |
74 | | // Key is the column path, value is the sparse column cache. |
75 | | // Currently the only column path is SPARSE_COLUMN_PATH; more sparse column paths may be added in the future. |
76 | | using PathToSparseColumnCache = std::unordered_map<std::string, SparseColumnCacheSPtr>; |
77 | | using PathToSparseColumnCacheUPtr = std::unique_ptr<PathToSparseColumnCache>; |
78 | | |
79 | | struct BinaryColumnCache; |
80 | | using BinaryColumnCacheSPtr = std::shared_ptr<BinaryColumnCache>; |
81 | | using PathToBinaryColumnCache = std::unordered_map<std::string, BinaryColumnCacheSPtr>; |
82 | | using PathToBinaryColumnCacheUPtr = std::unique_ptr<PathToBinaryColumnCache>; |
83 | | |
84 | | // A Segment is the in-memory representation of a segment file. Once a segment is |
85 | | // generated it is never modified, so this class only serves read operations. |
86 | | // It prepares ColumnReaders so that ColumnIterators can be created as needed. |
87 | | // Users can create a RowwiseIterator through the new_iterator function. |
88 | | // |
89 | | // NOTE: A Segment is bound to a specific TabletSchema; when the TabletSchema |
90 | | // changes, this segment can no longer be used. For example, after a schema |
91 | | // change finishes, the client should invalidate all cached Segments for the old TabletSchema. |
92 | | class Segment : public std::enable_shared_from_this<Segment>, public MetadataAdder<Segment> { |
93 | | public: |
94 | | static Status open(io::FileSystemSPtr fs, const std::string& path, int64_t tablet_id, |
95 | | uint32_t segment_id, RowsetId rowset_id, TabletSchemaSPtr tablet_schema, |
96 | | const io::FileReaderOptions& reader_options, |
97 | | std::shared_ptr<Segment>* output, InvertedIndexFileInfo idx_file_info = {}, |
98 | | OlapReaderStatistics* stats = nullptr); |
99 | | |
100 | | static io::UInt128Wrapper file_cache_key(std::string_view rowset_id, uint32_t seg_id); |
101 | 0 | io::UInt128Wrapper file_cache_key() const { |
102 | 0 | return file_cache_key(_rowset_id.to_string(), _segment_id); |
103 | 0 | } |
104 | | |
105 | | ~Segment() override; |
106 | | |
107 | | int64_t get_metadata_size() const override; |
108 | | void update_metadata_size(); |
109 | | |
110 | | Status new_iterator(SchemaSPtr schema, const StorageReadOptions& read_options, |
111 | | std::unique_ptr<RowwiseIterator>* iter); |
112 | | |
113 | | static Status new_default_iterator(const TabletColumn& tablet_column, |
114 | | std::unique_ptr<ColumnIterator>* iter); |
115 | | |
116 | 13.6k | uint32_t id() const { return _segment_id; } |
117 | | |
118 | 467 | RowsetId rowset_id() const { return _rowset_id; } |
119 | | |
120 | 24.3k | MOCK_FUNCTION uint32_t num_rows() const { return _num_rows; } |
121 | | |
122 | | // If variant_sparse_column_cache is nullptr, the sparse column cache is not used. |
123 | | Status new_column_iterator(const TabletColumn& tablet_column, |
124 | | std::unique_ptr<ColumnIterator>* iter, const StorageReadOptions* opt, |
125 | | const std::unordered_map<int32_t, PathToBinaryColumnCacheUPtr>* |
126 | | variant_sparse_column_cache = nullptr); |
127 | | |
128 | | Status new_index_iterator(const TabletColumn& tablet_column, const TabletIndex* index_meta, |
129 | | const StorageReadOptions& read_options, |
130 | | std::unique_ptr<IndexIterator>* iter); |
131 | | |
132 | 1 | const ShortKeyIndexDecoder* get_short_key_index() const { |
133 | 1 | DCHECK(_load_index_once.has_called() && _load_index_once.stored_result().ok()); |
134 | 1 | return _sk_index_decoder.get(); |
135 | 1 | } |
136 | | |
137 | 54 | const PrimaryKeyIndexReader* get_primary_key_index() const { |
138 | 54 | DCHECK(_load_index_once.has_called() && _load_index_once.stored_result().ok()); |
139 | 54 | return _pk_index_reader.get(); |
140 | 54 | } |
141 | | |
142 | | Status lookup_row_key(const Slice& key, const TabletSchema* latest_schema, bool with_seq_col, |
143 | | bool with_rowid, RowLocation* row_location, OlapReaderStatistics* stats, |
144 | | std::string* encoded_seq_value = nullptr); |
145 | | |
146 | | Status read_key_by_rowid(uint32_t row_id, std::string* key); |
147 | | |
148 | | Status seek_and_read_by_rowid(const TabletSchema& schema, SlotDescriptor* slot, uint32_t row_id, |
149 | | MutableColumnPtr& result, |
150 | | StorageReadOptions& storage_read_options, |
151 | | std::unique_ptr<ColumnIterator>& iterator_hint); |
152 | | |
153 | | Status load_index(OlapReaderStatistics* stats); |
154 | | |
155 | | Status load_pk_index_and_bf(OlapReaderStatistics* stats); |
156 | | |
157 | 0 | void update_healthy_status(Status new_status) { _healthy_status.update(new_status); } |
158 | | // The segment is loaded into SegmentCache first and its indices are loaded afterwards. If something goes wrong |
159 | | // while loading indices, the segment should be removed from SegmentCache; otherwise it would keep reporting errors |
160 | | // during queries. Hence this healthy status API: callers should check the healthy status before using the segment. |
161 | | Status healthy_status(); |
162 | | |
163 | 0 | std::string min_key() { |
164 | 0 | DCHECK(_tablet_schema->keys_type() == UNIQUE_KEYS && _pk_index_meta != nullptr); |
165 | 0 | return _pk_index_meta->min_key(); |
166 | 0 | } |
167 | 0 | std::string max_key() { |
168 | 0 | DCHECK(_tablet_schema->keys_type() == UNIQUE_KEYS && _pk_index_meta != nullptr); |
169 | 0 | return _pk_index_meta->max_key(); |
170 | 0 | } |
171 | | |
172 | 140 | io::FileReaderSPtr file_reader() { return _file_reader; } |
173 | | |
174 | | // Includes the column reader memory. |
175 | | // The other method, `get_metadata_size`, does not include the column readers, only the segment object itself. |
176 | 18.4k | int64_t meta_mem_usage() const { return _meta_mem_usage; } |
177 | | |
178 | | // Get the inner file column's data type. |
179 | | // The decision (e.g. flat-leaf vs hierarchical) can depend on the reader type and tablet schema carried by `read_options`. |
180 | | // NOTE(review): `read_options` is a const reference here, so the formerly documented "nullptr means query reader" case cannot be expressed by this signature - confirm against the implementation. |
181 | | // nullptr will be returned if storage type does not contain such column. |
182 | | std::shared_ptr<const IDataType> get_data_type_of(const TabletColumn& column, |
183 | | const StorageReadOptions& read_options); |
184 | | |
185 | | // Returns true when a predicate can be applied on column `cid`: the column has no storage type in this segment, is not a variant, has no target cast type, or its storage type equals the target cast type. |
186 | | bool can_apply_predicate_safely( |
187 | | int cid, const Schema& schema, |
188 | | const std::map<std::string, DataTypePtr>& target_cast_type_for_variants, |
189 | 0 | const StorageReadOptions& read_options) { |
190 | 0 | const doris::StorageField* col = schema.column(cid); |
191 | 0 | DCHECK(col != nullptr) << "Column not found in schema for cid=" << cid; |
192 | 0 | DataTypePtr storage_column_type = get_data_type_of(col->get_desc(), read_options); |
193 | 0 | if (storage_column_type == nullptr || col->type() != FieldType::OLAP_FIELD_TYPE_VARIANT || |
194 | 0 | !target_cast_type_for_variants.contains(col->name())) { |
195 | | // No storage type (default column iterator), not a variant column, or no target cast type registered for this column. |
196 | 0 | return true; |
197 | 0 | } |
198 | 0 | if (storage_column_type->equals(*target_cast_type_for_variants.at(col->name()))) { |
199 | 0 | return true; |
200 | 0 | } else { |
201 | 0 | return false; |
202 | 0 | } |
203 | 0 | } |
204 | | |
205 | 17 | const TabletSchemaSPtr& tablet_schema() const { return _tablet_schema; } |
206 | | |
207 | | // Get the column reader by tablet column; returns NOT_FOUND if this segment has no reader for it. |
208 | | Status get_column_reader(const TabletColumn& col, std::shared_ptr<ColumnReader>* column_reader, |
209 | | OlapReaderStatistics* stats); |
210 | | |
211 | | // Get the column reader by column unique id; returns NOT_FOUND if this segment has no reader for it. |
212 | | Status get_column_reader(int32_t col_uid, std::shared_ptr<ColumnReader>* column_reader, |
213 | | OlapReaderStatistics* stats); |
214 | | |
215 | | Status traverse_column_meta_pbs(const std::function<void(const ColumnMetaPB&)>& visitor); |
216 | | |
217 | | // Returns the cached raw_data_bytes for the given column unique id, or 0 if not found. |
218 | | // Data is populated during _create_column_meta (under call_once), so thread-safe after init. |
219 | 3 | uint64_t column_raw_data_bytes(int32_t column_uid) const { |
220 | 3 | auto it = _column_uid_to_raw_bytes.find(column_uid); |
221 | 3 | return it != _column_uid_to_raw_bytes.end() ? it->second : 0; |
222 | 3 | } |
223 | | |
224 | | static StoragePageCache::CacheKey get_segment_footer_cache_key( |
225 | | const io::FileReaderSPtr& file_reader); |
226 | | |
227 | | private: |
228 | | DISALLOW_COPY_AND_ASSIGN(Segment); |
229 | | Segment(uint32_t segment_id, RowsetId rowset_id, TabletSchemaSPtr tablet_schema, |
230 | | InvertedIndexFileInfo idx_file_info = InvertedIndexFileInfo()); |
231 | | static Status _open(io::FileSystemSPtr fs, const std::string& path, uint32_t segment_id, |
232 | | RowsetId rowset_id, TabletSchemaSPtr tablet_schema, |
233 | | const io::FileReaderOptions& reader_options, |
234 | | std::shared_ptr<Segment>* output, InvertedIndexFileInfo idx_file_info, |
235 | | OlapReaderStatistics* stats); |
236 | | // Open the segment file and read the minimum amount of necessary information (footer). |
237 | | Status _open(OlapReaderStatistics* stats); |
238 | | Status _parse_footer(std::shared_ptr<SegmentFooterPB>& footer, |
239 | | OlapReaderStatistics* stats = nullptr); |
240 | | Status _create_column_meta(const SegmentFooterPB& footer); |
241 | | Status _load_pk_bloom_filter(OlapReaderStatistics* stats); |
242 | | // Must ensure _create_column_readers_once has been called before calling this function. NOTE(review): no `_create_column_readers_once` is declared in this class declaration; this may mean `_create_column_meta_once` - confirm. |
243 | | ColumnReader* _get_column_reader(const TabletColumn& col); |
244 | | |
245 | | Status _write_error_file(size_t file_size, size_t offset, size_t bytes_read, char* data, |
246 | | io::IOContext& io_ctx); |
247 | | |
248 | | Status _open_index_file_reader(); |
249 | | |
250 | | Status _create_column_meta_once(OlapReaderStatistics* stats); |
251 | | |
252 | | virtual Status _get_segment_footer(std::shared_ptr<SegmentFooterPB>&, |
253 | | OlapReaderStatistics* stats); |
254 | | |
255 | | StoragePageCache::CacheKey get_segment_footer_cache_key() const; |
256 | | |
257 | | friend class SegmentIterator; |
258 | | friend class ColumnReaderCache; |
259 | | friend class MockSegment; |
260 | | |
261 | | io::FileSystemSPtr _fs; |
262 | | io::FileReaderSPtr _file_reader; |
263 | | uint32_t _segment_id; |
264 | | uint32_t _num_rows; |
265 | | AtomicStatus _healthy_status; |
266 | | |
267 | | // 1. Tracks memory used by segment metadata such as the footer or index pages. |
268 | | // 2. Tracks memory used by the segment's column readers. |
269 | | // The memory consumed by querying is tracked in the segment iterator. |
270 | | int64_t _meta_mem_usage; |
271 | | int64_t _tracked_meta_mem_usage = 0; |
272 | | |
273 | | RowsetId _rowset_id; |
274 | | TabletSchemaSPtr _tablet_schema; |
275 | | |
276 | | std::unique_ptr<PrimaryKeyIndexMetaPB> _pk_index_meta; |
277 | | PagePointerPB _sk_index_page; |
278 | | |
279 | | // Limited cache for column readers. |
280 | | std::unique_ptr<ColumnReaderCache> _column_reader_cache; |
281 | | |
282 | | // Centralized accessor for column metadata layout and uid->column_ordinal mapping. |
283 | | std::unique_ptr<ColumnMetaAccessor> _column_meta_accessor; |
284 | | |
285 | | // Initialized from ColumnMetaPB in SegmentFooterPB. |
286 | | // Maps column unique id ---> its inner data type. |
287 | | std::map<int32_t, std::shared_ptr<const IDataType>> _file_column_types; |
288 | | |
289 | | // Used to guarantee that the short key index is loaded at most once in a thread-safe way. |
290 | | DorisCallOnce<Status> _load_index_once; |
291 | | // Used to guarantee that the primary key bloom filter is loaded at most once in a thread-safe way. |
292 | | DorisCallOnce<Status> _load_pk_bf_once; |
293 | | |
294 | | DorisCallOnce<Status> _create_column_meta_once_call; |
295 | | |
296 | | std::weak_ptr<SegmentFooterPB> _footer_pb; |
297 | | |
298 | | // Cached raw_data_bytes per column unique id, populated once in _create_column_meta(). |
299 | | std::unordered_map<int32_t, uint64_t> _column_uid_to_raw_bytes; |
300 | | |
301 | | // Holds the short key index page in memory. |
302 | | PageHandle _sk_index_handle; |
303 | | // Short key index decoder; |
304 | | // all of its content is in memory. |
305 | | std::unique_ptr<ShortKeyIndexDecoder> _sk_index_decoder; |
306 | | // Primary key index reader. |
307 | | std::unique_ptr<PrimaryKeyIndexReader> _pk_index_reader; |
308 | | std::mutex _open_lock; |
309 | | // Inverted index file reader. |
310 | | std::shared_ptr<IndexFileReader> _index_file_reader; |
311 | | DorisCallOnce<Status> _index_file_reader_open; |
312 | | |
313 | | InvertedIndexFileInfo _idx_file_info; |
314 | | |
315 | | int _be_exec_version = BeExecVersionManager::get_newest_version(); |
316 | | }; |
317 | | |
318 | | } // namespace segment_v2 |
319 | | } // namespace doris |