be/src/storage/segment/page_io.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <gen_cpp/segment_v2.pb.h> |
21 | | |
22 | | #include <vector> |
23 | | |
24 | | #include "common/logging.h" |
25 | | #include "common/status.h" |
26 | | #include "io/cache/block_file_cache.h" |
27 | | #include "io/io_common.h" |
28 | | #include "storage/segment/page_pointer.h" |
29 | | #include "util/slice.h" |
30 | | |
31 | | namespace doris { |
32 | | |
33 | | class BlockCompressionCodec; |
34 | | struct OlapReaderStatistics; |
35 | | |
36 | | namespace io { |
37 | | class FileWriter; |
38 | | class FileReader; |
39 | | } // namespace io |
40 | | |
41 | | namespace segment_v2 { |
42 | | class EncodingInfo; |
43 | | class PageHandle; |
44 | | |
45 | | io::UInt128Wrapper file_cache_key_from_path(const std::string& seg_path); |
46 | | std::string file_cache_key_str(const std::string& seg_path); |
47 | | // Options for reading a single page: where it is read from, how it is verified/decoded, and where IO metrics go. |
48 | | struct PageReadOptions { |
49 | | // whether to verify page checksum |
50 | | bool verify_checksum = true; |
51 | | // whether to use page cache in read path |
52 | | bool use_page_cache = false; |
53 | | // if true, use DURABLE CachePriority in page cache |
54 | | // currently used for in memory olap table |
55 | | bool kept_in_memory = false; |
56 | | // index_page should not be pre-decoded |
57 | | bool pre_decode = true; |
58 | | // page type, used for page cache allocation |
59 | | // page types are divided into DATA_PAGE & INDEX_PAGE |
60 | | // INDEX_PAGE including index_page, dict_page and short_key_page; value-initialized so an unset type is never read while indeterminate (copy ctor, operator<<) |
61 | | PageTypePB type {}; |
62 | | // file reader used to read the page |
63 | | io::FileReader* file_reader = nullptr; |
64 | | // location of the page |
65 | | PagePointer page_pointer; |
66 | | // decompressor for page body (null means page body is not compressed) |
67 | | BlockCompressionCodec* codec = nullptr; |
68 | | // used to collect IO metrics |
69 | | OlapReaderStatistics* stats = nullptr; |
70 | | |
71 | | const EncodingInfo* encoding_info = nullptr; |
72 | | // IO context of the read; const, so it can only be set by a constructor |
73 | | const io::IOContext io_ctx; |
74 | | |
75 | | // for dict page, we need to use encoding_info based on footer->dict_page_footer().encoding() |
76 | | // to get its pre_decoder |
77 | | bool is_dict_page {false}; |
78 | | // Asserts (CHECK_NOTNULL) that both `file_reader' and `stats' have been set. |
79 | 7.36M | void sanity_check() const { |
80 | 7.36M | CHECK_NOTNULL(file_reader); |
81 | 7.36M | CHECK_NOTNULL(stats); |
82 | 7.36M | } |
83 | 7.32M | PageReadOptions(const io::IOContext& ioctx) : io_ctx(ioctx) {} |
84 | | // Hand-written member-wise copy; keep it in sync with the member list above when adding fields. |
85 | 0 | PageReadOptions(const PageReadOptions& old) : io_ctx(old.io_ctx) { |
86 | 0 | file_reader = old.file_reader; |
87 | 0 | page_pointer = old.page_pointer; |
88 | 0 | codec = old.codec; |
89 | 0 | stats = old.stats; |
90 | 0 | verify_checksum = old.verify_checksum; |
91 | 0 | use_page_cache = old.use_page_cache; |
92 | 0 | kept_in_memory = old.kept_in_memory; |
93 | 0 | type = old.type; |
94 | 0 | encoding_info = old.encoding_info; |
95 | 0 | pre_decode = old.pre_decode; |
96 | 0 | is_dict_page = old.is_dict_page; |
97 | 0 | } |
98 | | }; |
99 | | // Pairs a pointer to a CRC value with a pointer to page read options; has no destructor, so it never frees either. NOTE(review): presumably used by fault-injection hooks — confirm against users. |
100 | | struct InjectionContext { |
101 | | uint32_t* crc = nullptr; // null until set by the creator of the context |
102 | | PageReadOptions* opts = nullptr; // null until set by the creator of the context |
103 | | }; |
104 | | // Streams a one-line summary of `opt' into `os' and returns `os'; file_reader, stats, io_ctx and is_dict_page are not printed. |
105 | 0 | inline std::ostream& operator<<(std::ostream& os, const PageReadOptions& opt) { |
106 | 0 | const bool has_codec = opt.codec != nullptr; |
107 | 0 | const bool has_encoding_info = opt.encoding_info != nullptr; |
108 | 0 | os << "PageReadOptions { verify_checksum=" << opt.verify_checksum << " use_page_cache=" << opt.use_page_cache |
109 | 0 | << " kept_in_memory=" << opt.kept_in_memory << " pre_decode=" << opt.pre_decode << " type=" << opt.type |
110 | 0 | << " page_pointer=" << opt.page_pointer << " has_codec=" << has_codec << " has_encoding_info=" << has_encoding_info << " }"; |
111 | 0 | return os; |
112 | 0 | } |
113 | | |
114 | | // Utility class for reading and writing pages. All types of page share the same general layout: |
115 | | // Page := PageBody, PageFooter, FooterSize(4), Checksum(4) |
116 | | // - PageBody is defined by page type and may be compressed |
117 | | // - PageFooter is serialized PageFooterPB. It contains page_type, uncompressed_body_size, |
118 | | // and other custom metadata. PageBody is not compressed when its size is equal to |
119 | | // uncompressed_body_size |
120 | | // - FooterSize stores the size of PageFooter |
121 | | // - Checksum is the crc32c checksum of all previous parts |
122 | | class PageIO { |
123 | | public: |
124 | | // Compress `body' using `codec' into `compressed_body'. |
125 | | // The size of the returned `compressed_body' is 0 when the body is not compressed; this |
126 | | // could happen when `codec' is null or the space saving is less than `min_space_saving'. |
127 | | static Status compress_page_body(BlockCompressionCodec* codec, double min_space_saving, |
128 | | const std::vector<Slice>& body, OwnedSlice* compressed_body); |
129 | | |
130 | | // Encode page from `body' and `footer' and write it to `writer'. |
131 | | // `body' could be either uncompressed or compressed. |
132 | | // On success, the file pointer to the written page is stored in `result'. |
133 | | static Status write_page(io::FileWriter* writer, const std::vector<Slice>& body, |
134 | | const PageFooterPB& footer, PagePointer* result); |
135 | | |
136 | | // Convenience function to compress the page body and write the page in one go: the compressed body is written when compress_page_body() produced a non-empty one, otherwise `body' is written as is. |
137 | | static Status compress_and_write_page(BlockCompressionCodec* codec, double min_space_saving, |
138 | | io::FileWriter* writer, const std::vector<Slice>& body, |
139 | 1.17M | const PageFooterPB& footer, PagePointer* result) { |
140 | 1.17M | DCHECK_EQ(footer.uncompressed_size(), Slice::compute_total_size(body)); |
141 | 1.17M | OwnedSlice compressed_body; |
142 | 1.17M | RETURN_IF_ERROR(compress_page_body(codec, min_space_saving, body, &compressed_body)); |
143 | 1.17M | if (compressed_body.slice().empty()) { // uncompressed |
144 | 1.11M | return write_page(writer, body, footer, result); |
145 | 1.11M | } |
146 | 61.8k | return write_page(writer, {compressed_body.slice()}, footer, result); |
147 | 1.17M | } |
148 | | |
149 | | // Read and parse a page according to `opts'. |
150 | | // On success |
151 | | // `handle' holds the memory of page data, |
152 | | // `body' points to page body, |
153 | | // `footer' stores the page footer. |
154 | | // This method is exception safe; it will fail when memory allocation fails. |
155 | | // Deals with CORRUPTION when using file cache by retrying from remote. |
156 | | static Status read_and_decompress_page(const PageReadOptions& opts, PageHandle* handle, |
157 | | Slice* body, PageFooterPB* footer); |
158 | | |
159 | | private: |
160 | | static Status do_read_and_decompress_page(const PageReadOptions& opts, PageHandle* handle, |
161 | 7.35M | Slice* body, PageFooterPB* footer) { |
162 | 7.35M | RETURN_IF_CATCH_EXCEPTION( |
163 | 7.35M | { return read_and_decompress_page_(opts, handle, body, footer); }); |
164 | 7.35M | } |
165 | | // An internal method that does not deal with exceptions; do_read_and_decompress_page() wraps it in RETURN_IF_CATCH_EXCEPTION. |
166 | | static Status read_and_decompress_page_(const PageReadOptions& opts, PageHandle* handle, |
167 | | Slice* body, PageFooterPB* footer); |
168 | | }; |
169 | | |
170 | | } // namespace segment_v2 |
171 | | } // namespace doris |