/root/doris/be/src/olap/olap_common.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <gen_cpp/Types_types.h> |
21 | | #include <netinet/in.h> |
22 | | |
23 | | #include <atomic> |
24 | | #include <charconv> |
25 | | #include <cstdint> |
26 | | #include <functional> |
27 | | #include <list> |
28 | | #include <map> |
29 | | #include <memory> |
30 | | #include <ostream> |
31 | | #include <sstream> |
32 | | #include <string> |
33 | | #include <typeinfo> |
34 | | #include <unordered_map> |
35 | | #include <unordered_set> |
36 | | #include <utility> |
37 | | |
38 | | #include "common/config.h" |
39 | | #include "io/io_common.h" |
40 | | #include "olap/olap_define.h" |
41 | | #include "olap/rowset/rowset_fwd.h" |
42 | | #include "util/hash_util.hpp" |
43 | | #include "util/time.h" |
44 | | #include "util/uid_util.h" |
45 | | |
46 | | namespace doris { |
47 | | |
// Exclusive upper bound for the incrementing part of a rowset id: 2^56.
// The shift is performed on int64_t rather than on a `long` literal, because
// `long` is only 32 bits wide on LLP64/ILP32 targets, where `1L << 56` would
// be an out-of-range shift.
static constexpr int64_t MAX_ROWSET_ID = int64_t {1} << 56;
// Mask selecting the low 56 bits of a 64-bit value (0x00ffffffffffffff).
// Derived from MAX_ROWSET_ID so that the limit and the mask cannot drift apart.
static constexpr int64_t LOW_56_BITS = MAX_ROWSET_ID - 1;
50 | | |
// 32-bit integer alias used wherever a schema hash value is carried.
using SchemaHash = int32_t;
// 128-bit signed / unsigned integer aliases built on the compiler-provided
// __int128 extension type.
using int128_t = __int128;
using uint128_t = unsigned __int128;

// A tablet uid is represented with the UniqueId type.
using TabletUid = UniqueId;
56 | | |
// Kinds of compaction. The numeric values are assigned explicitly and start at 1.
enum CompactionType {
    BASE_COMPACTION = 1,
    CUMULATIVE_COMPACTION = 2,
    FULL_COMPACTION = 3,
};
58 | | |
// Category of a data directory. Values are spelled out here; they are the same
// 0, 1, 2 the enumerators receive implicitly from their declaration order.
enum DataDirType {
    SPILL_DISK_DIR = 0,
    OLAP_DATA_DIR = 1,
    DATA_CACHE_DIR = 2,
};
64 | | |
65 | | struct DataDirInfo { |
66 | | std::string path; |
67 | | size_t path_hash = 0; |
68 | | int64_t disk_capacity = 1; // actual disk capacity |
69 | | int64_t available = 0; // available space, in bytes unit |
70 | | int64_t local_used_capacity = 0; |
71 | | int64_t remote_used_capacity = 0; |
72 | | int64_t trash_used_capacity = 0; |
73 | | bool is_used = false; // whether available mark |
74 | | TStorageMedium::type storage_medium = TStorageMedium::HDD; // Storage medium type: SSD|HDD |
75 | | DataDirType data_dir_type = DataDirType::OLAP_DATA_DIR; |
76 | | std::string bvar_name; |
77 | | }; |
// Per-predicate filtering counters: a type tag plus the number of rows that
// went in and the number of rows that were filtered. All fields start at zero.
struct PredicateFilterInfo {
    int type {0};
    uint64_t input_row {0};
    uint64_t filtered_row {0};
};
83 | | // Sort DataDirInfo by available space. |
84 | | struct DataDirInfoLessAvailability { |
85 | 7 | bool operator()(const DataDirInfo& left, const DataDirInfo& right) const { |
86 | 7 | return left.available < right.available; |
87 | 7 | } |
88 | | }; |
89 | | |
90 | | struct TabletInfo { |
91 | | TabletInfo(TTabletId in_tablet_id, UniqueId in_uid) |
92 | | : tablet_id(in_tablet_id), tablet_uid(in_uid) {} |
93 | | |
94 | | bool operator<(const TabletInfo& right) const { |
95 | | if (tablet_id != right.tablet_id) { |
96 | | return tablet_id < right.tablet_id; |
97 | | } else { |
98 | | return tablet_uid < right.tablet_uid; |
99 | | } |
100 | | } |
101 | | |
102 | | std::string to_string() const { |
103 | | std::stringstream ss; |
104 | | ss << tablet_id << "." << tablet_uid.to_string(); |
105 | | return ss.str(); |
106 | | } |
107 | | |
108 | | TTabletId tablet_id; |
109 | | UniqueId tablet_uid; |
110 | | }; |
111 | | |
112 | | struct TabletSize { |
113 | | TabletSize(TTabletId in_tablet_id, size_t in_tablet_size) |
114 | 0 | : tablet_id(in_tablet_id), tablet_size(in_tablet_size) {} |
115 | | |
116 | | TTabletId tablet_id; |
117 | | size_t tablet_size; |
118 | | }; |
119 | | |
// Define all data types supported by Field.
// If a new field type is defined, a new TypeInfo may also need to be defined,
// and some functions such as get_type_info in types.cpp need to be changed too.
// Every enumerator has an explicitly assigned value (1..39).
enum class FieldType {
    OLAP_FIELD_TYPE_TINYINT = 1, // MYSQL_TYPE_TINY
    OLAP_FIELD_TYPE_UNSIGNED_TINYINT = 2,
    OLAP_FIELD_TYPE_SMALLINT = 3, // MYSQL_TYPE_SHORT
    OLAP_FIELD_TYPE_UNSIGNED_SMALLINT = 4,
    OLAP_FIELD_TYPE_INT = 5, // MYSQL_TYPE_LONG
    OLAP_FIELD_TYPE_UNSIGNED_INT = 6,
    OLAP_FIELD_TYPE_BIGINT = 7, // MYSQL_TYPE_LONGLONG
    OLAP_FIELD_TYPE_UNSIGNED_BIGINT = 8,
    OLAP_FIELD_TYPE_LARGEINT = 9,
    OLAP_FIELD_TYPE_FLOAT = 10,  // MYSQL_TYPE_FLOAT
    OLAP_FIELD_TYPE_DOUBLE = 11, // MYSQL_TYPE_DOUBLE
    OLAP_FIELD_TYPE_DISCRETE_DOUBLE = 12,
    OLAP_FIELD_TYPE_CHAR = 13,     // MYSQL_TYPE_STRING
    OLAP_FIELD_TYPE_DATE = 14,     // MySQL_TYPE_NEWDATE
    OLAP_FIELD_TYPE_DATETIME = 15, // MySQL_TYPE_DATETIME
    OLAP_FIELD_TYPE_DECIMAL = 16,  // DECIMAL, using different store format against MySQL
    OLAP_FIELD_TYPE_VARCHAR = 17,

    OLAP_FIELD_TYPE_STRUCT = 18,  // Struct
    OLAP_FIELD_TYPE_ARRAY = 19,   // ARRAY
    OLAP_FIELD_TYPE_MAP = 20,     // Map
    OLAP_FIELD_TYPE_UNKNOWN = 21, // UNKNOWN OLAP_FIELD_TYPE_STRING
    OLAP_FIELD_TYPE_NONE = 22,
    OLAP_FIELD_TYPE_HLL = 23,
    OLAP_FIELD_TYPE_BOOL = 24,
    OLAP_FIELD_TYPE_OBJECT = 25,
    OLAP_FIELD_TYPE_STRING = 26,
    OLAP_FIELD_TYPE_QUANTILE_STATE = 27,
    OLAP_FIELD_TYPE_DATEV2 = 28,
    OLAP_FIELD_TYPE_DATETIMEV2 = 29,
    OLAP_FIELD_TYPE_TIMEV2 = 30,
    OLAP_FIELD_TYPE_DECIMAL32 = 31,
    OLAP_FIELD_TYPE_DECIMAL64 = 32,
    OLAP_FIELD_TYPE_DECIMAL128I = 33,
    OLAP_FIELD_TYPE_JSONB = 34,
    OLAP_FIELD_TYPE_VARIANT = 35,
    OLAP_FIELD_TYPE_AGG_STATE = 36,
    OLAP_FIELD_TYPE_DECIMAL256 = 37,
    OLAP_FIELD_TYPE_IPV4 = 38,
    OLAP_FIELD_TYPE_IPV6 = 39,
};
165 | | |
// Define all aggregation methods supported by Field.
// Note that in practice, not all types can use all of the following aggregation methods.
// For example, it is meaningless to use SUM for the string type (but it will not cause the
// program to crash).
// The implementation of the Field class does not perform such checks; they should be
// enforced when the table is created.
enum class FieldAggregationMethod {
    OLAP_FIELD_AGGREGATION_NONE = 0,
    OLAP_FIELD_AGGREGATION_SUM = 1,
    OLAP_FIELD_AGGREGATION_MIN = 2,
    OLAP_FIELD_AGGREGATION_MAX = 3,
    OLAP_FIELD_AGGREGATION_REPLACE = 4,
    OLAP_FIELD_AGGREGATION_HLL_UNION = 5,
    OLAP_FIELD_AGGREGATION_UNKNOWN = 6,
    OLAP_FIELD_AGGREGATION_BITMAP_UNION = 7,
    // Replace if and only if added value is not null
    OLAP_FIELD_AGGREGATION_REPLACE_IF_NOT_NULL = 8,
    OLAP_FIELD_AGGREGATION_QUANTILE_UNION = 9,
    OLAP_FIELD_AGGREGATION_GENERIC = 10
};
184 | | |
// Kinds of push request. Per the notes on each enumerator, two of the four
// values are no longer used.
enum class PushType {
    PUSH_NORMAL = 1,          // for broker/hadoop load, not used any more
    PUSH_FOR_DELETE = 2,      // for delete
    PUSH_FOR_LOAD_DELETE = 3, // not used any more
    PUSH_NORMAL_V2 = 4,       // for spark load
};
191 | | |
192 | | constexpr bool field_is_slice_type(const FieldType& field_type) { |
193 | | return field_type == FieldType::OLAP_FIELD_TYPE_VARCHAR || |
194 | | field_type == FieldType::OLAP_FIELD_TYPE_CHAR || |
195 | | field_type == FieldType::OLAP_FIELD_TYPE_STRING; |
196 | | } |
197 | | |
198 | 0 | constexpr bool field_is_numeric_type(const FieldType& field_type) { |
199 | 0 | return field_type == FieldType::OLAP_FIELD_TYPE_INT || |
200 | 0 | field_type == FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT || |
201 | 0 | field_type == FieldType::OLAP_FIELD_TYPE_BIGINT || |
202 | 0 | field_type == FieldType::OLAP_FIELD_TYPE_SMALLINT || |
203 | 0 | field_type == FieldType::OLAP_FIELD_TYPE_UNSIGNED_TINYINT || |
204 | 0 | field_type == FieldType::OLAP_FIELD_TYPE_UNSIGNED_SMALLINT || |
205 | 0 | field_type == FieldType::OLAP_FIELD_TYPE_TINYINT || |
206 | 0 | field_type == FieldType::OLAP_FIELD_TYPE_DOUBLE || |
207 | 0 | field_type == FieldType::OLAP_FIELD_TYPE_FLOAT || |
208 | 0 | field_type == FieldType::OLAP_FIELD_TYPE_DATE || |
209 | 0 | field_type == FieldType::OLAP_FIELD_TYPE_DATEV2 || |
210 | 0 | field_type == FieldType::OLAP_FIELD_TYPE_DATETIME || |
211 | 0 | field_type == FieldType::OLAP_FIELD_TYPE_DATETIMEV2 || |
212 | 0 | field_type == FieldType::OLAP_FIELD_TYPE_LARGEINT || |
213 | 0 | field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL || |
214 | 0 | field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL32 || |
215 | 0 | field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL64 || |
216 | 0 | field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL128I || |
217 | 0 | field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL256 || |
218 | 0 | field_type == FieldType::OLAP_FIELD_TYPE_BOOL || |
219 | 0 | field_type == FieldType::OLAP_FIELD_TYPE_IPV4 || |
220 | 0 | field_type == FieldType::OLAP_FIELD_TYPE_IPV6; |
221 | 0 | } |
222 | | |
223 | | // <start_version_id, end_version_id>, such as <100, 110> |
224 | | //using Version = std::pair<TupleVersion, TupleVersion>; |
225 | | |
226 | | struct Version { |
227 | | int64_t first; |
228 | | int64_t second; |
229 | | |
230 | 1.06M | Version(int64_t first_, int64_t second_) : first(first_), second(second_) {} |
231 | 6.23k | Version() : first(0), second(0) {} |
232 | | |
233 | | static Version mock() { |
234 | | // Every time SchemaChange is used for external rowing, some temporary versions (such as 999, 1000, 1001) will be written, in order to avoid Cache conflicts, temporary |
235 | | // The version number takes a BIG NUMBER plus the version number of the current SchemaChange |
236 | | return Version(1 << 28, 1 << 29); |
237 | | } |
238 | | |
239 | | friend std::ostream& operator<<(std::ostream& os, const Version& version); |
240 | | |
241 | | bool operator!=(const Version& rhs) const { return first != rhs.first || second != rhs.second; } |
242 | | |
243 | 4.63k | bool operator==(const Version& rhs) const { return first == rhs.first && second == rhs.second; } |
244 | | |
245 | | bool contains(const Version& other) const { |
246 | | return first <= other.first && second >= other.second; |
247 | | } |
248 | | |
249 | 481 | std::string to_string() const { return fmt::format("[{}-{}]", first, second); } |
250 | | }; |
251 | | |
252 | | using Versions = std::vector<Version>; |
253 | | |
254 | 9 | inline std::ostream& operator<<(std::ostream& os, const Version& version) { |
255 | 9 | return os << version.to_string(); |
256 | 9 | } |
257 | | |
258 | | inline std::ostream& operator<<(std::ostream& os, const Versions& versions) { |
259 | | for (auto& version : versions) { |
260 | | os << version; |
261 | | } |
262 | | return os; |
263 | | } |
264 | | |
265 | | // used for hash-struct of hash_map<Version, Rowset*>. |
266 | | struct HashOfVersion { |
267 | 1.13k | size_t operator()(const Version& version) const { |
268 | 1.13k | size_t seed = 0; |
269 | 1.13k | seed = HashUtil::hash64(&version.first, sizeof(version.first), seed); |
270 | 1.13k | seed = HashUtil::hash64(&version.second, sizeof(version.second), seed); |
271 | 1.13k | return seed; |
272 | 1.13k | } |
273 | | }; |
274 | | |
// It is used to represent Graph vertex.
// `value` is the vertex's own value; `edges` is a list of int64 values and
// starts out empty.
struct Vertex {
    int64_t value {0};
    std::list<int64_t> edges;

    // Intentionally left implicit, as in the original, so an int64_t converts to a Vertex.
    Vertex(int64_t v) : value {v} {}
};
282 | | |
// Forward declarations only; neither class is defined in this header.
class Field;
class WrapperField;
// A key range expressed as a pair of non-owning WrapperField pointers
// (presumably <start key, end key> - confirm against the callers).
using KeyRange = std::pair<WrapperField*, WrapperField*>;
286 | | |
// ReaderStatistics used to collect statistics when scan data from storage
// Every int64_t counter below starts at zero. Names ending in `_ns` / `_timer`
// are presumably accumulated durations and the rest are counts or byte totals;
// this header does not state the units, so confirm at the point of use.
struct OlapReaderStatistics {
    int64_t io_ns = 0;
    int64_t compressed_bytes_read = 0;

    int64_t decompress_ns = 0;
    int64_t uncompressed_bytes_read = 0;

    // total read bytes in memory
    int64_t bytes_read = 0;

    int64_t block_fetch_ns = 0; // time of rowset reader's `next_batch()` call
    int64_t block_load_ns = 0;
    int64_t blocks_load = 0;
    // Not used any more, will be removed after non-vectorized code is removed
    int64_t block_seek_num = 0;
    // Not used any more, will be removed after non-vectorized code is removed
    int64_t block_seek_ns = 0;

    // block_load_ns
    // block_init_ns
    // block_init_seek_ns
    // generate_row_ranges_ns
    // predicate_column_read_ns
    // predicate_column_read_seek_ns
    // lazy_read_ns
    // block_lazy_read_seek_ns
    int64_t block_init_ns = 0;
    int64_t block_init_seek_num = 0;
    int64_t block_init_seek_ns = 0;
    int64_t predicate_column_read_ns = 0;
    int64_t non_predicate_read_ns = 0;
    int64_t predicate_column_read_seek_num = 0;
    int64_t predicate_column_read_seek_ns = 0;
    int64_t lazy_read_ns = 0;
    int64_t block_lazy_read_seek_num = 0;
    int64_t block_lazy_read_seek_ns = 0;

    int64_t raw_rows_read = 0;

    int64_t rows_vec_cond_filtered = 0;
    int64_t rows_short_circuit_cond_filtered = 0;
    int64_t vec_cond_input_rows = 0;
    int64_t short_circuit_cond_input_rows = 0;
    int64_t rows_vec_del_cond_filtered = 0;
    int64_t vec_cond_ns = 0;
    int64_t short_cond_ns = 0;
    int64_t expr_filter_ns = 0;
    int64_t output_col_ns = 0;

    // PredicateFilterInfo entries keyed by an int (the meaning of the key is not
    // visible in this header).
    std::map<int, PredicateFilterInfo> filter_info;

    int64_t rows_key_range_filtered = 0;
    int64_t rows_stats_filtered = 0;
    int64_t rows_stats_rp_filtered = 0;
    int64_t rows_bf_filtered = 0;
    int64_t rows_dict_filtered = 0;
    // Including the number of rows filtered out according to the Delete information in the Tablet,
    // and the number of rows filtered for marked deleted rows under the unique key model.
    // This metric is mainly used to record the number of rows filtered by the delete condition in Segment V1,
    // and it is also used to record the replaced rows in the Unique key model in the "Reader" class.
    // In segmentv2, if you want to get all filtered rows, you need the sum of "rows_del_filtered" and "rows_conditions_filtered".
    int64_t rows_del_filtered = 0;
    int64_t rows_del_by_bitmap = 0;
    // the number of rows filtered by various column indexes.
    int64_t rows_conditions_filtered = 0;
    int64_t generate_row_ranges_ns = 0;
    int64_t generate_row_ranges_by_bf_ns = 0;
    int64_t generate_row_ranges_by_zonemap_ns = 0;
    int64_t generate_row_ranges_by_dict_ns = 0;

    int64_t index_load_ns = 0;

    int64_t total_pages_num = 0;
    int64_t cached_pages_num = 0;

    int64_t rows_bitmap_index_filtered = 0;
    int64_t bitmap_index_filter_timer = 0;

    int64_t rows_inverted_index_filtered = 0;
    int64_t inverted_index_filter_timer = 0;
    int64_t inverted_index_query_timer = 0;
    int64_t inverted_index_query_cache_hit = 0;
    int64_t inverted_index_query_cache_miss = 0;
    int64_t inverted_index_query_null_bitmap_timer = 0;
    int64_t inverted_index_query_bitmap_copy_timer = 0;
    int64_t inverted_index_searcher_open_timer = 0;
    int64_t inverted_index_searcher_search_timer = 0;
    int64_t inverted_index_searcher_cache_hit = 0;
    int64_t inverted_index_searcher_cache_miss = 0;
    int64_t inverted_index_downgrade_count = 0;

    int64_t output_index_result_column_timer = 0;
    // number of segment filtered by column stat when creating seg iterator
    int64_t filtered_segment_number = 0;
    // total number of segment
    int64_t total_segment_number = 0;

    io::FileCacheStatistics file_cache_stats;
    int64_t load_segments_timer = 0;

    int64_t collect_iterator_merge_next_timer = 0;
    int64_t collect_iterator_normal_next_timer = 0;
    int64_t delete_bitmap_get_agg_ns = 0;
};
392 | | |
// Column id type: an unsigned 32-bit integer.
using ColumnId = uint32_t;
// Column unique id set
using UniqueIdSet = std::set<uint32_t>;
// Column unique Id -> column id map
using UniqueIdToColumnIdMap = std::map<ColumnId, ColumnId>;
// RowsetId is defined further down; it is forward-declared here so that
// next_rowset_id() can be declared before RowsetId::init() calls it.
struct RowsetId;
// Declared here and defined elsewhere; RowsetId::init() uses the `hi` field of
// the returned id as a replacement when parsing an old-format id fails.
RowsetId next_rowset_id();
400 | | |
401 | | // 8 bit rowset id version |
402 | | // 56 bit, inc number from 1 |
403 | | // 128 bit backend uid, it is a uuid bit, id version |
404 | | struct RowsetId { |
405 | | int8_t version = 0; |
406 | | int64_t hi = 0; |
407 | | int64_t mi = 0; |
408 | | int64_t lo = 0; |
409 | | |
410 | 896 | void init(std::string_view rowset_id_str) { |
411 | | // for new rowsetid its a 48 hex string |
412 | | // if the len < 48, then it is an old format rowset id |
413 | 896 | if (rowset_id_str.length() < 48) [[unlikely]] { |
414 | 65 | int64_t high; |
415 | 65 | auto [_, ec] = std::from_chars(rowset_id_str.data(), |
416 | 65 | rowset_id_str.data() + rowset_id_str.length(), high); |
417 | 65 | if (ec != std::errc {}) [[unlikely]] { |
418 | 0 | if (config::force_regenerate_rowsetid_on_start_error) { |
419 | 0 | LOG(WARNING) << "failed to init rowset id: " << rowset_id_str; |
420 | 0 | high = next_rowset_id().hi; |
421 | 0 | } else { |
422 | 0 | LOG(FATAL) << "failed to init rowset id: " << rowset_id_str; |
423 | 0 | } |
424 | 0 | } |
425 | 65 | init(1, high, 0, 0); |
426 | 831 | } else { |
427 | 831 | int64_t high = 0; |
428 | 831 | int64_t middle = 0; |
429 | 831 | int64_t low = 0; |
430 | 831 | from_hex(&high, rowset_id_str.substr(0, 16)); |
431 | 831 | from_hex(&middle, rowset_id_str.substr(16, 16)); |
432 | 831 | from_hex(&low, rowset_id_str.substr(32, 16)); |
433 | 831 | init(high >> 56, high & LOW_56_BITS, middle, low); |
434 | 831 | } |
435 | 896 | } |
436 | | |
437 | | // to compatible with old version |
438 | 1.01k | void init(int64_t rowset_id) { init(1, rowset_id, 0, 0); } |
439 | | |
440 | 8.79k | void init(int64_t id_version, int64_t high, int64_t middle, int64_t low) { |
441 | 8.79k | version = id_version; |
442 | 8.79k | if (UNLIKELY(high >= MAX_ROWSET_ID)) { |
443 | 0 | LOG(FATAL) << "inc rowsetid is too large:" << high; |
444 | 0 | } |
445 | 8.79k | hi = (id_version << 56) + (high & LOW_56_BITS); |
446 | 8.79k | mi = middle; |
447 | 8.79k | lo = low; |
448 | 8.79k | } |
449 | | |
450 | 29.4k | std::string to_string() const { |
451 | 29.4k | if (version < 2) { |
452 | 20.5k | return std::to_string(hi & LOW_56_BITS); |
453 | 20.5k | } else { |
454 | 8.88k | char buf[48]; |
455 | 8.88k | to_hex(hi, buf); |
456 | 8.88k | to_hex(mi, buf + 16); |
457 | 8.88k | to_hex(lo, buf + 32); |
458 | 8.88k | return {buf, 48}; |
459 | 8.88k | } |
460 | 29.4k | } |
461 | | |
462 | | // std::unordered_map need this api |
463 | 4.15k | bool operator==(const RowsetId& rhs) const { |
464 | 4.15k | return hi == rhs.hi && mi == rhs.mi && lo == rhs.lo; |
465 | 4.15k | } |
466 | | |
467 | | bool operator!=(const RowsetId& rhs) const { |
468 | | return hi != rhs.hi || mi != rhs.mi || lo != rhs.lo; |
469 | | } |
470 | | |
471 | 44.6M | bool operator<(const RowsetId& rhs) const { |
472 | 44.6M | if (hi != rhs.hi) { |
473 | 4.54M | return hi < rhs.hi; |
474 | 40.1M | } else if (mi != rhs.mi) { |
475 | 0 | return mi < rhs.mi; |
476 | 40.1M | } else { |
477 | 40.1M | return lo < rhs.lo; |
478 | 40.1M | } |
479 | 44.6M | } |
480 | | |
481 | 848 | friend std::ostream& operator<<(std::ostream& out, const RowsetId& rowset_id) { |
482 | 848 | out << rowset_id.to_string(); |
483 | 848 | return out; |
484 | 848 | } |
485 | | }; |
486 | | |
487 | | using RowsetIdUnorderedSet = std::unordered_set<RowsetId>; |
488 | | |
489 | | // Extract rowset id from filename, return uninitialized rowset id if filename is invalid |
490 | 0 | inline RowsetId extract_rowset_id(std::string_view filename) { |
491 | 0 | RowsetId rowset_id; |
492 | 0 | if (filename.ends_with(".dat")) { |
493 | 0 | // filename format: {rowset_id}_{segment_num}.dat |
494 | 0 | auto end = filename.find('_'); |
495 | 0 | if (end == std::string::npos) { |
496 | 0 | return rowset_id; |
497 | 0 | } |
498 | 0 | rowset_id.init(filename.substr(0, end)); |
499 | 0 | return rowset_id; |
500 | 0 | } |
501 | 0 | if (filename.ends_with(".idx")) { |
502 | 0 | // filename format: {rowset_id}_{segment_num}_{index_id}.idx |
503 | 0 | auto end = filename.find('_'); |
504 | 0 | if (end == std::string::npos) { |
505 | 0 | return rowset_id; |
506 | 0 | } |
507 | 0 | rowset_id.init(filename.substr(0, end)); |
508 | 0 | return rowset_id; |
509 | 0 | } |
510 | 0 | return rowset_id; |
511 | 0 | } |
512 | | |
// Forward declaration; DeleteBitmap is only held through a shared_ptr here.
class DeleteBitmap;
// merge on write context
// Bundles a max version, a transaction id, a set of rowset ids, the matching
// rowset pointers and a shared delete bitmap.
struct MowContext {
    // `rowset_ptrs` and `db` are taken by value and moved into the members.
    // `ids` is NOT copied: the member `rowset_ids` is a reference bound to it,
    // so the set passed in must outlive this MowContext.
    MowContext(int64_t version, int64_t txnid, const RowsetIdUnorderedSet& ids,
               std::vector<RowsetSharedPtr> rowset_ptrs, std::shared_ptr<DeleteBitmap> db)
            : max_version(version),
              txn_id(txnid),
              rowset_ids(ids),
              rowset_ptrs(std::move(rowset_ptrs)),
              delete_bitmap(std::move(db)) {}
    int64_t max_version;
    int64_t txn_id;
    // Reference to a caller-owned set (see the constructor note).
    const RowsetIdUnorderedSet& rowset_ids;
    std::vector<RowsetSharedPtr> rowset_ptrs;
    std::shared_ptr<DeleteBitmap> delete_bitmap;
};
529 | | |
530 | | // used for controll compaction |
531 | | struct VersionWithTime { |
532 | | std::atomic<int64_t> version; |
533 | | int64_t update_ts; |
534 | | |
535 | | VersionWithTime() : version(0), update_ts(MonotonicMillis()) {} |
536 | | |
537 | | void update_version_monoto(int64_t new_version) { |
538 | | int64_t cur_version = version.load(std::memory_order_relaxed); |
539 | | while (cur_version < new_version) { |
540 | | if (version.compare_exchange_strong(cur_version, new_version, std::memory_order_relaxed, |
541 | | std::memory_order_relaxed)) { |
542 | | update_ts = MonotonicMillis(); |
543 | | break; |
544 | | } |
545 | | } |
546 | | } |
547 | | }; |
548 | | |
549 | | } // namespace doris |
550 | | |
551 | | // This intended to be a "good" hash function. It may change from time to time. |
552 | | template <> |
553 | | struct std::hash<doris::RowsetId> { |
554 | 1.47k | size_t operator()(const doris::RowsetId& rowset_id) const { |
555 | 1.47k | size_t seed = 0; |
556 | 1.47k | seed = doris::HashUtil::xxHash64WithSeed((const char*)&rowset_id.hi, sizeof(rowset_id.hi), |
557 | 1.47k | seed); |
558 | 1.47k | seed = doris::HashUtil::xxHash64WithSeed((const char*)&rowset_id.mi, sizeof(rowset_id.mi), |
559 | 1.47k | seed); |
560 | 1.47k | seed = doris::HashUtil::xxHash64WithSeed((const char*)&rowset_id.lo, sizeof(rowset_id.lo), |
561 | 1.47k | seed); |
562 | 1.47k | return seed; |
563 | 1.47k | } |
564 | | }; |