/root/doris/be/src/olap/tablet.h
| Line | Count | Source | 
| 1 |  | // Licensed to the Apache Software Foundation (ASF) under one | 
| 2 |  | // or more contributor license agreements.  See the NOTICE file | 
| 3 |  | // distributed with this work for additional information | 
| 4 |  | // regarding copyright ownership.  The ASF licenses this file | 
| 5 |  | // to you under the Apache License, Version 2.0 (the | 
| 6 |  | // "License"); you may not use this file except in compliance | 
| 7 |  | // with the License.  You may obtain a copy of the License at | 
| 8 |  | // | 
| 9 |  | //   http://www.apache.org/licenses/LICENSE-2.0 | 
| 10 |  | // | 
| 11 |  | // Unless required by applicable law or agreed to in writing, | 
| 12 |  | // software distributed under the License is distributed on an | 
| 13 |  | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | 
| 14 |  | // KIND, either express or implied.  See the License for the | 
| 15 |  | // specific language governing permissions and limitations | 
| 16 |  | // under the License. | 
| 17 |  |  | 
| 18 |  | #pragma once | 
| 19 |  |  | 
| 20 |  | #include <butil/macros.h> | 
| 21 |  | #include <glog/logging.h> | 
| 22 |  |  | 
| 23 |  | #include <atomic> | 
| 24 |  | #include <cstddef> | 
| 25 |  | #include <cstdint> | 
| 26 |  | #include <functional> | 
| 27 |  | #include <memory> | 
| 28 |  | #include <mutex> | 
| 29 |  | #include <ostream> | 
| 30 |  | #include <set> | 
| 31 |  | #include <shared_mutex> | 
| 32 |  | #include <string> | 
| 33 |  | #include <string_view> | 
| 34 |  | #include <utility> | 
| 35 |  | #include <vector> | 
| 36 |  |  | 
| 37 |  | #include "common/atomic_shared_ptr.h" | 
| 38 |  | #include "common/config.h" | 
| 39 |  | #include "common/status.h" | 
| 40 |  | #include "olap/base_tablet.h" | 
| 41 |  | #include "olap/binlog_config.h" | 
| 42 |  | #include "olap/data_dir.h" | 
| 43 |  | #include "olap/olap_common.h" | 
| 44 |  | #include "olap/partial_update_info.h" | 
| 45 |  | #include "olap/rowset/rowset.h" | 
| 46 |  | #include "olap/rowset/rowset_meta.h" | 
| 47 |  | #include "olap/rowset/rowset_reader.h" | 
| 48 |  | #include "olap/rowset/segment_v2/segment.h" | 
| 49 |  | #include "olap/version_graph.h" | 
| 50 |  | #include "segment_loader.h" | 
| 51 |  | #include "util/metrics.h" | 
| 52 |  | #include "util/once.h" | 
| 53 |  | #include "util/slice.h" | 
| 54 |  |  | 
| 55 |  | namespace bvar { | 
| 56 |  | template <typename T> | 
| 57 |  | class Adder; | 
| 58 |  | } | 
| 59 |  |  | 
| 60 |  | namespace doris { | 
| 61 |  | #include "common/compile_check_begin.h" | 
| 62 |  |  | 
| 63 |  | class Tablet; | 
| 64 |  | class CumulativeCompactionPolicy; | 
| 65 |  | class CompactionMixin; | 
| 66 |  | class SingleReplicaCompaction; | 
| 67 |  | class RowsetWriter; | 
| 68 |  | struct RowsetWriterContext; | 
| 69 |  | class TTabletInfo; | 
| 70 |  | class TabletMetaPB; | 
| 71 |  | class TupleDescriptor; | 
| 72 |  | class CalcDeleteBitmapToken; | 
| 73 |  | enum CompressKind : int; | 
| 74 |  | class RowsetBinlogMetasPB; | 
| 75 |  |  | 
| 76 |  | namespace io { | 
| 77 |  | class RemoteFileSystem; | 
| 78 |  | } // namespace io | 
| 79 |  | namespace vectorized { | 
| 80 |  | class Block; | 
| 81 |  | } // namespace vectorized | 
| 82 |  | struct RowLocation; | 
| 83 |  | enum KeysType : int; | 
| 84 |  | enum SortType : int; | 
| 85 |  |  | 
| 86 |  | enum TabletStorageType { STORAGE_TYPE_LOCAL, STORAGE_TYPE_REMOTE, STORAGE_TYPE_REMOTE_AND_LOCAL }; | 
| 87 |  |  | 
| 88 |  | extern bvar::Adder<uint64_t> unused_remote_rowset_num; | 
| 89 |  |  | 
| 90 |  | static inline constexpr auto TRACE_TABLET_LOCK_THRESHOLD = std::chrono::seconds(1); | 
| 91 |  |  | 
| 92 |  | struct WriteCooldownMetaExecutors { | 
| 93 |  |     WriteCooldownMetaExecutors(size_t executor_nums = 5); | 
| 94 |  |  | 
| 95 |  |     void stop(); | 
| 96 |  |  | 
| 97 |  |     void submit(TabletSharedPtr tablet); | 
| 98 | 284k |     size_t _get_executor_pos(int64_t tablet_id) const { | 
| 99 | 284k |         return std::hash<int64_t>()(tablet_id) % _executor_nums; | 
| 100 | 284k |     }; | 
| 101 |  |     // Each executor is an mpsc to ensure uploads of the same tablet meta are not concurrent. | 
| 102 |  |     // FIXME(AlexYue): Use mpsc instead of `ThreadPool` with 1 thread. | 
| 103 |  |     // We use PriorityThreadPool since it would call status inside its `shutdown` function. | 
| 104 |  |     // Consider one situation where the StackTraceCache's singleton is destructed before | 
| 105 |  |     // this WriteCooldownMetaExecutors's singleton; then invoking the status would also call | 
| 106 |  |     // StackTraceCache, which would then result in a heap-use-after-free like #23834. | 
| 107 |  |     std::vector<std::unique_ptr<PriorityThreadPool>> _executors; | 
| 108 |  |     std::unordered_set<int64_t> _pending_tablets; | 
| 109 |  |     std::mutex _latch; | 
| 110 |  |     size_t _executor_nums; | 
| 111 |  | }; | 
| 112 |  |  | 
| 113 |  | class Tablet final : public BaseTablet { | 
| 114 |  | public: | 
| 115 |  |     Tablet(StorageEngine& engine, TabletMetaSharedPtr tablet_meta, DataDir* data_dir, | 
| 116 |  |            const std::string_view& cumulative_compaction_type = ""); | 
| 117 |  |  | 
| 118 | 37.6M |     DataDir* data_dir() const { return _data_dir; } | 
| 119 | 1.47M |     int64_t replica_id() const { return _tablet_meta->replica_id(); } | 
| 120 |  |  | 
| 121 | 30.9k |     std::string tablet_path() const override { return _tablet_path; } | 
| 122 |  |  | 
| 123 |  |     bool set_tablet_schema_into_rowset_meta(); | 
| 124 |  |     Status init(); | 
| 125 |  |     bool init_succeeded(); | 
| 126 |  |  | 
| 127 |  |     bool is_used(); | 
| 128 |  |  | 
| 129 |  |     void register_tablet_into_dir(); | 
| 130 |  |     void deregister_tablet_from_dir(); | 
| 131 |  |  | 
| 132 |  |     void save_meta(); | 
| 133 |  |     // Used in clone task, to update local meta when finishing a clone job | 
| 134 |  |     Status revise_tablet_meta(const std::vector<RowsetSharedPtr>& to_add, | 
| 135 |  |                               const std::vector<RowsetSharedPtr>& to_delete, | 
| 136 |  |                               bool is_incremental_clone); | 
| 137 |  |  | 
| 138 |  |     int64_t cumulative_layer_point() const; | 
| 139 |  |     void set_cumulative_layer_point(int64_t new_point); | 
| 140 |  |     inline int64_t cumulative_promotion_size() const; | 
| 141 |  |     inline void set_cumulative_promotion_size(int64_t new_size); | 
| 142 |  |  | 
| 143 |  |     // Disk space occupied by the tablet, including both local and remote. | 
| 144 |  |     size_t tablet_footprint() override; | 
| 145 |  |     // Local disk space occupied by tablet. | 
| 146 |  |     size_t tablet_local_size(); | 
| 147 |  |     // Remote disk space occupied by tablet. | 
| 148 |  |     size_t tablet_remote_size(); | 
| 149 |  |  | 
| 150 |  |     size_t num_rows(); | 
| 151 |  |     size_t version_count() const; | 
| 152 |  |     size_t stale_version_count() const; | 
| 153 |  |     bool exceed_version_limit(int32_t limit) override; | 
| 154 |  |     uint64_t segment_count() const; | 
| 155 |  |     Version max_version() const; | 
| 156 |  |     CumulativeCompactionPolicy* cumulative_compaction_policy(); | 
| 157 |  |  | 
| 158 |  |     // properties encapsulated in TabletSchema | 
| 159 |  |     SortType sort_type() const; | 
| 160 |  |     size_t sort_col_num() const; | 
| 161 |  |     size_t num_columns() const; | 
| 162 |  |     size_t num_null_columns() const; | 
| 163 |  |     size_t num_short_key_columns() const; | 
| 164 |  |     size_t num_rows_per_row_block() const; | 
| 165 |  |     double bloom_filter_fpp() const; | 
| 166 |  |     size_t next_unique_id() const; | 
| 167 |  |     int64_t avg_rs_meta_serialize_size() const; | 
| 168 |  |  | 
| 169 |  |     // operation in rowsets | 
| 170 |  |     Status add_rowset(RowsetSharedPtr rowset); | 
| 171 |  |     Status create_initial_rowset(const int64_t version); | 
| 172 |  |  | 
| 173 |  |     // MUST hold EXCLUSIVE `_meta_lock`. | 
| 174 |  |     Status modify_rowsets(std::vector<RowsetSharedPtr>& to_add, | 
| 175 |  |                           std::vector<RowsetSharedPtr>& to_delete, bool check_delete = false); | 
| 176 |  |     bool rowset_exists_unlocked(const RowsetSharedPtr& rowset); | 
| 177 |  |  | 
| 178 |  |     Status add_inc_rowset(const RowsetSharedPtr& rowset); | 
| 179 |  |     /// Delete stale rowsets by time. This delete policy uses now() minus | 
| 180 |  |     /// config::tablet_rowset_expired_stale_sweep_time_sec to compute the deadline of expired rowsets | 
| 181 |  |     /// to delete.  When a rowset is deleted, it is added to the StorageEngine unused map and its | 
| 182 |  |     /// need-to-delete flag is recorded. | 
| 183 |  |     void delete_expired_stale_rowset(); | 
| 184 |  |  | 
| 185 |  |     // if quiet is true, no error log will be printed if there are missing versions | 
| 186 |  |     Status check_version_integrity(const Version& version, bool quiet = false); | 
| 187 |  |     bool check_version_exist(const Version& version) const; | 
| 188 |  |     void acquire_version_and_rowsets( | 
| 189 |  |             std::vector<std::pair<Version, RowsetSharedPtr>>* version_rowsets) const; | 
| 190 |  |  | 
| 191 |  |     // If skip_missing_version is true, skip versions if they are missing. | 
| 192 |  |     Status capture_rs_readers(const Version& spec_version, std::vector<RowSetSplits>* rs_splits, | 
| 193 |  |                               const CaptureRowsetOps& opts) override; | 
| 194 |  |  | 
| 195 |  |     // Find the missed versions until the spec_version. | 
| 196 |  |     // | 
| 197 |  |     // for example: | 
| 198 |  |     //     [0-4][5-5][8-8][9-9][14-14] | 
| 199 |  |     // if spec_version = 12, it will return [6, 6], [7, 7], [10, 10], [11, 11], [12, 12] | 
| 200 |  |     Versions calc_missed_versions(int64_t spec_version, Versions existing_versions) const override; | 
| 201 |  |  | 
| 202 |  |     // meta lock | 
| 203 | 316k |     std::shared_mutex& get_header_lock() { return _meta_lock; } | 
| 204 | 920 |     std::mutex& get_rowset_update_lock() { return _rowset_update_lock; } | 
| 205 | 876 |     std::mutex& get_push_lock() { return _ingest_lock; } | 
| 206 | 7.23M |     std::mutex& get_base_compaction_lock() { return _base_compaction_lock; } | 
| 207 | 9.86M |     std::mutex& get_cumulative_compaction_lock() { return _cumulative_compaction_lock; } | 
| 208 | 1 |     std::shared_mutex& get_meta_store_lock() { return _meta_store_lock; } | 
| 209 |  |  | 
| 210 | 1.74k |     std::shared_timed_mutex& get_migration_lock() { return _migration_lock; } | 
| 211 |  |  | 
| 212 | 19 |     std::mutex& get_build_inverted_index_lock() { return _build_inverted_index_lock; } | 
| 213 |  |  | 
| 214 |  |     // operation for compaction | 
| 215 |  |     bool can_do_compaction(size_t path_hash, CompactionType compaction_type); | 
| 216 |  |     bool suitable_for_compaction( | 
| 217 |  |             CompactionType compaction_type, | 
| 218 |  |             std::shared_ptr<CumulativeCompactionPolicy> cumulative_compaction_policy); | 
| 219 |  |  | 
| 220 |  |     uint32_t calc_compaction_score(); | 
| 221 |  |  | 
| 222 |  |     // This function finds the max continuous version from the beginning. | 
| 223 |  |     // For example: if versions 1, 2, 3, 5, 6, 7 belong to the tablet, then 3 is the target. | 
| 224 |  |     // 3 will be saved in "version", and 7 will be saved in "max_version" if max_version != nullptr. | 
| 225 |  |     void max_continuous_version_from_beginning(Version* version, Version* max_version = nullptr); | 
| 226 |  |  | 
| 227 | 0 |     void set_bad(bool is_bad) { _is_bad = is_bad; } | 
| 228 |  |  | 
| 229 | 17.2M |     int64_t last_cumu_compaction_failure_time() { return _last_cumu_compaction_failure_millis; } | 
| 230 | 41.6k |     void set_last_cumu_compaction_failure_time(int64_t millis) { | 
| 231 | 41.6k |         _last_cumu_compaction_failure_millis = millis; | 
| 232 | 41.6k |     } | 
| 233 |  |  | 
| 234 | 7.22M |     int64_t last_base_compaction_failure_time() { return _last_base_compaction_failure_millis; } | 
| 235 | 19.1k |     void set_last_base_compaction_failure_time(int64_t millis) { | 
| 236 | 19.1k |         _last_base_compaction_failure_millis = millis; | 
| 237 | 19.1k |     } | 
| 238 |  |  | 
| 239 | 0 |     int64_t last_full_compaction_failure_time() { return _last_full_compaction_failure_millis; } | 
| 240 | 0 |     void set_last_full_compaction_failure_time(int64_t millis) { | 
| 241 | 0 |         _last_full_compaction_failure_millis = millis; | 
| 242 | 0 |     } | 
| 243 |  |  | 
| 244 | 38.2k |     int64_t last_cumu_compaction_success_time() { return _last_cumu_compaction_success_millis; } | 
| 245 | 1.58k |     void set_last_cumu_compaction_success_time(int64_t millis) { | 
| 246 | 1.58k |         _last_cumu_compaction_success_millis = millis; | 
| 247 | 1.58k |     } | 
| 248 |  |  | 
| 249 | 38.2k |     int64_t last_base_compaction_success_time() { return _last_base_compaction_success_millis; } | 
| 250 | 1.50k |     void set_last_base_compaction_success_time(int64_t millis) { | 
| 251 | 1.50k |         _last_base_compaction_success_millis = millis; | 
| 252 | 1.50k |     } | 
| 253 |  |  | 
| 254 | 0 |     int64_t last_full_compaction_success_time() { return _last_full_compaction_success_millis; } | 
| 255 | 0 |     void set_last_full_compaction_success_time(int64_t millis) { | 
| 256 | 0 |         _last_full_compaction_success_millis = millis; | 
| 257 | 0 |     } | 
| 258 |  |  | 
| 259 | 0 |     int64_t last_cumu_compaction_schedule_time() { return _last_cumu_compaction_schedule_millis; } | 
| 260 | 41.7k |     void set_last_cumu_compaction_schedule_time(int64_t millis) { | 
| 261 | 41.7k |         _last_cumu_compaction_schedule_millis = millis; | 
| 262 | 41.7k |     } | 
| 263 |  |  | 
| 264 | 0 |     int64_t last_base_compaction_schedule_time() { return _last_base_compaction_schedule_millis; } | 
| 265 | 9.57k |     void set_last_base_compaction_schedule_time(int64_t millis) { | 
| 266 | 9.57k |         _last_base_compaction_schedule_millis = millis; | 
| 267 | 9.57k |     } | 
| 268 |  |  | 
| 269 | 0 |     int64_t last_full_compaction_schedule_time() { return _last_full_compaction_schedule_millis; } | 
| 270 | 0 |     void set_last_full_compaction_schedule_time(int64_t millis) { | 
| 271 | 0 |         _last_full_compaction_schedule_millis = millis; | 
| 272 | 0 |     } | 
| 273 |  |  | 
| 274 | 0 |     void set_last_single_compaction_failure_status(std::string status) { | 
| 275 | 0 |         _last_single_compaction_failure_status = std::move(status); | 
| 276 | 0 |     } | 
| 277 |  |  | 
| 278 | 0 |     void set_last_fetched_version(Version version) { _last_fetched_version = std::move(version); } | 
| 279 |  |  | 
| 280 |  |     void delete_all_files(); | 
| 281 |  |  | 
| 282 |  |     void check_tablet_path_exists(); | 
| 283 |  |  | 
| 284 |  |     std::vector<RowsetSharedPtr> pick_candidate_rowsets_to_cumulative_compaction(); | 
| 285 |  |     std::vector<RowsetSharedPtr> pick_candidate_rowsets_to_base_compaction(); | 
| 286 |  |     std::vector<RowsetSharedPtr> pick_candidate_rowsets_to_full_compaction(); | 
| 287 |  |     std::vector<RowsetSharedPtr> pick_candidate_rowsets_to_build_inverted_index( | 
| 288 |  |             const std::set<int64_t>& alter_index_uids, bool is_drop_op); | 
| 289 |  |  | 
| 290 |  |     // used for single compaction to get the local versions | 
| 291 |  |     // Single compaction does not require remote rowsets and cannot violate the cooldown semantics | 
| 292 |  |     std::vector<Version> get_all_local_versions(); | 
| 293 |  |  | 
| 294 |  |     void calculate_cumulative_point(); | 
| 295 |  |     // TODO(ygl): | 
| 296 | 0 |     bool is_primary_replica() { return false; } | 
| 297 |  |  | 
| 298 |  |     // return true if the checkpoint is actually done | 
| 299 |  |     bool do_tablet_meta_checkpoint(); | 
| 300 |  |  | 
| 301 |  |     // Check whether the rowset is useful or not; a useless rowset can then be swept up. | 
| 302 |  |     // Rowset which is under tablet's management is useful, i.e. rowset is in | 
| 303 |  |     // _rs_version_map, or _stale_rs_version_map. | 
| 304 |  |     // Rowset whose version range is not covered by this tablet is also useful. | 
| 305 |  |     bool rowset_meta_is_useful(RowsetMetaSharedPtr rowset_meta); | 
| 306 |  |  | 
| 307 |  |     void build_tablet_report_info(TTabletInfo* tablet_info, | 
| 308 |  |                                   bool enable_consecutive_missing_check = false, | 
| 309 |  |                                   bool enable_path_check = false); | 
| 310 |  |  | 
| 311 |  |     // return a json string to show the compaction status of this tablet | 
| 312 |  |     void get_compaction_status(std::string* json_result); | 
| 313 |  |  | 
| 314 |  |     static Status prepare_compaction_and_calculate_permits( | 
| 315 |  |             CompactionType compaction_type, const TabletSharedPtr& tablet, | 
| 316 |  |             std::shared_ptr<CompactionMixin>& compaction, int64_t& permits); | 
| 317 |  |  | 
| 318 |  |     void execute_compaction(CompactionMixin& compaction); | 
| 319 |  |     void execute_single_replica_compaction(SingleReplicaCompaction& compaction); | 
| 320 |  |  | 
| 321 |  |     void set_cumulative_compaction_policy( | 
| 322 | 0 |             std::shared_ptr<CumulativeCompactionPolicy> cumulative_compaction_policy) { | 
| 323 | 0 |         _cumulative_compaction_policy = cumulative_compaction_policy; | 
| 324 | 0 |     } | 
| 325 |  |  | 
| 326 | 0 |     std::shared_ptr<CumulativeCompactionPolicy> get_cumulative_compaction_policy() { | 
| 327 | 0 |         return _cumulative_compaction_policy; | 
| 328 | 0 |     } | 
| 329 |  |  | 
| 330 | 41.7k |     void set_last_cumu_compaction_status(std::string status) { | 
| 331 | 41.7k |         _last_cumu_compaction_status = std::move(status); | 
| 332 | 41.7k |     } | 
| 333 |  |  | 
| 334 | 0 |     std::string get_last_cumu_compaction_status() { return _last_cumu_compaction_status; } | 
| 335 |  |  | 
| 336 | 19.1k |     void set_last_base_compaction_status(std::string status) { | 
| 337 | 19.1k |         _last_base_compaction_status = std::move(status); | 
| 338 | 19.1k |     } | 
| 339 |  |  | 
| 340 | 0 |     std::string get_last_base_compaction_status() { return _last_base_compaction_status; } | 
| 341 |  |  | 
| 342 | 0 |     void set_last_full_compaction_status(std::string status) { | 
| 343 | 0 |         _last_full_compaction_status = std::move(status); | 
| 344 | 0 |     } | 
| 345 |  |  | 
| 346 | 0 |     std::string get_last_full_compaction_status() { return _last_full_compaction_status; } | 
| 347 |  |  | 
| 348 |  |     std::tuple<int64_t, int64_t> get_visible_version_and_time() const; | 
| 349 |  |  | 
| 350 | 499k |     void set_visible_version(const std::shared_ptr<const VersionWithTime>& visible_version) { | 
| 351 | 499k |         _visible_version.store(visible_version); | 
| 352 | 499k |     } | 
| 353 |  |  | 
| 354 |  |     bool should_fetch_from_peer(); | 
| 355 |  |  | 
| 356 | 0 |     inline bool all_beta() const { | 
| 357 | 0 |         std::shared_lock rdlock(_meta_lock); | 
| 358 | 0 |         return _tablet_meta->all_beta(); | 
| 359 | 0 |     } | 
| 360 |  |  | 
| 361 | 0 |     const TabletSchemaSPtr& tablet_schema_unlocked() const { return _max_version_schema; } | 
| 362 |  |  | 
| 363 |  |     Result<std::unique_ptr<RowsetWriter>> create_rowset_writer(RowsetWriterContext& context, | 
| 364 |  |                                                                bool vertical) override; | 
| 365 |  |  | 
| 366 |  |     Result<std::unique_ptr<RowsetWriter>> create_transient_rowset_writer( | 
| 367 |  |             const Rowset& rowset, std::shared_ptr<PartialUpdateInfo> partial_update_info, | 
| 368 |  |             int64_t txn_expiration = 0) override; | 
| 369 |  |     Result<std::unique_ptr<RowsetWriter>> create_transient_rowset_writer( | 
| 370 |  |             RowsetWriterContext& context, const RowsetId& rowset_id); | 
| 371 |  |  | 
| 372 |  |     Status create_rowset(const RowsetMetaSharedPtr& rowset_meta, RowsetSharedPtr* rowset); | 
| 373 |  |  | 
| 374 |  |     // MUST hold EXCLUSIVE `_meta_lock` | 
| 375 |  |     void add_rowsets(const std::vector<RowsetSharedPtr>& to_add); | 
| 376 |  |     // MUST hold EXCLUSIVE `_meta_lock` | 
| 377 |  |     Status delete_rowsets(const std::vector<RowsetSharedPtr>& to_delete, bool move_to_stale); | 
| 378 |  |  | 
| 379 |  |     // MUST hold SHARED `_meta_lock` | 
| 380 | 19.4k |     const auto& rowset_map() const { return _rs_version_map; } | 
| 381 |  |     // MUST hold SHARED `_meta_lock` | 
| 382 | 19.4k |     const auto& stale_rowset_map() const { return _stale_rs_version_map; } | 
| 383 |  |  | 
| 384 |  |     //////////////////////////////////////////////////////////////////////////// | 
| 385 |  |     // begin cooldown functions | 
| 386 |  |     //////////////////////////////////////////////////////////////////////////// | 
| 387 | 956k |     int64_t storage_policy_id() const { return _tablet_meta->storage_policy_id(); } | 
| 388 |  |     void set_storage_policy_id(int64_t id) { _tablet_meta->set_storage_policy_id(id); } | 
| 389 |  |  | 
| 390 | 4.82M |     int64_t last_failed_follow_cooldown_time() const { return _last_failed_follow_cooldown_time; } | 
| 391 |  |  | 
| 392 |  |     // Cooldown to remote fs. | 
| 393 |  |     Status cooldown(RowsetSharedPtr rowset = nullptr); | 
| 394 |  |  | 
| 395 |  |     RowsetSharedPtr pick_cooldown_rowset(); | 
| 396 |  |  | 
| 397 |  |     RowsetSharedPtr need_cooldown(int64_t* cooldown_timestamp, size_t* file_size); | 
| 398 |  |  | 
| 399 |  |     struct CooldownConf { | 
| 400 |  |         int64_t term = -1; | 
| 401 |  |         int64_t cooldown_replica_id = -1; | 
| 402 |  |     }; | 
| 403 |  |  | 
| 404 | 14 |     CooldownConf cooldown_conf() const { | 
| 405 | 14 |         std::shared_lock rlock(_cooldown_conf_lock); | 
| 406 | 14 |         return _cooldown_conf; | 
| 407 | 14 |     } | 
| 408 |  |  | 
| 409 | 42 |     CooldownConf cooldown_conf_unlocked() const { return _cooldown_conf; } | 
| 410 |  |  | 
| 411 |  |     // Return `true` if the update succeeds | 
| 412 |  |     bool update_cooldown_conf(int64_t cooldown_term, int64_t cooldown_replica_id); | 
| 413 |  |  | 
| 414 |  |     Status remove_all_remote_rowsets(); | 
| 415 |  |  | 
| 416 |  |     void record_unused_remote_rowset(const RowsetId& rowset_id, const std::string& resource, | 
| 417 |  |                                      int64_t num_segments); | 
| 418 |  |  | 
| 419 |  |     uint32_t calc_cold_data_compaction_score() const; | 
| 420 |  |  | 
| 421 | 19 |     std::mutex& get_cold_compaction_lock() { return _cold_compaction_lock; } | 
| 422 |  |  | 
| 423 | 0 |     std::shared_mutex& get_cooldown_conf_lock() { return _cooldown_conf_lock; } | 
| 424 |  |  | 
| 425 |  |     static void async_write_cooldown_meta(TabletSharedPtr tablet); | 
| 426 |  |     // Return `ABORTED` if the operation should not be retried | 
| 427 |  |     Status write_cooldown_meta(); | 
| 428 |  |     //////////////////////////////////////////////////////////////////////////// | 
| 429 |  |     // end cooldown functions | 
| 430 |  |     //////////////////////////////////////////////////////////////////////////// | 
| 431 |  |  | 
| 432 |  |     CalcDeleteBitmapExecutor* calc_delete_bitmap_executor() override; | 
| 433 |  |     Status save_delete_bitmap(const TabletTxnInfo* txn_info, int64_t txn_id, | 
| 434 |  |                               DeleteBitmapPtr delete_bitmap, RowsetWriter* rowset_writer, | 
| 435 |  |                               const RowsetIdUnorderedSet& cur_rowset_ids, int64_t lock_id = -1, | 
| 436 |  |                               int64_t next_visible_version = -1) override; | 
| 437 |  |  | 
| 438 |  |     void merge_delete_bitmap(const DeleteBitmap& delete_bitmap); | 
| 439 |  |     bool check_all_rowset_segment(); | 
| 440 |  |  | 
| 441 |  |     void update_max_version_schema(const TabletSchemaSPtr& tablet_schema); | 
| 442 |  |  | 
| 443 |  |     void set_skip_compaction(bool skip, | 
| 444 |  |                              CompactionType compaction_type = CompactionType::CUMULATIVE_COMPACTION, | 
| 445 |  |                              int64_t start = -1); | 
| 446 |  |     bool should_skip_compaction(CompactionType compaction_type, int64_t now); | 
| 447 |  |  | 
| 448 |  |     std::vector<std::string> get_binlog_filepath(std::string_view binlog_version) const; | 
| 449 |  |     std::pair<std::string, int64_t> get_binlog_info(std::string_view binlog_version) const; | 
| 450 |  |     std::string get_rowset_binlog_meta(std::string_view binlog_version, | 
| 451 |  |                                        std::string_view rowset_id) const; | 
| 452 |  |     Status get_rowset_binlog_metas(const std::vector<int64_t>& binlog_versions, | 
| 453 |  |                                    RowsetBinlogMetasPB* metas_pb); | 
| 454 |  |     Status get_rowset_binlog_metas(Version binlog_versions, RowsetBinlogMetasPB* metas_pb); | 
| 455 |  |     std::string get_segment_filepath(std::string_view rowset_id, | 
| 456 |  |                                      std::string_view segment_index) const; | 
| 457 |  |     std::string get_segment_filepath(std::string_view rowset_id, int64_t segment_index) const; | 
| 458 |  |     bool can_add_binlog(uint64_t total_binlog_size) const; | 
| 459 |  |     void gc_binlogs(int64_t version); | 
| 460 |  |     Status ingest_binlog_metas(RowsetBinlogMetasPB* metas_pb); | 
| 461 |  |  | 
| 462 |  |     void report_error(const Status& st); | 
| 463 |  |  | 
| 464 | 0 |     inline int64_t get_io_error_times() const { return _io_error_times; } | 
| 465 |  |  | 
| 466 | 1.45M |     inline bool is_io_error_too_times() const { | 
| 467 | 1.45M |         return config::max_tablet_io_errors > 0 && _io_error_times >= config::max_tablet_io_errors; | 
| 468 | 1.45M |     } | 
| 469 |  |  | 
| 470 | 844 |     int64_t get_table_id() { return _tablet_meta->table_id(); } | 
| 471 |  |  | 
| 472 |  |     // binlog related functions | 
| 473 |  |     bool is_enable_binlog(); | 
| 474 | 0 |     bool is_binlog_enabled() { return _tablet_meta->binlog_config().is_enable(); } | 
| 475 | 0 |     int64_t binlog_ttl_ms() const { return _tablet_meta->binlog_config().ttl_seconds(); } | 
| 476 | 0 |     int64_t binlog_max_bytes() const { return _tablet_meta->binlog_config().max_bytes(); } | 
| 477 |  |  | 
| 478 |  |     void set_binlog_config(BinlogConfig binlog_config); | 
| 479 |  |  | 
| 480 | 0 |     void set_is_full_compaction_running(bool is_full_compaction_running) { | 
| 481 | 0 |         _is_full_compaction_running = is_full_compaction_running; | 
| 482 | 0 |     } | 
| 483 | 0 |     inline bool is_full_compaction_running() const { return _is_full_compaction_running; } | 
| 484 |  |     void clear_cache() override; | 
| 485 |  |  | 
| 486 |  |     int32_t get_compaction_score() const { return _compaction_score; } | 
| 487 |  |  | 
| 488 | 0 |     void set_compaction_score(int32_t compaction_score) { _compaction_score = compaction_score; } | 
| 489 |  |  | 
| 490 | 24.0k |     void add_compaction_score(int32_t score) { | 
| 491 | 24.0k |         if (_compaction_score < 0) { | 
| 492 | 23.1k |             return; | 
| 493 | 23.1k |         } | 
| 494 | 983 |         _compaction_score += score; | 
| 495 | 983 |     } | 
| 496 |  |  | 
| 497 | 0 |     void minus_compaction_score(int32_t score) { | 
| 498 | 0 |         if (_compaction_score < 0) { | 
| 499 | 0 |             return; | 
| 500 | 0 |         } | 
| 501 | 0 |         _compaction_score -= score; | 
| 502 | 0 |     } | 
| 503 |  |  | 
| 504 |  |     Status prepare_txn(TPartitionId partition_id, TTransactionId transaction_id, | 
| 505 |  |                        const PUniqueId& load_id, bool ingest); | 
| 506 |  |     // TODO: commit_txn | 
| 507 |  |  | 
| 508 |  | private: | 
| 509 |  |     Status _init_once_action(); | 
| 510 |  |     bool _contains_rowset(const RowsetId rowset_id); | 
| 511 |  |     Status _contains_version(const Version& version); | 
| 512 |  |  | 
| 513 |  |     // Returns: | 
| 514 |  |     // version: the max continuous version from beginning | 
| 515 |  |     // max_version: the max version of this tablet | 
| 516 |  |     void _max_continuous_version_from_beginning_unlocked(Version* version, Version* max_version, | 
| 517 |  |                                                          bool* has_version_cross) const; | 
| 518 |  |     RowsetSharedPtr _rowset_with_largest_size(); | 
| 519 |  |     /// Delete stale rowset by version. This method not only deletes the version in the expired rowset map, | 
| 520 |  |     /// but also deletes the version in the rowset meta vector. | 
| 521 |  |     void _delete_stale_rowset_by_version(const Version& version); | 
| 522 |  |  | 
| 523 |  |     uint32_t _calc_cumulative_compaction_score( | 
| 524 |  |             std::shared_ptr<CumulativeCompactionPolicy> cumulative_compaction_policy); | 
| 525 |  |     uint32_t _calc_base_compaction_score() const; | 
| 526 |  |  | 
| 527 |  |     std::vector<RowsetSharedPtr> _pick_visible_rowsets_to_compaction(int64_t min_start_version, | 
| 528 |  |                                                                      int64_t max_start_version); | 
| 529 |  |  | 
| 530 |  |     void _init_context_common_fields(RowsetWriterContext& context); | 
| 531 |  |  | 
| 532 |  |     //////////////////////////////////////////////////////////////////////////// | 
| 533 |  |     // begin cooldown functions | 
| 534 |  |     //////////////////////////////////////////////////////////////////////////// | 
| 535 |  |     Status _cooldown_data(RowsetSharedPtr rowset); | 
| 536 |  |     Status _follow_cooldowned_data(); | 
| 537 |  |     Status _read_cooldown_meta(const StorageResource& storage_resource, | 
| 538 |  |                                TabletMetaPB* tablet_meta_pb); | 
| 539 |  |     bool _has_data_to_cooldown(); | 
| 540 |  |     int64_t _get_newest_cooldown_time(const RowsetSharedPtr& rowset); | 
| 541 |  |     //////////////////////////////////////////////////////////////////////////// | 
| 542 |  |     // end cooldown functions | 
| 543 |  |     //////////////////////////////////////////////////////////////////////////// | 
| 544 |  |  | 
| 545 |  |     void _clear_cache_by_rowset(const BetaRowsetSharedPtr& rowset); | 
| 546 |  |     void check_table_size_correctness(); | 
| 547 |  |     std::string get_segment_path(const RowsetMetaSharedPtr& rs_meta, int64_t seg_id); | 
| 548 |  |     int64_t get_segment_file_size(const RowsetMetaSharedPtr& rs_meta); | 
| 549 |  |     int64_t get_inverted_index_file_size(const RowsetMetaSharedPtr& rs_meta); | 
| 550 |  |  | 
| 551 |  | public: | 
| 552 |  |     static const int64_t K_INVALID_CUMULATIVE_POINT = -1; | 
| 553 |  |  | 
| 554 |  | private: | 
| 555 |  |     StorageEngine& _engine; | 
| 556 |  |     DataDir* _data_dir = nullptr; | 
| 557 |  |  | 
| 558 |  |     std::string _tablet_path; | 
| 559 |  |  | 
| 560 |  |     DorisCallOnce<Status> _init_once; | 
| 561 |  |     // meta store lock is used to prevent two threads from doing a checkpoint concurrently; | 
| 562 |  |     // it will be used in econ-mode in the future | 
| 563 |  |     std::shared_mutex _meta_store_lock; | 
| 564 |  |     std::mutex _ingest_lock; | 
| 565 |  |     std::mutex _base_compaction_lock; | 
| 566 |  |     std::mutex _cumulative_compaction_lock; | 
| 567 |  |     std::shared_timed_mutex _migration_lock; | 
| 568 |  |     std::mutex _build_inverted_index_lock; | 
| 569 |  |  | 
| 570 |  |     // In unique key table with MoW, we should guarantee that only one | 
| 571 |  |     // writer can update rowset and delete bitmap at the same time. | 
| 572 |  |     // We use a separate lock rather than _meta_lock, to avoid blocking read queries | 
| 573 |  |     // during publish_txn, which might take hundreds of milliseconds | 
| 574 |  |     mutable std::mutex _rowset_update_lock; | 
| 575 |  |  | 
| 576 |  |     // if this tablet is broken, set to true. default is false | 
| 577 |  |     std::atomic<bool> _is_bad; | 
| 578 |  |     // timestamp of last cumu compaction failure | 
| 579 |  |     std::atomic<int64_t> _last_cumu_compaction_failure_millis; | 
| 580 |  |     // timestamp of last base compaction failure | 
| 581 |  |     std::atomic<int64_t> _last_base_compaction_failure_millis; | 
| 582 |  |     // timestamp of last full compaction failure | 
| 583 |  |     std::atomic<int64_t> _last_full_compaction_failure_millis; | 
| 584 |  |     // timestamp of last cumu compaction success | 
| 585 |  |     std::atomic<int64_t> _last_cumu_compaction_success_millis; | 
| 586 |  |     // timestamp of last base compaction success | 
| 587 |  |     std::atomic<int64_t> _last_base_compaction_success_millis; | 
| 588 |  |     // timestamp of last full compaction success | 
| 589 |  |     std::atomic<int64_t> _last_full_compaction_success_millis; | 
| 590 |  |     // timestamp of last cumu compaction schedule time | 
| 591 |  |     std::atomic<int64_t> _last_cumu_compaction_schedule_millis; | 
| 592 |  |     // timestamp of last base compaction schedule time | 
| 593 |  |     std::atomic<int64_t> _last_base_compaction_schedule_millis; | 
| 594 |  |     // timestamp of last full compaction schedule time | 
| 595 |  |     std::atomic<int64_t> _last_full_compaction_schedule_millis; | 
| 596 |  |     std::atomic<int64_t> _cumulative_point; | 
| 597 |  |     std::atomic<int64_t> _cumulative_promotion_size; | 
| 598 |  |     std::atomic<int32_t> _newly_created_rowset_num; | 
| 599 |  |     std::atomic<int64_t> _last_checkpoint_time; | 
| 600 |  |     std::string _last_cumu_compaction_status; | 
| 601 |  |     std::string _last_base_compaction_status; | 
| 602 |  |     std::string _last_full_compaction_status; | 
| 603 |  |  | 
| 604 |  |     // single replica compaction status | 
| 605 |  |     std::string _last_single_compaction_failure_status; | 
| 606 |  |     Version _last_fetched_version; | 
| 607 |  |  | 
| 608 |  |     // cumulative compaction policy | 
| 609 |  |     std::shared_ptr<CumulativeCompactionPolicy> _cumulative_compaction_policy; | 
| 610 |  |     std::string_view _cumulative_compaction_type; | 
| 611 |  |  | 
| 612 |  |     // use a separate thread to check all tablets paths existence | 
| 613 |  |     std::atomic<bool> _is_tablet_path_exists; | 
| 614 |  |  | 
| 615 |  |     int64_t _last_missed_version; | 
| 616 |  |     int64_t _last_missed_time_s; | 
| 617 |  |  | 
| 618 |  |     bool _skip_cumu_compaction = false; | 
| 619 |  |     int64_t _skip_cumu_compaction_ts; | 
| 620 |  |  | 
| 621 |  |     bool _skip_base_compaction = false; | 
| 622 |  |     int64_t _skip_base_compaction_ts; | 
| 623 |  |  | 
| 624 |  |     // cooldown related | 
| 625 |  |     CooldownConf _cooldown_conf; | 
| 626 |  |     // `_cooldown_conf_lock` is used to serialize update cooldown conf and all operations that: | 
| 627 |  |     // 1. read cooldown conf | 
| 628 |  |     // 2. upload rowsets to remote storage | 
| 629 |  |     // 3. update cooldown meta id | 
| 630 |  |     mutable std::shared_mutex _cooldown_conf_lock; | 
| 631 |  |     // `_cold_compaction_lock` is used to serialize cold data compaction and all operations that | 
| 632 |  |     // may delete compaction input rowsets. | 
| 633 |  |     std::mutex _cold_compaction_lock; | 
| 634 |  |     int64_t _last_failed_follow_cooldown_time = 0; | 
| 635 |  |  | 
| 636 |  |     int64_t _io_error_times = 0; | 
| 637 |  |  | 
| 638 |  |     // partition's visible version. it sync from fe, but not real-time. | 
| 639 |  |     atomic_shared_ptr<const VersionWithTime> _visible_version; | 
| 640 |  |  | 
| 641 |  |     std::atomic_bool _is_full_compaction_running = false; | 
| 642 |  |  | 
| 643 |  |     int32_t _compaction_score = -1; | 
| 644 |  |     int32_t _score_check_cnt = 0; | 
| 645 |  | }; | 
| 646 |  |  | 
| 647 | 38.7k | inline CumulativeCompactionPolicy* Tablet::cumulative_compaction_policy() { | 
| 648 | 38.7k |     return _cumulative_compaction_policy.get(); | 
| 649 | 38.7k | } | 
| 650 |  |  | 
| 651 | 17.7M | inline bool Tablet::init_succeeded() { | 
| 652 | 17.7M |     return _init_once.has_called() && _init_once.stored_result().ok(); | 
| 653 | 17.7M | } | 
| 654 |  |  | 
| 655 | 21.1M | inline bool Tablet::is_used() { | 
| 656 | 21.1M |     return !_is_bad && _data_dir->is_used(); | 
| 657 | 21.1M | } | 
| 658 |  |  | 
| 659 | 488k | inline void Tablet::register_tablet_into_dir() { | 
| 660 | 488k |     _data_dir->register_tablet(this); | 
| 661 | 488k | } | 
| 662 |  |  | 
| 663 | 11.5k | inline void Tablet::deregister_tablet_from_dir() { | 
| 664 | 11.5k |     _data_dir->deregister_tablet(this); | 
| 665 | 11.5k | } | 
| 666 |  |  | 
| 667 | 526k | inline int64_t Tablet::cumulative_layer_point() const { | 
| 668 | 526k |     return _cumulative_point; | 
| 669 | 526k | } | 
| 670 |  |  | 
| 671 | 2.29k | inline void Tablet::set_cumulative_layer_point(int64_t new_point) { | 
| 672 |  |     // cumulative point should only be reset to -1, or be increased | 
| 673 | 2.29k |     CHECK(new_point == Tablet::K_INVALID_CUMULATIVE_POINT || new_point >= _cumulative_point) | 
| 674 | 0 |             << "Unexpected cumulative point: " << new_point | 
| 675 | 0 |             << ", origin: " << _cumulative_point.load(); | 
| 676 | 2.29k |     _cumulative_point = new_point; | 
| 677 | 2.29k | } | 
| 678 |  |  | 
| 679 | 38.5k | inline int64_t Tablet::cumulative_promotion_size() const { | 
| 680 | 38.5k |     return _cumulative_promotion_size; | 
| 681 | 38.5k | } | 
| 682 |  |  | 
| 683 | 67.9k | inline void Tablet::set_cumulative_promotion_size(int64_t new_size) { | 
| 684 | 67.9k |     _cumulative_promotion_size = new_size; | 
| 685 | 67.9k | } | 
| 686 |  |  | 
| 687 |  | // TODO(lingbin): Why other methods that need to get information from _tablet_meta | 
| 688 |  | // are not locked, here needs a comment to explain. | 
| 689 | 2.59k | inline size_t Tablet::tablet_footprint() { | 
| 690 | 2.59k |     std::shared_lock rdlock(_meta_lock); | 
| 691 | 2.59k |     return _tablet_meta->tablet_footprint(); | 
| 692 | 2.59k | } | 
| 693 |  |  | 
| 694 | 4.33M | inline size_t Tablet::tablet_local_size() { | 
| 695 | 4.33M |     std::shared_lock rdlock(_meta_lock); | 
| 696 | 4.33M |     return _tablet_meta->tablet_local_size(); | 
| 697 | 4.33M | } | 
| 698 |  |  | 
| 699 | 4.33M | inline size_t Tablet::tablet_remote_size() { | 
| 700 | 4.33M |     std::shared_lock rdlock(_meta_lock); | 
| 701 | 4.33M |     return _tablet_meta->tablet_remote_size(); | 
| 702 | 4.33M | } | 
| 703 |  |  | 
| 704 |  | // TODO(lingbin): Why other methods which need to get information from _tablet_meta | 
| 705 |  | // are not locked, here needs a comment to explain. | 
| 706 |  | inline size_t Tablet::num_rows() { | 
| 707 |  |     std::shared_lock rdlock(_meta_lock); | 
| 708 |  |     return _tablet_meta->num_rows(); | 
| 709 |  | } | 
| 710 |  |  | 
| 711 | 5.79M | inline size_t Tablet::version_count() const { | 
| 712 | 5.79M |     std::shared_lock rdlock(_meta_lock); | 
| 713 | 5.79M |     return _tablet_meta->version_count(); | 
| 714 | 5.79M | } | 
| 715 |  |  | 
| 716 | 872 | inline size_t Tablet::stale_version_count() const { | 
| 717 | 872 |     std::shared_lock rdlock(_meta_lock); | 
| 718 | 872 |     return _tablet_meta->stale_version_count(); | 
| 719 | 872 | } | 
| 720 |  |  | 
| 721 | 526k | inline Version Tablet::max_version() const { | 
| 722 | 526k |     std::shared_lock rdlock(_meta_lock); | 
| 723 | 526k |     return _tablet_meta->max_version(); | 
| 724 | 526k | } | 
| 725 |  |  | 
| 726 | 5.79M | inline uint64_t Tablet::segment_count() const { | 
| 727 | 5.79M |     std::shared_lock rdlock(_meta_lock); | 
| 728 | 5.79M |     uint64_t segment_nums = 0; | 
| 729 | 10.7M |     for (const auto& [_, rs_meta] : _tablet_meta->all_rs_metas()) { | 
| 730 | 10.7M |         segment_nums += rs_meta->num_segments(); | 
| 731 | 10.7M |     } | 
| 732 | 5.79M |     return segment_nums; | 
| 733 | 5.79M | } | 
| 734 |  |  | 
| 735 | 0 | inline SortType Tablet::sort_type() const { | 
| 736 | 0 |     return _tablet_meta->tablet_schema()->sort_type(); | 
| 737 | 0 | } | 
| 738 |  |  | 
| 739 | 0 | inline size_t Tablet::sort_col_num() const { | 
| 740 | 0 |     return _tablet_meta->tablet_schema()->sort_col_num(); | 
| 741 | 0 | } | 
| 742 |  |  | 
| 743 | 0 | inline size_t Tablet::num_columns() const { | 
| 744 | 0 |     return _tablet_meta->tablet_schema()->num_columns(); | 
| 745 | 0 | } | 
| 746 |  |  | 
| 747 | 0 | inline size_t Tablet::num_null_columns() const { | 
| 748 | 0 |     return _tablet_meta->tablet_schema()->num_null_columns(); | 
| 749 | 0 | } | 
| 750 |  |  | 
| 751 | 0 | inline size_t Tablet::num_short_key_columns() const { | 
| 752 | 0 |     return _tablet_meta->tablet_schema()->num_short_key_columns(); | 
| 753 | 0 | } | 
| 754 |  |  | 
| 755 | 0 | inline size_t Tablet::num_rows_per_row_block() const { | 
| 756 | 0 |     return _tablet_meta->tablet_schema()->num_rows_per_row_block(); | 
| 757 | 0 | } | 
| 758 |  |  | 
| 759 | 0 | inline double Tablet::bloom_filter_fpp() const { | 
| 760 | 0 |     return _tablet_meta->tablet_schema()->bloom_filter_fpp(); | 
| 761 | 0 | } | 
| 762 |  |  | 
| 763 | 0 | inline size_t Tablet::next_unique_id() const { | 
| 764 | 0 |     return _tablet_meta->tablet_schema()->next_column_unique_id(); | 
| 765 | 0 | } | 
| 766 |  |  | 
| 767 | 872 | inline int64_t Tablet::avg_rs_meta_serialize_size() const { | 
| 768 | 872 |     return _tablet_meta->avg_rs_meta_serialize_size(); | 
| 769 | 872 | } | 
| 770 |  |  | 
| 771 |  | #include "common/compile_check_end.h" | 
| 772 |  | } // namespace doris |