/root/doris/be/src/cloud/cloud_tablet.cpp
| Line | Count | Source | 
| 1 |  | // Licensed to the Apache Software Foundation (ASF) under one | 
| 2 |  | // or more contributor license agreements.  See the NOTICE file | 
| 3 |  | // distributed with this work for additional information | 
| 4 |  | // regarding copyright ownership.  The ASF licenses this file | 
| 5 |  | // to you under the Apache License, Version 2.0 (the | 
| 6 |  | // "License"); you may not use this file except in compliance | 
| 7 |  | // with the License.  You may obtain a copy of the License at | 
| 8 |  | // | 
| 9 |  | //   http://www.apache.org/licenses/LICENSE-2.0 | 
| 10 |  | // | 
| 11 |  | // Unless required by applicable law or agreed to in writing, | 
| 12 |  | // software distributed under the License is distributed on an | 
| 13 |  | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | 
| 14 |  | // KIND, either express or implied.  See the License for the | 
| 15 |  | // specific language governing permissions and limitations | 
| 16 |  | // under the License. | 
| 17 |  |  | 
| 18 |  | #include "cloud/cloud_tablet.h" | 
| 19 |  |  | 
| 20 |  | #include <bvar/bvar.h> | 
| 21 |  | #include <bvar/latency_recorder.h> | 
| 22 |  | #include <gen_cpp/Types_types.h> | 
| 23 |  | #include <gen_cpp/olap_file.pb.h> | 
| 24 |  | #include <rapidjson/document.h> | 
| 25 |  | #include <rapidjson/encodings.h> | 
| 26 |  | #include <rapidjson/prettywriter.h> | 
| 27 |  | #include <rapidjson/rapidjson.h> | 
| 28 |  | #include <rapidjson/stringbuffer.h> | 
| 29 |  |  | 
| 30 |  | #include <atomic> | 
| 31 |  | #include <chrono> | 
| 32 |  | #include <cstdint> | 
| 33 |  | #include <memory> | 
| 34 |  | #include <ranges> | 
| 35 |  | #include <ratio> | 
| 36 |  | #include <shared_mutex> | 
| 37 |  | #include <unordered_map> | 
| 38 |  | #include <vector> | 
| 39 |  |  | 
| 40 |  | #include "cloud/cloud_meta_mgr.h" | 
| 41 |  | #include "cloud/cloud_storage_engine.h" | 
| 42 |  | #include "cloud/cloud_tablet_mgr.h" | 
| 43 |  | #include "cloud/cloud_warm_up_manager.h" | 
| 44 |  | #include "common/cast_set.h" | 
| 45 |  | #include "common/config.h" | 
| 46 |  | #include "common/logging.h" | 
| 47 |  | #include "cpp/sync_point.h" | 
| 48 |  | #include "io/cache/block_file_cache_downloader.h" | 
| 49 |  | #include "io/cache/block_file_cache_factory.h" | 
| 50 |  | #include "olap/base_tablet.h" | 
| 51 |  | #include "olap/compaction.h" | 
| 52 |  | #include "olap/cumulative_compaction_time_series_policy.h" | 
| 53 |  | #include "olap/olap_define.h" | 
| 54 |  | #include "olap/rowset/beta_rowset.h" | 
| 55 |  | #include "olap/rowset/rowset.h" | 
| 56 |  | #include "olap/rowset/rowset_factory.h" | 
| 57 |  | #include "olap/rowset/rowset_fwd.h" | 
| 58 |  | #include "olap/rowset/rowset_writer.h" | 
| 59 |  | #include "olap/rowset/segment_v2/inverted_index_desc.h" | 
| 60 |  | #include "olap/storage_policy.h" | 
| 61 |  | #include "olap/tablet_schema.h" | 
| 62 |  | #include "olap/txn_manager.h" | 
| 63 |  | #include "util/debug_points.h" | 
| 64 |  | #include "vec/common/schema_util.h" | 
| 65 |  |  | 
| 66 |  | namespace doris { | 
| 67 |  | #include "common/compile_check_begin.h" | 
| 68 |  | using namespace ErrorCode; | 
| 69 |  |  | 
| 70 |  | bvar::LatencyRecorder g_cu_compaction_get_delete_bitmap_lock_time_ms( | 
| 71 |  |         "cu_compaction_get_delete_bitmap_lock_time_ms"); | 
| 72 |  | bvar::LatencyRecorder g_base_compaction_get_delete_bitmap_lock_time_ms( | 
| 73 |  |         "base_compaction_get_delete_bitmap_lock_time_ms"); | 
| 74 |  |  | 
| 75 |  | bvar::Adder<int64_t> g_unused_rowsets_count("unused_rowsets_count"); | 
| 76 |  | bvar::Adder<int64_t> g_unused_rowsets_bytes("unused_rowsets_bytes"); | 
| 77 |  |  | 
| 78 |  | bvar::Adder<int64_t> g_capture_prefer_cache_count("capture_prefer_cache_count"); | 
| 79 |  | bvar::Adder<int64_t> g_capture_with_freshness_tolerance_count( | 
| 80 |  |         "capture_with_freshness_tolerance_count"); | 
| 81 |  | bvar::Adder<int64_t> g_capture_with_freshness_tolerance_fallback_count( | 
| 82 |  |         "capture_with_freshness_tolerance_fallback_count"); | 
| 83 |  | bvar::Window<bvar::Adder<int64_t>> g_capture_prefer_cache_count_window( | 
| 84 |  |         "capture_prefer_cache_count_window", &g_capture_prefer_cache_count, 30); | 
| 85 |  | bvar::Window<bvar::Adder<int64_t>> g_capture_with_freshness_tolerance_count_window( | 
| 86 |  |         "capture_with_freshness_tolerance_count_window", &g_capture_with_freshness_tolerance_count, | 
| 87 |  |         30); | 
| 88 |  | bvar::Window<bvar::Adder<int64_t>> g_capture_with_freshness_tolerance_fallback_count_window( | 
| 89 |  |         "capture_with_freshness_tolerance_fallback_count_window", | 
| 90 |  |         &g_capture_with_freshness_tolerance_fallback_count, 30); | 
| 91 |  |  | 
| 92 |  | static constexpr int LOAD_INITIATOR_ID = -1; | 
| 93 |  |  | 
| 94 |  | bvar::Adder<uint64_t> g_file_cache_cloud_tablet_submitted_segment_size( | 
| 95 |  |         "file_cache_cloud_tablet_submitted_segment_size"); | 
| 96 |  | bvar::Adder<uint64_t> g_file_cache_cloud_tablet_submitted_segment_num( | 
| 97 |  |         "file_cache_cloud_tablet_submitted_segment_num"); | 
| 98 |  | bvar::Adder<uint64_t> g_file_cache_cloud_tablet_submitted_index_size( | 
| 99 |  |         "file_cache_cloud_tablet_submitted_index_size"); | 
| 100 |  | bvar::Adder<uint64_t> g_file_cache_cloud_tablet_submitted_index_num( | 
| 101 |  |         "file_cache_cloud_tablet_submitted_index_num"); | 
| 102 |  | bvar::Adder<uint64_t> g_file_cache_cloud_tablet_finished_segment_size( | 
| 103 |  |         "file_cache_cloud_tablet_finished_segment_size"); | 
| 104 |  | bvar::Adder<uint64_t> g_file_cache_cloud_tablet_finished_segment_num( | 
| 105 |  |         "file_cache_cloud_tablet_finished_segment_num"); | 
| 106 |  | bvar::Adder<uint64_t> g_file_cache_cloud_tablet_finished_index_size( | 
| 107 |  |         "file_cache_cloud_tablet_finished_index_size"); | 
| 108 |  | bvar::Adder<uint64_t> g_file_cache_cloud_tablet_finished_index_num( | 
| 109 |  |         "file_cache_cloud_tablet_finished_index_num"); | 
| 110 |  |  | 
| 111 |  | bvar::Adder<uint64_t> g_file_cache_recycle_cached_data_segment_num( | 
| 112 |  |         "file_cache_recycle_cached_data_segment_num"); | 
| 113 |  | bvar::Adder<uint64_t> g_file_cache_recycle_cached_data_segment_size( | 
| 114 |  |         "file_cache_recycle_cached_data_segment_size"); | 
| 115 |  | bvar::Adder<uint64_t> g_file_cache_recycle_cached_data_index_num( | 
| 116 |  |         "file_cache_recycle_cached_data_index_num"); | 
| 117 |  |  | 
| 118 |  | bvar::Adder<uint64_t> g_file_cache_warm_up_segment_complete_num( | 
| 119 |  |         "file_cache_warm_up_segment_complete_num"); | 
| 120 |  | bvar::Adder<uint64_t> g_file_cache_warm_up_segment_failed_num( | 
| 121 |  |         "file_cache_warm_up_segment_failed_num"); | 
| 122 |  | bvar::Adder<uint64_t> g_file_cache_warm_up_inverted_idx_complete_num( | 
| 123 |  |         "file_cache_warm_up_inverted_idx_complete_num"); | 
| 124 |  | bvar::Adder<uint64_t> g_file_cache_warm_up_inverted_idx_failed_num( | 
| 125 |  |         "file_cache_warm_up_inverted_idx_failed_num"); | 
| 126 |  | bvar::Adder<uint64_t> g_file_cache_warm_up_rowset_complete_num( | 
| 127 |  |         "file_cache_warm_up_rowset_complete_num"); | 
| 128 |  | bvar::Adder<uint64_t> g_file_cache_warm_up_rowset_triggered_by_job_num( | 
| 129 |  |         "file_cache_warm_up_rowset_triggered_by_job_num"); | 
| 130 |  | bvar::Adder<uint64_t> g_file_cache_warm_up_rowset_triggered_by_sync_rowset_num( | 
| 131 |  |         "file_cache_warm_up_rowset_triggered_by_sync_rowset_num"); | 
| 132 |  | bvar::Adder<uint64_t> g_file_cache_warm_up_rowset_triggered_by_event_driven_num( | 
| 133 |  |         "file_cache_warm_up_rowset_triggered_by_event_driven_num"); | 
| 134 |  | bvar::LatencyRecorder g_file_cache_warm_up_rowset_all_segments_latency( | 
| 135 |  |         "file_cache_warm_up_rowset_all_segments_latency"); | 
| 136 |  |  | 
| 137 |  | CloudTablet::CloudTablet(CloudStorageEngine& engine, TabletMetaSharedPtr tablet_meta) | 
| 138 | 86 |         : BaseTablet(std::move(tablet_meta)), _engine(engine) {} | 
| 139 |  |  | 
| 140 | 86 | CloudTablet::~CloudTablet() = default; | 
| 141 |  |  | 
| 142 | 0 | bool CloudTablet::exceed_version_limit(int32_t limit) { | 
| 143 | 0 |     return _approximate_num_rowsets.load(std::memory_order_relaxed) > limit; | 
| 144 | 0 | } | 
| 145 |  |  | 
| 146 | 19 | std::string CloudTablet::tablet_path() const { | 
| 147 | 19 |     return ""; | 
| 148 | 19 | } | 
| 149 |  |  | 
| 150 |  | Status CloudTablet::capture_rs_readers(const Version& spec_version, | 
| 151 |  |                                        std::vector<RowSetSplits>* rs_splits, | 
| 152 | 0 |                                        const CaptureRowsetOps& opts) { | 
| 153 | 0 |     DBUG_EXECUTE_IF("CloudTablet.capture_rs_readers.return.e-230", { | 
| 154 | 0 |         LOG_WARNING("CloudTablet.capture_rs_readers.return e-230").tag("tablet_id", tablet_id()); | 
| 155 | 0 |         return Status::Error<false>(-230, "injected error"); | 
| 156 | 0 |     }); | 
| 157 | 0 |     std::shared_lock rlock(_meta_lock); | 
| 158 | 0 |     *rs_splits = DORIS_TRY(capture_rs_readers_unlocked( | 
| 159 | 0 |             spec_version, CaptureRowsetOps {.skip_missing_versions = opts.skip_missing_versions})); | 
| 160 | 0 |     return Status::OK(); | 
| 161 | 0 | } | 
| 162 |  |  | 
| 163 |  | [[nodiscard]] Result<std::vector<Version>> CloudTablet::capture_consistent_versions_unlocked( | 
| 164 | 33 |         const Version& version_range, const CaptureRowsetOps& options) const { | 
| 165 | 33 |     if (options.query_freshness_tolerance_ms > 0) { | 
| 166 | 20 |         return capture_versions_with_freshness_tolerance(version_range, options); | 
| 167 | 20 |     } else if (options.enable_prefer_cached_rowset && !enable_unique_key_merge_on_write()) { | 
| 168 | 13 |         return capture_versions_prefer_cache(version_range); | 
| 169 | 13 |     } | 
| 170 | 0 |     return BaseTablet::capture_consistent_versions_unlocked(version_range, options); | 
| 171 | 33 | } | 
| 172 |  |  | 
| 173 |  | Result<std::vector<Version>> CloudTablet::capture_versions_prefer_cache( | 
| 174 | 13 |         const Version& spec_version) const { | 
| 175 | 13 |     g_capture_prefer_cache_count << 1; | 
| 176 | 13 |     Versions version_path; | 
| 177 | 13 |     std::shared_lock rlock(_meta_lock); | 
| 178 | 13 |     auto st = _timestamped_version_tracker.capture_consistent_versions_prefer_cache( | 
| 179 | 13 |             spec_version, version_path, | 
| 180 | 92 |             [&](int64_t start, int64_t end) { return rowset_is_warmed_up_unlocked(start, end); }); | 
| 181 | 13 |     if (!st.ok()) { | 
| 182 | 0 |         return ResultError(st); | 
| 183 | 0 |     } | 
| 184 | 13 |     int64_t path_max_version = version_path.back().second; | 
| 185 | 13 |     VLOG_DEBUG << fmt::format( | 
| 186 | 0 |             "[verbose] CloudTablet::capture_versions_prefer_cache, capture path: {}, " | 
| 187 | 0 |             "tablet_id={}, spec_version={}, path_max_version={}", | 
| 188 | 0 |             fmt::join(version_path | std::views::transform([](const auto& version) { | 
| 189 | 0 |                           return fmt::format("{}", version.to_string()); | 
| 190 | 0 |                       }), | 
| 191 | 0 |                       ", "), | 
| 192 | 0 |             tablet_id(), spec_version.to_string(), path_max_version); | 
| 193 | 13 |     return version_path; | 
| 194 | 13 | } | 
| 195 |  |  | 
| 196 | 210 | bool CloudTablet::rowset_is_warmed_up_unlocked(int64_t start_version, int64_t end_version) const { | 
| 197 | 210 |     if (start_version > end_version) { | 
| 198 | 0 |         return false; | 
| 199 | 0 |     } | 
| 200 | 210 |     Version version {start_version, end_version}; | 
| 201 | 210 |     auto it = _rs_version_map.find(version); | 
| 202 | 210 |     if (it == _rs_version_map.end()) { | 
| 203 | 78 |         it = _stale_rs_version_map.find(version); | 
| 204 | 78 |         if (it == _stale_rs_version_map.end()) { | 
| 205 | 0 |             LOG_WARNING( | 
| 206 | 0 |                     "fail to find Rowset in rs_version or stale_rs_version for version. " | 
| 207 | 0 |                     "tablet={}, version={}", | 
| 208 | 0 |                     tablet_id(), version.to_string()); | 
| 209 | 0 |             return false; | 
| 210 | 0 |         } | 
| 211 | 78 |     } | 
| 212 | 210 |     const auto& rs = it->second; | 
| 213 | 210 |     if (rs->visible_timestamp() < _engine.startup_timepoint()) { | 
| 214 |  |         // We only care about rowsets that are created after startup time point. For other rowsets, | 
| 215 |  |         // we assume they are warmed up. | 
| 216 | 10 |         return true; | 
| 217 | 10 |     } | 
| 218 | 200 |     return is_rowset_warmed_up(rs->rowset_id()); | 
| 219 | 210 | }; | 
| 220 |  |  | 
| 221 |  | Result<std::vector<Version>> CloudTablet::capture_versions_with_freshness_tolerance( | 
| 222 | 20 |         const Version& spec_version, const CaptureRowsetOps& options) const { | 
| 223 | 20 |     g_capture_with_freshness_tolerance_count << 1; | 
| 224 | 20 |     using namespace std::chrono; | 
| 225 | 20 |     auto query_freshness_tolerance_ms = options.query_freshness_tolerance_ms; | 
| 226 | 20 |     auto freshness_limit_tp = system_clock::now() - milliseconds(query_freshness_tolerance_ms); | 
| 227 |  |     // find a version path where every edge(rowset) has been warmuped | 
| 228 | 20 |     Versions version_path; | 
| 229 | 20 |     std::shared_lock rlock(_meta_lock); | 
| 230 | 20 |     if (enable_unique_key_merge_on_write()) { | 
| 231 |  |         // For merge-on-write table, newly generated delete bitmap marks will be on the rowsets which are in newest layout. | 
| 232 |  |         // So we can ony capture rowsets which are in newest data layout. Otherwise there may be data correctness issue. | 
| 233 | 11 |         RETURN_IF_ERROR_RESULT( | 
| 234 | 11 |                 _timestamped_version_tracker.capture_consistent_versions_with_validator_mow( | 
| 235 | 11 |                         spec_version, version_path, [&](int64_t start, int64_t end) { | 
| 236 | 11 |                             return rowset_is_warmed_up_unlocked(start, end); | 
| 237 | 11 |                         })); | 
| 238 | 11 |     } else { | 
| 239 | 9 |         RETURN_IF_ERROR_RESULT( | 
| 240 | 9 |                 _timestamped_version_tracker.capture_consistent_versions_with_validator( | 
| 241 | 9 |                         spec_version, version_path, [&](int64_t start, int64_t end) { | 
| 242 | 9 |                             return rowset_is_warmed_up_unlocked(start, end); | 
| 243 | 9 |                         })); | 
| 244 | 9 |     } | 
| 245 | 20 |     int64_t path_max_version = version_path.back().second; | 
| 246 | 335 |     auto should_be_visible_but_not_warmed_up = [&](const auto& rs_meta) -> bool { | 
| 247 | 335 |         if (rs_meta->version() == Version {0, 1}) { | 
| 248 |  |             // skip rowset[0-1] | 
| 249 | 18 |             return false; | 
| 250 | 18 |         } | 
| 251 | 317 |         bool ret = rs_meta->start_version() > path_max_version && | 
| 252 | 317 |                    rs_meta->visible_timestamp() < freshness_limit_tp; | 
| 253 | 317 |         if (ret && config::read_cluster_cache_opt_verbose_log) { | 
| 254 | 5 |             std::time_t t1 = system_clock::to_time_t(rs_meta->visible_timestamp()); | 
| 255 | 5 |             std::tm tm1 = *std::localtime(&t1); | 
| 256 | 5 |             std::ostringstream oss1; | 
| 257 | 5 |             oss1 << std::put_time(&tm1, "%Y-%m-%d %H:%M:%S"); | 
| 258 |  |  | 
| 259 | 5 |             std::time_t t2 = system_clock::to_time_t(freshness_limit_tp); | 
| 260 | 5 |             std::tm tm2 = *std::localtime(&t2); | 
| 261 | 5 |             std::ostringstream oss2; | 
| 262 | 5 |             oss2 << std::put_time(&tm2, "%Y-%m-%d %H:%M:%S"); | 
| 263 | 5 |             LOG_INFO( | 
| 264 | 5 |                     "[verbose] CloudTablet::capture_rs_readers_with_freshness_tolerance, " | 
| 265 | 5 |                     "find a rowset which should be visible but not warmed up, tablet_id={}, " | 
| 266 | 5 |                     "path_max_version={}, rowset_id={}, version={}, visible_time={}, " | 
| 267 | 5 |                     "freshness_limit={}, version_graph={}, rowset_warmup_digest={}", | 
| 268 | 5 |                     tablet_id(), path_max_version, rs_meta->rowset_id().to_string(), | 
| 269 | 5 |                     rs_meta->version().to_string(), oss1.str(), oss2.str(), | 
| 270 | 5 |                     _timestamped_version_tracker.debug_string(), rowset_warmup_digest()); | 
| 271 | 5 |         } | 
| 272 | 317 |         return ret; | 
| 273 | 335 |     }; | 
| 274 |  |     // use std::views::concat after C++26 | 
| 275 | 20 |     bool should_fallback = | 
| 276 | 20 |             std::ranges::any_of(std::views::values(_tablet_meta->all_rs_metas()), | 
| 277 | 20 |                                 should_be_visible_but_not_warmed_up) || | 
| 278 | 20 |             std::ranges::any_of(std::views::values(_tablet_meta->all_stale_rs_metas()), | 
| 279 | 18 |                                 should_be_visible_but_not_warmed_up); | 
| 280 | 20 |     if (should_fallback) { | 
| 281 | 5 |         rlock.unlock(); | 
| 282 | 5 |         g_capture_with_freshness_tolerance_fallback_count << 1; | 
| 283 |  |         // if there exists a rowset which satisfies freshness tolerance and its start version is larger than the path max version | 
| 284 |  |         // but has not been warmuped up yet, fallback to capture rowsets as usual | 
| 285 | 5 |         return BaseTablet::capture_consistent_versions_unlocked(spec_version, options); | 
| 286 | 5 |     } | 
| 287 | 15 |     VLOG_DEBUG << fmt::format( | 
| 288 | 0 |             "[verbose] CloudTablet::capture_versions_with_freshness_tolerance, capture path: {}, " | 
| 289 | 0 |             "tablet_id={}, spec_version={}, path_max_version={}", | 
| 290 | 0 |             fmt::join(version_path | std::views::transform([](const auto& version) { | 
| 291 | 0 |                           return fmt::format("{}", version.to_string()); | 
| 292 | 0 |                       }), | 
| 293 | 0 |                       ", "), | 
| 294 | 0 |             tablet_id(), spec_version.to_string(), path_max_version); | 
| 295 | 15 |     return version_path; | 
| 296 | 20 | } | 
| 297 |  |  | 
| 298 |  | // There are only two tablet_states RUNNING and NOT_READY in cloud mode | 
| 299 |  | // This function will erase the tablet from `CloudTabletMgr` when it can't find this tablet in MS. | 
| 300 | 16 | Status CloudTablet::sync_rowsets(const SyncOptions& options, SyncRowsetStats* stats) { | 
| 301 | 16 |     RETURN_IF_ERROR(sync_if_not_running(stats)); | 
| 302 |  |  | 
| 303 | 16 |     if (options.query_version > 0) { | 
| 304 | 0 |         std::shared_lock rlock(_meta_lock); | 
| 305 | 0 |         if (_max_version >= options.query_version) { | 
| 306 | 0 |             return Status::OK(); | 
| 307 | 0 |         } | 
| 308 | 0 |     } | 
| 309 |  |  | 
| 310 |  |     // serially execute sync to reduce unnecessary network overhead | 
| 311 | 16 |     std::unique_lock lock(_sync_meta_lock); | 
| 312 | 16 |     if (options.query_version > 0) { | 
| 313 | 0 |         std::shared_lock rlock(_meta_lock); | 
| 314 | 0 |         if (_max_version >= options.query_version) { | 
| 315 | 0 |             return Status::OK(); | 
| 316 | 0 |         } | 
| 317 | 0 |     } | 
| 318 |  |  | 
| 319 | 16 |     auto st = _engine.meta_mgr().sync_tablet_rowsets_unlocked(this, lock, options, stats); | 
| 320 | 16 |     if (st.is<ErrorCode::NOT_FOUND>()) { | 
| 321 | 0 |         clear_cache(); | 
| 322 | 0 |     } | 
| 323 |  |  | 
| 324 | 16 |     return st; | 
| 325 | 16 | } | 
| 326 |  |  | 
| 327 |  | // Sync tablet meta and all rowset meta if not running. | 
| 328 |  | // This could happen when BE didn't finish schema change job and another BE committed this schema change job. | 
| 329 |  | // It should be a quite rare situation. | 
| 330 | 16 | Status CloudTablet::sync_if_not_running(SyncRowsetStats* stats) { | 
| 331 | 16 |     if (tablet_state() == TABLET_RUNNING) { | 
| 332 | 16 |         return Status::OK(); | 
| 333 | 16 |     } | 
| 334 |  |  | 
| 335 |  |     // Serially execute sync to reduce unnecessary network overhead | 
| 336 | 0 |     std::unique_lock lock(_sync_meta_lock); | 
| 337 |  | 
 | 
| 338 | 0 |     { | 
| 339 | 0 |         std::shared_lock rlock(_meta_lock); | 
| 340 | 0 |         if (tablet_state() == TABLET_RUNNING) { | 
| 341 | 0 |             return Status::OK(); | 
| 342 | 0 |         } | 
| 343 | 0 |     } | 
| 344 |  |  | 
| 345 | 0 |     TabletMetaSharedPtr tablet_meta; | 
| 346 | 0 |     auto st = _engine.meta_mgr().get_tablet_meta(tablet_id(), &tablet_meta); | 
| 347 | 0 |     if (!st.ok()) { | 
| 348 | 0 |         if (st.is<ErrorCode::NOT_FOUND>()) { | 
| 349 | 0 |             clear_cache(); | 
| 350 | 0 |         } | 
| 351 | 0 |         return st; | 
| 352 | 0 |     } | 
| 353 |  |  | 
| 354 | 0 |     if (tablet_meta->tablet_state() != TABLET_RUNNING) [[unlikely]] { | 
| 355 |  |         // MoW may go to here when load while schema change | 
| 356 | 0 |         return Status::OK(); | 
| 357 | 0 |     } | 
| 358 |  |  | 
| 359 | 0 |     TimestampedVersionTracker empty_tracker; | 
| 360 | 0 |     { | 
| 361 | 0 |         std::lock_guard wlock(_meta_lock); | 
| 362 | 0 |         RETURN_IF_ERROR(set_tablet_state(TABLET_RUNNING)); | 
| 363 | 0 |         _rs_version_map.clear(); | 
| 364 | 0 |         _stale_rs_version_map.clear(); | 
| 365 | 0 |         std::swap(_timestamped_version_tracker, empty_tracker); | 
| 366 | 0 |         _tablet_meta->clear_rowsets(); | 
| 367 | 0 |         _tablet_meta->clear_stale_rowset(); | 
| 368 | 0 |         _max_version = -1; | 
| 369 | 0 |     } | 
| 370 |  |  | 
| 371 | 0 |     st = _engine.meta_mgr().sync_tablet_rowsets_unlocked(this, lock, {}, stats); | 
| 372 | 0 |     if (st.is<ErrorCode::NOT_FOUND>()) { | 
| 373 | 0 |         clear_cache(); | 
| 374 | 0 |     } | 
| 375 | 0 |     return st; | 
| 376 | 0 | } | 
| 377 |  |  | 
| 378 |  | void CloudTablet::add_rowsets(std::vector<RowsetSharedPtr> to_add, bool version_overlap, | 
| 379 |  |                               std::unique_lock<std::shared_mutex>& meta_lock, | 
| 380 | 247 |                               bool warmup_delta_data) { | 
| 381 | 247 |     if (to_add.empty()) { | 
| 382 | 0 |         return; | 
| 383 | 0 |     } | 
| 384 |  |  | 
| 385 | 247 |     auto add_rowsets_directly = [=, this](std::vector<RowsetSharedPtr>& rowsets) { | 
| 386 | 732 |         for (auto& rs : rowsets) { | 
| 387 | 732 |             if (version_overlap || warmup_delta_data) { | 
| 388 |  | #ifndef BE_TEST | 
| 389 |  |                 bool warm_up_state_updated = false; | 
| 390 |  |                 // Warmup rowset data in background | 
| 391 |  |                 for (int seg_id = 0; seg_id < rs->num_segments(); ++seg_id) { | 
| 392 |  |                     const auto& rowset_meta = rs->rowset_meta(); | 
| 393 |  |                     constexpr int64_t interval = 600; // 10 mins | 
| 394 |  |                     // When BE restart and receive the `load_sync` rpc, it will sync all historical rowsets first time. | 
| 395 |  |                     // So we need to filter out the old rowsets avoid to download the whole table. | 
| 396 |  |                     if (warmup_delta_data && | 
| 397 |  |                         ::time(nullptr) - rowset_meta->newest_write_timestamp() >= interval) { | 
| 398 |  |                         continue; | 
| 399 |  |                     } | 
| 400 |  |  | 
| 401 |  |                     auto storage_resource = rowset_meta->remote_storage_resource(); | 
| 402 |  |                     if (!storage_resource) { | 
| 403 |  |                         LOG(WARNING) << storage_resource.error(); | 
| 404 |  |                         continue; | 
| 405 |  |                     } | 
| 406 |  |  | 
| 407 |  |                     int64_t expiration_time = | 
| 408 |  |                             _tablet_meta->ttl_seconds() == 0 || | 
| 409 |  |                                             rowset_meta->newest_write_timestamp() <= 0 | 
| 410 |  |                                     ? 0 | 
| 411 |  |                                     : rowset_meta->newest_write_timestamp() + | 
| 412 |  |                                               _tablet_meta->ttl_seconds(); | 
| 413 |  |                     g_file_cache_cloud_tablet_submitted_segment_num << 1; | 
| 414 |  |                     if (rs->rowset_meta()->segment_file_size(seg_id) > 0) { | 
| 415 |  |                         g_file_cache_cloud_tablet_submitted_segment_size | 
| 416 |  |                                 << rs->rowset_meta()->segment_file_size(seg_id); | 
| 417 |  |                     } | 
| 418 |  |                     if (!warm_up_state_updated) { | 
| 419 |  |                         VLOG_DEBUG << "warm up rowset " << rs->version() << "(" << rs->rowset_id() | 
| 420 |  |                                    << ") triggerd by sync rowset"; | 
| 421 |  |                         if (!add_rowset_warmup_state_unlocked(*(rs->rowset_meta()), | 
| 422 |  |                                                               WarmUpTriggerSource::SYNC_ROWSET)) { | 
| 423 |  |                             LOG(INFO) << "found duplicate warmup task for rowset " | 
| 424 |  |                                       << rs->rowset_id() << ", skip it"; | 
| 425 |  |                             break; | 
| 426 |  |                         } | 
| 427 |  |                         warm_up_state_updated = true; | 
| 428 |  |                     } | 
| 429 |  |                     // clang-format off | 
| 430 |  |                     auto self = std::dynamic_pointer_cast<CloudTablet>(shared_from_this()); | 
| 431 |  |                     _engine.file_cache_block_downloader().submit_download_task(io::DownloadFileMeta { | 
| 432 |  |                             .path = storage_resource.value()->remote_segment_path(*rowset_meta, seg_id), | 
| 433 |  |                             .file_size = rs->rowset_meta()->segment_file_size(seg_id), | 
| 434 |  |                             .file_system = storage_resource.value()->fs, | 
| 435 |  |                             .ctx = | 
| 436 |  |                                     { | 
| 437 |  |                                             .expiration_time = expiration_time, | 
| 438 |  |                                             .is_dryrun = config::enable_reader_dryrun_when_download_file_cache, | 
| 439 |  |                                             .is_warmup = true | 
| 440 |  |                                     }, | 
| 441 |  |                             .download_done {[=](Status st) { | 
| 442 |  |                                 DBUG_EXECUTE_IF("CloudTablet::add_rowsets.download_data.callback.block_compaction_rowset", { | 
| 443 |  |                                             if (rs->version().second > rs->version().first) { | 
| 444 |  |                                                 auto sleep_time = dp->param<int>("sleep", 3); | 
| 445 |  |                                                 LOG_INFO( | 
| 446 |  |                                                         "[verbose] block download for rowset={}, " | 
| 447 |  |                                                         "version={}, sleep={}", | 
| 448 |  |                                                         rs->rowset_id().to_string(), | 
| 449 |  |                                                         rs->version().to_string(), sleep_time); | 
| 450 |  |                                                 std::this_thread::sleep_for( | 
| 451 |  |                                                         std::chrono::seconds(sleep_time)); | 
| 452 |  |                                             } | 
| 453 |  |                                 }); | 
| 454 |  |                                 self->complete_rowset_segment_warmup(WarmUpTriggerSource::SYNC_ROWSET, rowset_meta->rowset_id(), st, 1, 0); | 
| 455 |  |                                 if (!st) { | 
| 456 |  |                                     LOG_WARNING("add rowset warm up error ").error(st); | 
| 457 |  |                                 } | 
| 458 |  |                             }}, | 
| 459 |  |                     }); | 
| 460 |  |  | 
| 461 |  |                     auto download_idx_file = [&, self](const io::Path& idx_path, int64_t idx_size) { | 
| 462 |  |                         io::DownloadFileMeta meta { | 
| 463 |  |                                 .path = idx_path, | 
| 464 |  |                                 .file_size = idx_size, | 
| 465 |  |                                 .file_system = storage_resource.value()->fs, | 
| 466 |  |                                 .ctx = | 
| 467 |  |                                         { | 
| 468 |  |                                                 .expiration_time = expiration_time, | 
| 469 |  |                                                 .is_dryrun = config::enable_reader_dryrun_when_download_file_cache, | 
| 470 |  |                                                 .is_warmup = true | 
| 471 |  |                                         }, | 
| 472 |  |                                 .download_done {[=](Status st) { | 
| 473 |  |                                     DBUG_EXECUTE_IF("CloudTablet::add_rowsets.download_idx.callback.block", { | 
| 474 |  |                                                 // clang-format on | 
| 475 |  |                                                 auto sleep_time = dp->param<int>("sleep", 3); | 
| 476 |  |                                                 LOG_INFO( | 
| 477 |  |                                                         "[verbose] block download for " | 
| 478 |  |                                                         "rowset={}, inverted_idx_file={}, " | 
| 479 |  |                                                         "sleep={}", | 
| 480 |  |                                                         rs->rowset_id().to_string(), | 
| 481 |  |                                                         idx_path.string(), sleep_time); | 
| 482 |  |                                                 std::this_thread::sleep_for( | 
| 483 |  |                                                         std::chrono::seconds(sleep_time)); | 
| 484 |  |                                                 // clang-format off | 
| 485 |  |                                     }); | 
| 486 |  |                                     self->complete_rowset_segment_warmup(WarmUpTriggerSource::SYNC_ROWSET, rowset_meta->rowset_id(), st, 0, 1); | 
| 487 |  |                                     if (!st) { | 
| 488 |  |                                         LOG_WARNING("add rowset warm up error ").error(st); | 
| 489 |  |                                     } | 
| 490 |  |                                 }}, | 
| 491 |  |                         }; | 
| 492 |  |                         self->update_rowset_warmup_state_inverted_idx_num_unlocked(WarmUpTriggerSource::SYNC_ROWSET, rowset_meta->rowset_id(), 1); | 
| 493 |  |                         _engine.file_cache_block_downloader().submit_download_task(std::move(meta)); | 
| 494 |  |                         g_file_cache_cloud_tablet_submitted_index_num << 1; | 
| 495 |  |                         g_file_cache_cloud_tablet_submitted_index_size << idx_size; | 
| 496 |  |                     }; | 
| 497 |  |                     // clang-format on | 
| 498 |  |                     auto schema_ptr = rowset_meta->tablet_schema(); | 
| 499 |  |                     auto idx_version = schema_ptr->get_inverted_index_storage_format(); | 
| 500 |  |                     if (idx_version == InvertedIndexStorageFormatPB::V1) { | 
| 501 |  |                         std::unordered_map<int64_t, int64_t> index_size_map; | 
| 502 |  |                         auto&& inverted_index_info = rowset_meta->inverted_index_file_info(seg_id); | 
| 503 |  |                         for (const auto& info : inverted_index_info.index_info()) { | 
| 504 |  |                             if (info.index_file_size() != -1) { | 
| 505 |  |                                 index_size_map[info.index_id()] = info.index_file_size(); | 
| 506 |  |                             } else { | 
| 507 |  |                                 VLOG_DEBUG << "Invalid index_file_size for segment_id " << seg_id | 
| 508 |  |                                            << ", index_id " << info.index_id(); | 
| 509 |  |                             } | 
| 510 |  |                         } | 
| 511 |  |                         for (const auto& index : schema_ptr->inverted_indexes()) { | 
| 512 |  |                             auto idx_path = storage_resource.value()->remote_idx_v1_path( | 
| 513 |  |                                     *rowset_meta, seg_id, index->index_id(), | 
| 514 |  |                                     index->get_index_suffix()); | 
| 515 |  |                             download_idx_file(idx_path, index_size_map[index->index_id()]); | 
| 516 |  |                         } | 
| 517 |  |                     } else { | 
| 518 |  |                         if (schema_ptr->has_inverted_index() || schema_ptr->has_ann_index()) { | 
| 519 |  |                             auto&& inverted_index_info = | 
| 520 |  |                                     rowset_meta->inverted_index_file_info(seg_id); | 
| 521 |  |                             int64_t idx_size = 0; | 
| 522 |  |                             if (inverted_index_info.has_index_size()) { | 
| 523 |  |                                 idx_size = inverted_index_info.index_size(); | 
| 524 |  |                             } else { | 
| 525 |  |                                 VLOG_DEBUG << "index_size is not set for segment " << seg_id; | 
| 526 |  |                             } | 
| 527 |  |                             auto idx_path = storage_resource.value()->remote_idx_v2_path( | 
| 528 |  |                                     *rowset_meta, seg_id); | 
| 529 |  |                             download_idx_file(idx_path, idx_size); | 
| 530 |  |                         } | 
| 531 |  |                     } | 
| 532 |  |                 } | 
| 533 |  | #endif | 
| 534 | 0 |             } | 
| 535 | 732 |             _rs_version_map.emplace(rs->version(), rs); | 
| 536 | 732 |             _timestamped_version_tracker.add_version(rs->version()); | 
| 537 | 732 |             _max_version = std::max(rs->end_version(), _max_version); | 
| 538 | 732 |             update_base_size(*rs); | 
| 539 | 732 |         } | 
| 540 | 247 |         _tablet_meta->add_rowsets_unchecked(rowsets); | 
| 541 | 247 |     }; | 
| 542 |  |  | 
| 543 | 247 |     if (!version_overlap) { | 
| 544 | 247 |         add_rowsets_directly(to_add); | 
| 545 | 247 |         return; | 
| 546 | 247 |     } | 
| 547 |  |  | 
| 548 |  |     // Filter out existed rowsets | 
| 549 | 0 |     auto remove_it = | 
| 550 | 0 |             std::remove_if(to_add.begin(), to_add.end(), [this](const RowsetSharedPtr& rs) { | 
| 551 | 0 |                 if (auto find_it = _rs_version_map.find(rs->version()); | 
| 552 | 0 |                     find_it == _rs_version_map.end()) { | 
| 553 | 0 |                     return false; | 
| 554 | 0 |                 } else if (find_it->second->rowset_id() == rs->rowset_id()) { | 
| 555 | 0 |                     return true; // Same rowset | 
| 556 | 0 |                 } | 
| 557 |  |  | 
| 558 |  |                 // If version of rowset in `to_add` is equal to rowset in tablet but rowset_id is not equal, | 
| 559 |  |                 // replace existed rowset with `to_add` rowset. This may occur when: | 
| 560 |  |                 //  1. schema change converts rowsets which have been double written to new tablet | 
| 561 |  |                 //  2. cumu compaction picks single overlapping input rowset to perform compaction | 
| 562 |  |  | 
| 563 |  |                 // add existed rowset to unused_rowsets to remove delete bitmap and recycle cached data | 
| 564 |  |  | 
| 565 | 0 |                 std::vector<RowsetSharedPtr> unused_rowsets; | 
| 566 | 0 |                 if (auto find_it = _rs_version_map.find(rs->version()); | 
| 567 | 0 |                     find_it != _rs_version_map.end()) { | 
| 568 | 0 |                     if (find_it->second->rowset_id() == rs->rowset_id()) { | 
| 569 | 0 |                         LOG(WARNING) << "tablet_id=" << tablet_id() | 
| 570 | 0 |                                      << ", rowset_id=" << rs->rowset_id().to_string() | 
| 571 | 0 |                                      << ", existed rowset_id=" | 
| 572 | 0 |                                      << find_it->second->rowset_id().to_string(); | 
| 573 | 0 |                         DCHECK(find_it->second->rowset_id() != rs->rowset_id()) | 
| 574 | 0 |                                 << "tablet_id=" << tablet_id() | 
| 575 | 0 |                                 << ", rowset_id=" << rs->rowset_id().to_string() | 
| 576 | 0 |                                 << ", existed rowset_id=" | 
| 577 | 0 |                                 << find_it->second->rowset_id().to_string(); | 
| 578 | 0 |                     } | 
| 579 | 0 |                     unused_rowsets.push_back(find_it->second); | 
| 580 | 0 |                 } | 
| 581 | 0 |                 add_unused_rowsets(unused_rowsets); | 
| 582 |  | 
 | 
| 583 | 0 |                 _tablet_meta->delete_rs_meta_by_version(rs->version(), nullptr); | 
| 584 | 0 |                 _rs_version_map[rs->version()] = rs; | 
| 585 | 0 |                 _tablet_meta->add_rowsets_unchecked({rs}); | 
| 586 | 0 |                 update_base_size(*rs); | 
| 587 | 0 |                 return true; | 
| 588 | 0 |             }); | 
| 589 |  | 
 | 
| 590 | 0 |     to_add.erase(remove_it, to_add.end()); | 
| 591 |  |  | 
| 592 |  |     // delete rowsets with overlapped version | 
| 593 | 0 |     std::vector<RowsetSharedPtr> to_add_directly; | 
| 594 | 0 |     for (auto& to_add_rs : to_add) { | 
| 595 |  |         // delete rowsets with overlapped version | 
| 596 | 0 |         std::vector<RowsetSharedPtr> to_delete; | 
| 597 | 0 |         Version to_add_v = to_add_rs->version(); | 
| 598 |  |         // if start_version  > max_version, we can skip checking overlap here. | 
| 599 | 0 |         if (to_add_v.first > _max_version) { | 
| 600 |  |             // if start_version  > max_version, we can skip checking overlap here. | 
| 601 | 0 |             to_add_directly.push_back(to_add_rs); | 
| 602 | 0 |         } else { | 
| 603 | 0 |             to_add_directly.push_back(to_add_rs); | 
| 604 | 0 |             for (auto& [v, rs] : _rs_version_map) { | 
| 605 | 0 |                 if (to_add_v.contains(v)) { | 
| 606 | 0 |                     to_delete.push_back(rs); | 
| 607 | 0 |                 } | 
| 608 | 0 |             } | 
| 609 | 0 |             delete_rowsets(to_delete, meta_lock); | 
| 610 | 0 |         } | 
| 611 | 0 |     } | 
| 612 |  | 
 | 
| 613 | 0 |     add_rowsets_directly(to_add_directly); | 
| 614 | 0 | } | 
| 615 |  |  | 
// Retires the rowsets in `to_delete` from the live version map.
//
// For each rowset: stamps its meta with the current wall-clock time via
// set_stale_at(), records it in `_stale_rs_version_map`, and afterwards erases
// its version from `_rs_version_map`. The collected metas are registered with
// `_timestamped_version_tracker` as one stale path and then passed to
// `_tablet_meta->modify_rs_metas()` as the second argument (the first list is
// empty).
//
// The unnamed unique_lock parameter is never touched in the body; presumably it
// only forces callers to hold the tablet meta lock exclusively -- confirm
// against the declaration. An empty `to_delete` is a no-op.
| 616 |  | void CloudTablet::delete_rowsets(const std::vector<RowsetSharedPtr>& to_delete, | 
| 617 | 94 |                                  std::unique_lock<std::shared_mutex>&) { | 
| 618 | 94 |     if (to_delete.empty()) { | 
| 619 | 0 |         return; | 
| 620 | 0 |     } | 
| 621 | 94 |     std::vector<RowsetMetaSharedPtr> rs_metas; | 
| 622 | 94 |     rs_metas.reserve(to_delete.size()); | 
                 // One timestamp is shared by every rowset retired in this call.
| 623 | 94 |     int64_t now = ::time(nullptr); | 
| 624 | 540 |     for (auto&& rs : to_delete) { | 
| 625 | 540 |         rs->rowset_meta()->set_stale_at(now); | 
| 626 | 540 |         rs_metas.push_back(rs->rowset_meta()); | 
| 627 | 540 |         _stale_rs_version_map[rs->version()] = rs; | 
| 628 | 540 |     } | 
| 629 | 94 |     _timestamped_version_tracker.add_stale_path_version(rs_metas); | 
| 630 | 540 |     for (auto&& rs : to_delete) { | 
| 631 | 540 |         _rs_version_map.erase(rs->version()); | 
| 632 | 540 |     } | 
| 633 |  |  | 
| 634 | 94 |     _tablet_meta->modify_rs_metas({}, rs_metas, false); | 
| 635 | 94 | } | 
| 636 |  |  | 
// Sweeps stale rowsets whose version paths have expired.
//
// The cutoff is `now - config::tablet_rowset_stale_sweep_time_sec`. While
// holding `_meta_lock` exclusively, every expired path reported by
// `_timestamped_version_tracker` is removed from the tracker, the matching
// entries are erased from `_stale_rs_version_map`, and the stale rowset metas
// are deleted from `_tablet_meta`.
//
// After the lock is released the function
//   1. tries to recycle the cached data of the expired rowsets and forwards
//      whatever was recycled to the cloud warm-up manager,
//   2. queues the expired rowsets through add_unused_rowsets(),
//   3. when `config::enable_agg_and_remove_pre_rowsets_delete_bitmap` is set and
//      the tablet is a merge-on-write UNIQUE_KEYS tablet, calls
//      agg_delete_bitmap_for_stale_rowsets() per deleted path and records the
//      resulting key ranges in `_unused_delete_bitmap` (under `_gc_mutex`).
//
// Returns the number of rowsets taken out of `_stale_rs_version_map`; returns 0
// early when no path has expired.
| 637 | 0 | uint64_t CloudTablet::delete_expired_stale_rowsets() { | 
| 638 | 0 |     if (config::enable_mow_verbose_log) { | 
| 639 | 0 |         LOG_INFO("begin delete_expired_stale_rowset for tablet={}", tablet_id()); | 
| 640 | 0 |     } | 
| 641 | 0 |     std::vector<RowsetSharedPtr> expired_rowsets; | 
| 642 |  |     // ATTN: trick, Use stale_rowsets to temporarily increase the reference count of the rowset shared pointer in _stale_rs_version_map so that in the recycle_cached_data function, it checks if the reference count is 2. | 
| 643 | 0 |     std::vector<std::pair<Version, std::vector<RowsetSharedPtr>>> deleted_stale_rowsets; | 
| 644 | 0 |     int64_t expired_stale_sweep_endtime = | 
| 645 | 0 |             ::time(nullptr) - config::tablet_rowset_stale_sweep_time_sec; | 
| 646 | 0 |     { | 
| 647 | 0 |         std::unique_lock wlock(_meta_lock); | 
| 648 |  | 
 | 
| 649 | 0 |         std::vector<int64_t> path_ids; | 
| 650 |  |         // capture the path version to delete | 
| 651 | 0 |         _timestamped_version_tracker.capture_expired_paths(expired_stale_sweep_endtime, &path_ids); | 
| 652 |  | 
 | 
| 653 | 0 |         if (path_ids.empty()) { | 
| 654 | 0 |             return 0; | 
| 655 | 0 |         } | 
| 656 |  |  | 
| 657 | 0 |         for (int64_t path_id : path_ids) { | 
                         // start/end accumulate the version range covered by this path:
                         // first element's start version, last element's end version.
| 658 | 0 |             int64_t start_version = -1; | 
| 659 | 0 |             int64_t end_version = -1; | 
| 660 | 0 |             std::vector<RowsetSharedPtr> stale_rowsets; | 
| 661 |  |             // delete stale versions in version graph | 
| 662 | 0 |             auto version_path = _timestamped_version_tracker.fetch_and_delete_path_by_id(path_id); | 
| 663 | 0 |             for (auto& v_ts : version_path->timestamped_versions()) { | 
| 664 | 0 |                 auto rs_it = _stale_rs_version_map.find(v_ts->version()); | 
| 665 | 0 |                 if (rs_it != _stale_rs_version_map.end()) { | 
| 666 | 0 |                     expired_rowsets.push_back(rs_it->second); | 
| 667 | 0 |                     stale_rowsets.push_back(rs_it->second); | 
| 668 | 0 |                     VLOG_DEBUG << "erase stale rowset, tablet_id=" << tablet_id() | 
| 669 | 0 |                                << " rowset_id=" << rs_it->second->rowset_id().to_string() | 
| 670 | 0 |                                << " version=" << rs_it->first.to_string(); | 
| 671 | 0 |                     _stale_rs_version_map.erase(rs_it); | 
| 672 | 0 |                 } else { | 
| 673 | 0 |                     LOG(WARNING) << "cannot find stale rowset " << v_ts->version() << " in tablet " | 
| 674 | 0 |                                  << tablet_id(); | 
                                 // The lambda releases wlock before calling get_compaction_status(),
                                 // which acquires `_meta_lock` in shared mode itself.
| 675 |  |                     // clang-format off | 
| 676 | 0 |                     DCHECK(false) << [this, &wlock]() { wlock.unlock(); std::string json; get_compaction_status(&json); return json; }(); | 
| 677 |  |                     // clang-format on | 
| 678 | 0 |                 } | 
| 679 | 0 |                 if (start_version < 0) { | 
| 680 | 0 |                     start_version = v_ts->version().first; | 
| 681 | 0 |                 } | 
| 682 | 0 |                 end_version = v_ts->version().second; | 
| 683 | 0 |                 _tablet_meta->delete_stale_rs_meta_by_version(v_ts->version()); | 
| 684 | 0 |             } | 
| 685 | 0 |             Version version(start_version, end_version); | 
| 686 | 0 |             if (!stale_rowsets.empty()) { | 
| 687 | 0 |                 deleted_stale_rowsets.emplace_back(version, std::move(stale_rowsets)); | 
| 688 | 0 |             } | 
| 689 | 0 |         } | 
| 690 | 0 |         _reconstruct_version_tracker_if_necessary(); | 
| 691 | 0 |     } | 
| 692 |  |  | 
| 693 |  |     // if the rowset is not used by any query, we can recycle its cached data early. | 
| 694 | 0 |     auto recycled_rowsets = recycle_cached_data(expired_rowsets); | 
| 695 | 0 |     if (!recycled_rowsets.empty()) { | 
| 696 | 0 |         auto& manager = ExecEnv::GetInstance()->storage_engine().to_cloud().cloud_warm_up_manager(); | 
| 697 | 0 |         manager.recycle_cache(tablet_id(), recycled_rowsets); | 
| 698 | 0 |     } | 
| 699 | 0 |     if (config::enable_mow_verbose_log) { | 
| 700 | 0 |         LOG_INFO("finish delete_expired_stale_rowset for tablet={}", tablet_id()); | 
| 701 | 0 |     } | 
| 702 |  | 
 | 
| 703 | 0 |     add_unused_rowsets(expired_rowsets); | 
| 704 | 0 |     if (config::enable_agg_and_remove_pre_rowsets_delete_bitmap && keys_type() == UNIQUE_KEYS && | 
| 705 | 0 |         enable_unique_key_merge_on_write() && !deleted_stale_rowsets.empty()) { | 
| 706 |  |         // agg delete bitmap for pre rowsets; record unused delete bitmap key ranges | 
| 707 | 0 |         OlapStopWatch watch; | 
| 708 | 0 |         for (const auto& [version, unused_rowsets] : deleted_stale_rowsets) { | 
| 709 |  |             // agg delete bitmap for pre rowset | 
| 710 | 0 |             DeleteBitmapKeyRanges remove_delete_bitmap_key_ranges; | 
| 711 | 0 |             agg_delete_bitmap_for_stale_rowsets(version, remove_delete_bitmap_key_ranges); | 
| 712 |  |             // add remove delete bitmap | 
| 713 | 0 |             if (!remove_delete_bitmap_key_ranges.empty()) { | 
| 714 | 0 |                 std::vector<RowsetId> rowset_ids; | 
| 715 | 0 |                 for (const auto& rs : unused_rowsets) { | 
| 716 | 0 |                     rowset_ids.push_back(rs->rowset_id()); | 
| 717 | 0 |                 } | 
| 718 | 0 |                 std::lock_guard<std::mutex> lock(_gc_mutex); | 
| 719 | 0 |                 _unused_delete_bitmap.push_back( | 
| 720 | 0 |                         std::make_pair(rowset_ids, remove_delete_bitmap_key_ranges)); | 
| 721 | 0 |             } | 
| 722 | 0 |         } | 
| 723 | 0 |         LOG(INFO) << "agg pre rowsets delete bitmap. tablet_id=" << tablet_id() | 
| 724 | 0 |                   << ", size=" << deleted_stale_rowsets.size() | 
| 725 | 0 |                   << ", cost(us)=" << watch.get_elapse_time_us(); | 
| 726 | 0 |     } | 
| 727 | 0 |     return expired_rowsets.size(); | 
| 728 | 0 | } | 
| 729 |  |  | 
// Returns true when `_unused_rowsets` or `_unused_delete_bitmap` still holds
// at least one entry. Both containers are inspected under `_gc_mutex`.
| 730 | 0 | bool CloudTablet::need_remove_unused_rowsets() { | 
| 731 | 0 |     std::lock_guard<std::mutex> lock(_gc_mutex); | 
| 732 | 0 |     return !_unused_rowsets.empty() || !_unused_delete_bitmap.empty(); | 
| 733 | 0 | } | 
| 734 |  |  | 
// Queues `rowsets` in `_unused_rowsets`, keyed by rowset id, while holding
// `_gc_mutex`. An id that is already present has its entry overwritten.
//
// Also updates the `g_unused_rowsets_bytes` / `g_unused_rowsets_count` metrics.
// Note that the count is raised by `rowsets.size()` and the bytes by each
// rowset's total_disk_size() whether or not the id was already queued.
| 735 | 0 | void CloudTablet::add_unused_rowsets(const std::vector<RowsetSharedPtr>& rowsets) { | 
| 736 | 0 |     std::lock_guard<std::mutex> lock(_gc_mutex); | 
| 737 | 0 |     for (const auto& rowset : rowsets) { | 
| 738 | 0 |         _unused_rowsets[rowset->rowset_id()] = rowset; | 
| 739 | 0 |         g_unused_rowsets_bytes << rowset->total_disk_size(); | 
| 740 | 0 |     } | 
| 741 | 0 |     g_unused_rowsets_count << rowsets.size(); | 
| 742 | 0 | } | 
| 743 |  |  | 
// Garbage-collects the entries queued in `_unused_rowsets` and
// `_unused_delete_bitmap`.
//
// Phase 1 (under `_gc_mutex`): every queued rowset whose shared_ptr has no
//   other owner (use_count() == 1) gets its delete bitmap removed from the
//   tablet meta, its warm-up state erased and its cache cleared, and is then
//   taken out of `_unused_rowsets`. Rowsets that are still referenced stay
//   queued for a later call.
// Phase 2 (no lock): the removed rowsets are reported to the cloud warm-up
//   manager and the file-cache recycle metrics are updated.
// Phase 3 (under `_gc_mutex`): a queued delete-bitmap entry is dropped only
//   when none of its rowset ids is still present in `_unused_rowsets`.
//
// The container sizes printed by the final log line are snapshotted while
// `_gc_mutex` is still held, so the log never reads the containers
// concurrently with add_unused_rowsets() or other writers.
| 744 | 0 | void CloudTablet::remove_unused_rowsets() { | 
| 745 | 0 |     std::vector<std::shared_ptr<Rowset>> removed_rowsets; | 
| 746 | 0 |     int64_t removed_delete_bitmap_num = 0; | 
                 // Sizes captured under `_gc_mutex` for the summary log at the end.
|  |  |     size_t unused_rowsets_left = 0; | 
|  |  |     size_t unused_delete_bitmap_left = 0; | 
| 747 | 0 |     OlapStopWatch watch; | 
| 748 | 0 |     { | 
| 749 | 0 |         std::lock_guard<std::mutex> lock(_gc_mutex); | 
| 750 |  |         // 1. remove unused rowsets's cache data and delete bitmap | 
| 751 | 0 |         for (auto it = _unused_rowsets.begin(); it != _unused_rowsets.end();) { | 
| 752 | 0 |             auto& rs = it->second; | 
                         // The map itself owns one reference; anything above that means
                         // somebody else still holds the rowset.
| 753 | 0 |             if (rs.use_count() > 1) { | 
| 754 | 0 |                 LOG(WARNING) << "tablet_id:" << tablet_id() << " rowset: " << rs->rowset_id() | 
| 755 | 0 |                              << " has " << rs.use_count() << " references, it cannot be removed"; | 
| 756 | 0 |                 ++it; | 
| 757 | 0 |                 continue; | 
| 758 | 0 |             } | 
| 759 | 0 |             tablet_meta()->remove_rowset_delete_bitmap(rs->rowset_id(), rs->version()); | 
| 760 | 0 |             _rowset_warm_up_states.erase(rs->rowset_id()); | 
| 761 | 0 |             rs->clear_cache(); | 
| 762 | 0 |             g_unused_rowsets_count << -1; | 
| 763 | 0 |             g_unused_rowsets_bytes << -rs->total_disk_size(); | 
| 764 | 0 |             removed_rowsets.push_back(std::move(rs)); | 
| 765 | 0 |             it = _unused_rowsets.erase(it); | 
| 766 | 0 |         } | 
| 767 | 0 |     } | 
| 768 |  | 
 | 
| 769 | 0 |     { | 
| 770 | 0 |         std::vector<RecycledRowsets> recycled_rowsets; | 
| 771 |  | 
 | 
| 772 | 0 |         for (auto& rs : removed_rowsets) { | 
| 773 | 0 |             auto index_names = rs->get_index_file_names(); | 
| 774 | 0 |             recycled_rowsets.emplace_back(rs->rowset_id(), rs->num_segments(), index_names); | 
| 775 | 0 |             int64_t segment_size_sum = 0; | 
| 776 | 0 |             for (int32_t i = 0; i < rs->num_segments(); i++) { | 
| 777 | 0 |                 segment_size_sum += rs->rowset_meta()->segment_file_size(i); | 
| 778 | 0 |             } | 
| 779 | 0 |             g_file_cache_recycle_cached_data_segment_num << rs->num_segments(); | 
| 780 | 0 |             g_file_cache_recycle_cached_data_segment_size << segment_size_sum; | 
| 781 | 0 |             g_file_cache_recycle_cached_data_index_num << index_names.size(); | 
| 782 | 0 |         } | 
| 783 |  | 
 | 
| 784 | 0 |         if (recycled_rowsets.size() > 0) { | 
| 785 | 0 |             auto& manager = | 
| 786 | 0 |                     ExecEnv::GetInstance()->storage_engine().to_cloud().cloud_warm_up_manager(); | 
| 787 | 0 |             manager.recycle_cache(tablet_id(), recycled_rowsets); | 
| 788 | 0 |         } | 
| 789 | 0 |     } | 
| 790 |  | 
 | 
| 791 | 0 |     { | 
| 792 | 0 |         std::lock_guard<std::mutex> lock(_gc_mutex); | 
| 793 |  |         // 2. remove delete bitmap of pre rowsets | 
| 794 | 0 |         for (auto it = _unused_delete_bitmap.begin(); it != _unused_delete_bitmap.end();) { | 
| 795 | 0 |             auto& rowset_ids = std::get<0>(*it); | 
| 796 | 0 |             bool find_unused_rowset = false; | 
| 797 | 0 |             for (const auto& rowset_id : rowset_ids) { | 
| 798 | 0 |                 if (_unused_rowsets.find(rowset_id) != _unused_rowsets.end()) { | 
| 799 | 0 |                     LOG(INFO) << "can not remove pre rowset delete bitmap because rowset is in use" | 
| 800 | 0 |                               << ", tablet_id=" << tablet_id() << ", rowset_id=" << rowset_id; | 
| 801 | 0 |                     find_unused_rowset = true; | 
| 802 | 0 |                     break; | 
| 803 | 0 |                 } | 
| 804 | 0 |             } | 
| 805 | 0 |             if (find_unused_rowset) { | 
| 806 | 0 |                 ++it; | 
| 807 | 0 |                 continue; | 
| 808 | 0 |             } | 
| 809 | 0 |             auto& key_ranges = std::get<1>(*it); | 
| 810 | 0 |             tablet_meta()->delete_bitmap().remove(key_ranges); | 
| 811 | 0 |             it = _unused_delete_bitmap.erase(it); | 
| 812 | 0 |             removed_delete_bitmap_num++; | 
| 813 |  |             // TODO(kaijie): recycle cache for unused delete bitmap | 
| 814 | 0 |         } | 
                     // Snapshot the remaining sizes while the mutex is still held.
|  |  |         unused_rowsets_left = _unused_rowsets.size(); | 
|  |  |         unused_delete_bitmap_left = _unused_delete_bitmap.size(); | 
| 815 | 0 |     } | 
| 816 |  | 
 | 
| 817 | 0 |     LOG(INFO) << "tablet_id=" << tablet_id() << ", unused_rowset size=" << unused_rowsets_left | 
| 818 | 0 |               << ", unused_delete_bitmap size=" << unused_delete_bitmap_left | 
| 819 | 0 |               << ", removed_rowsets_num=" << removed_rowsets.size() | 
| 820 | 0 |               << ", removed_delete_bitmap_num=" << removed_delete_bitmap_num | 
| 821 | 0 |               << ", cost(us)=" << watch.get_elapse_time_us(); | 
| 822 | 0 | } | 
| 823 |  |  | 
// Records `rs.total_disk_size()` in `_base_size` when `rs` starts at version 2;
// any other rowset leaves `_base_size` unchanged.
| 824 | 732 | void CloudTablet::update_base_size(const Rowset& rs) { | 
| 825 |  |     // Define base rowset as the rowset of version [2-x] | 
| 826 | 732 |     if (rs.start_version() == 2) { | 
| 827 | 98 |         _base_size = rs.total_disk_size(); | 
| 828 | 98 |     } | 
| 829 | 732 | } | 
| 830 |  |  | 
// Recycles the cached data of the rowsets returned by get_snapshot_rowset(true),
// reports the recycled rowsets (if any) to the cloud warm-up manager, and then
// erases this tablet from the engine's tablet manager.
// NOTE(review): the meaning of the `true` argument to get_snapshot_rowset() is
// not visible here -- confirm against its declaration.
| 831 | 0 | void CloudTablet::clear_cache() { | 
| 832 | 0 |     auto recycled_rowsets = CloudTablet::recycle_cached_data(get_snapshot_rowset(true)); | 
| 833 | 0 |     if (!recycled_rowsets.empty()) { | 
| 834 | 0 |         auto& manager = ExecEnv::GetInstance()->storage_engine().to_cloud().cloud_warm_up_manager(); | 
| 835 | 0 |         manager.recycle_cache(tablet_id(), recycled_rowsets); | 
| 836 | 0 |     } | 
| 837 | 0 |     _engine.tablet_mgr().erase_tablet(tablet_id()); | 
| 838 | 0 | } | 
| 839 |  |  | 
// Clears the cache of every rowset in `rowsets` that is not referenced beyond
// the two expected owners, and returns one RecycledRowsets entry (rowset id,
// segment count, index file names) per rowset that was cleared.
//
// Rowsets whose shared_ptr use_count() exceeds 2 are skipped with a warning and
// do not appear in the result. For each cleared rowset the
// `g_file_cache_recycle_cached_data_*` metrics are bumped with its segment
// count, summed segment file size and number of index files.
| 840 |  | std::vector<RecycledRowsets> CloudTablet::recycle_cached_data( | 
| 841 | 0 |         const std::vector<RowsetSharedPtr>& rowsets) { | 
| 842 | 0 |     std::vector<RecycledRowsets> recycled_rowsets; | 
| 843 | 0 |     for (const auto& rs : rowsets) { | 
| 844 |  |         // rowsets and tablet._rs_version_map each hold a rowset shared_ptr, so at this point, the reference count of the shared_ptr is at least 2. | 
| 845 | 0 |         if (rs.use_count() > 2) { | 
| 846 | 0 |             LOG(WARNING) << "Rowset " << rs->rowset_id().to_string() << " has " << rs.use_count() | 
| 847 | 0 |                          << " references. File Cache won't be recycled when query is using it."; | 
| 848 | 0 |             continue; | 
| 849 | 0 |         } | 
| 850 | 0 |         rs->clear_cache(); | 
| 851 | 0 |         auto index_names = rs->get_index_file_names(); | 
| 852 | 0 |         recycled_rowsets.emplace_back(rs->rowset_id(), rs->num_segments(), index_names); | 
| 853 |  | 
 | 
| 854 | 0 |         int64_t segment_size_sum = 0; | 
| 855 | 0 |         for (int32_t i = 0; i < rs->num_segments(); i++) { | 
| 856 | 0 |             segment_size_sum += rs->rowset_meta()->segment_file_size(i); | 
| 857 | 0 |         } | 
| 858 | 0 |         g_file_cache_recycle_cached_data_segment_num << rs->num_segments(); | 
| 859 | 0 |         g_file_cache_recycle_cached_data_segment_size << segment_size_sum; | 
| 860 | 0 |         g_file_cache_recycle_cached_data_index_num << index_names.size(); | 
| 861 | 0 |     } | 
| 862 | 0 |     return recycled_rowsets; | 
| 863 | 0 | } | 
| 864 |  |  | 
// Overwrites the tablet's approximate statistics.
//
// `num_segments`, `num_rows` and `data_size` are stored as given. The
// cumulative counters are recomputed from `_rs_version_map`, counting only
// rowsets whose end version is not below the current cumulative point: each
// such rowset adds 1 to the cumulative rowset count and either its segment
// count (when its segments overlap) or 1 to the cumulative delta count.
// The stored rowset count is max(`num_rowsets`, `_rs_version_map.size()`).
//
// All stores use relaxed memory ordering.
// NOTE(review): `_rs_version_map` is iterated without taking a lock in this
// function; presumably the caller holds `_meta_lock` -- confirm at call sites.
| 865 |  | void CloudTablet::reset_approximate_stats(int64_t num_rowsets, int64_t num_segments, | 
| 866 | 0 |                                           int64_t num_rows, int64_t data_size) { | 
| 867 | 0 |     _approximate_num_segments.store(num_segments, std::memory_order_relaxed); | 
| 868 | 0 |     _approximate_num_rows.store(num_rows, std::memory_order_relaxed); | 
| 869 | 0 |     _approximate_data_size.store(data_size, std::memory_order_relaxed); | 
| 870 | 0 |     int64_t cumu_num_deltas = 0; | 
| 871 | 0 |     int64_t cumu_num_rowsets = 0; | 
| 872 | 0 |     auto cp = _cumulative_point.load(std::memory_order_relaxed); | 
| 873 | 0 |     for (auto& [v, r] : _rs_version_map) { | 
                     // Rowsets that end before the cumulative point are not counted.
| 874 | 0 |         if (v.second < cp) { | 
| 875 | 0 |             continue; | 
| 876 | 0 |         } | 
| 877 | 0 |         cumu_num_deltas += r->is_segments_overlapping() ? r->num_segments() : 1; | 
| 878 | 0 |         ++cumu_num_rowsets; | 
| 879 | 0 |     } | 
| 880 |  |     // num_rowsets may be less than the size of _rs_version_map when there are some hole rowsets | 
| 881 |  |     // in the version map, so we use the max value to ensure that the approximate number | 
| 882 |  |     // of rowsets is at least the size of _rs_version_map. | 
| 883 |  |     // Note that this is not the exact number of rowsets, but an approximate number. | 
| 884 | 0 |     int64_t approximate_num_rowsets = | 
| 885 | 0 |             std::max(num_rowsets, static_cast<int64_t>(_rs_version_map.size())); | 
| 886 | 0 |     _approximate_num_rowsets.store(approximate_num_rowsets, std::memory_order_relaxed); | 
| 887 | 0 |     _approximate_cumu_num_rowsets.store(cumu_num_rowsets, std::memory_order_relaxed); | 
| 888 | 0 |     _approximate_cumu_num_deltas.store(cumu_num_deltas, std::memory_order_relaxed); | 
| 889 | 0 | } | 
| 890 |  |  | 
// Creates a rowset writer for this tablet.
//
// Fills `context` in place with a freshly allocated rowset id from the engine
// plus this tablet's tablet/index/partition ids, merge-on-write flag and
// encryption algorithm, then delegates to RowsetFactory::create_rowset_writer().
// `vertical` is forwarded unchanged. Returns whatever the factory returns.
| 891 |  | Result<std::unique_ptr<RowsetWriter>> CloudTablet::create_rowset_writer( | 
| 892 | 0 |         RowsetWriterContext& context, bool vertical) { | 
| 893 | 0 |     context.rowset_id = _engine.next_rowset_id(); | 
| 894 |  |     // FIXME(plat1ko): Seems `tablet_id` and `index_id` has been set repeatedly | 
| 895 | 0 |     context.tablet_id = tablet_id(); | 
| 896 | 0 |     context.index_id = index_id(); | 
| 897 | 0 |     context.partition_id = partition_id(); | 
| 898 | 0 |     context.enable_unique_key_merge_on_write = enable_unique_key_merge_on_write(); | 
| 899 | 0 |     context.encrypt_algorithm = tablet_meta()->encryption_algorithm(); | 
| 900 | 0 |     return RowsetFactory::create_rowset_writer(_engine, context, vertical); | 
| 901 | 0 | } | 
| 902 |  |  | 
// Builds a writer that appends segments to an existing rowset.
//
// The writer reuses `rowset`'s rowset id and remote storage resource, and its
// first segment id is set to `rowset.num_segments()`. The context is marked as
// a transient writer (PREPARED state, OVERLAPPING segments, direct write,
// segcompaction disabled).
//
// `rowset` is expected to be in BEGIN_PARTIAL_UPDATE or COMMITTED state; any
// other state is logged and trips a DCHECK, but the function still proceeds
// when DCHECKs are compiled out.
//
// Returns the writer on success, or the error from
// `rowset.rowset_meta()->remote_storage_resource()` / the factory on failure.
| 903 |  | // create a rowset writer with rowset_id and seg_id | 
| 904 |  | // after writer, merge this transient rowset with original rowset | 
| 905 |  | Result<std::unique_ptr<RowsetWriter>> CloudTablet::create_transient_rowset_writer( | 
| 906 |  |         const Rowset& rowset, std::shared_ptr<PartialUpdateInfo> partial_update_info, | 
| 907 | 0 |         int64_t txn_expiration) { | 
| 908 | 0 |     if (rowset.rowset_meta_state() != RowsetStatePB::BEGIN_PARTIAL_UPDATE && | 
| 909 | 0 |         rowset.rowset_meta_state() != RowsetStatePB::COMMITTED) [[unlikely]] { | 
| 910 | 0 |         auto msg = fmt::format( | 
| 911 | 0 |                 "wrong rowset state when create_transient_rowset_writer, rowset state should be " | 
| 912 | 0 |                 "BEGIN_PARTIAL_UPDATE or COMMITTED, but found {}, rowset_id={}, tablet_id={}", | 
| 913 | 0 |                 RowsetStatePB_Name(rowset.rowset_meta_state()), rowset.rowset_id().to_string(), | 
| 914 | 0 |                 tablet_id()); | 
| 915 |  |         // see `CloudRowsetWriter::build` for detail. | 
| 916 |  |         // if this is in a retry task, the rowset state may have been changed to RowsetStatePB::COMMITTED | 
| 917 |  |         // in `RowsetMeta::merge_rowset_meta()` in previous trials. | 
| 918 | 0 |         LOG(WARNING) << msg; | 
| 919 | 0 |         DCHECK(false) << msg; | 
| 920 | 0 |     } | 
| 921 | 0 |     RowsetWriterContext context; | 
| 922 | 0 |     context.rowset_state = PREPARED; | 
| 923 | 0 |     context.segments_overlap = OVERLAPPING; | 
| 924 |  |     // During a partial update, the extracted columns of a variant should not be included in the tablet schema. | 
| 925 |  |     // This is because the partial update for a variant needs to ignore the extracted columns. | 
| 926 |  |     // Otherwise, the schema types in different rowsets might be inconsistent. When performing a partial update, | 
| 927 |  |     // the complete variant is constructed by reading all the sub-columns of the variant. | 
| 928 | 0 |     context.tablet_schema = rowset.tablet_schema()->copy_without_variant_extracted_columns(); | 
| 929 | 0 |     context.newest_write_timestamp = UnixSeconds(); | 
| 931 | 0 |     context.enable_segcompaction = false; | 
| 932 | 0 |     context.write_type = DataWriteType::TYPE_DIRECT; | 
| 933 | 0 |     context.partial_update_info = std::move(partial_update_info); | 
| 934 | 0 |     context.is_transient_rowset_writer = true; | 
                 // Reuse the original rowset's id so the new segments belong to it.
| 935 | 0 |     context.rowset_id = rowset.rowset_id(); | 
| 936 | 0 |     context.tablet_id = tablet_id(); | 
| 937 | 0 |     context.index_id = index_id(); | 
| 938 | 0 |     context.partition_id = partition_id(); | 
| 939 | 0 |     context.enable_unique_key_merge_on_write = enable_unique_key_merge_on_write(); | 
| 940 | 0 |     context.txn_expiration = txn_expiration; | 
| 941 | 0 |     context.encrypt_algorithm = tablet_meta()->encryption_algorithm(); | 
| 942 |  | 
 | 
| 943 | 0 |     auto storage_resource = rowset.rowset_meta()->remote_storage_resource(); | 
| 944 | 0 |     if (!storage_resource) { | 
| 945 | 0 |         return ResultError(std::move(storage_resource.error())); | 
| 946 | 0 |     } | 
| 947 |  |  | 
| 948 | 0 |     context.storage_resource = *storage_resource.value(); | 
| 949 |  | 
 | 
| 950 | 0 |     return RowsetFactory::create_rowset_writer(_engine, context, false) | 
| 951 | 0 |             .transform([&](auto&& writer) { | 
                             // New segments are numbered after the ones the rowset already has.
| 952 | 0 |                 writer->set_segment_start_id(cast_set<int32_t>(rowset.num_segments())); | 
| 953 | 0 |                 return writer; | 
| 954 | 0 |             }); | 
| 955 | 0 | } | 
| 956 |  |  | 
// Returns the base-compaction score of this tablet.
//
// Under CUMULATIVE_TIME_SERIES_POLICY the score is 0 unless some rowset meta
// that starts below cumulative_layer_point() carries a delete predicate (the
// metas are scanned under a shared `_meta_lock`). In every other case the score
// is `_approximate_num_rowsets - _approximate_cumu_num_rowsets`, both read with
// relaxed ordering.
| 957 | 3 | int64_t CloudTablet::get_cloud_base_compaction_score() const { | 
| 958 | 3 |     if (_tablet_meta->compaction_policy() == CUMULATIVE_TIME_SERIES_POLICY) { | 
| 959 | 0 |         bool has_delete = false; | 
| 960 | 0 |         int64_t point = cumulative_layer_point(); | 
| 961 | 0 |         std::shared_lock<std::shared_mutex> rlock(_meta_lock); | 
| 962 | 0 |         for (const auto& [_, rs_meta] : _tablet_meta->all_rs_metas()) { | 
                         // Only rowsets that start below the cumulative point are inspected.
| 963 | 0 |             if (rs_meta->start_version() >= point) { | 
| 964 | 0 |                 continue; | 
| 965 | 0 |             } | 
| 966 | 0 |             if (rs_meta->has_delete_predicate()) { | 
| 967 | 0 |                 has_delete = true; | 
| 968 | 0 |                 break; | 
| 969 | 0 |             } | 
| 970 | 0 |         } | 
| 971 | 0 |         if (!has_delete) { | 
| 972 | 0 |             return 0; | 
| 973 | 0 |         } | 
| 974 | 0 |     } | 
| 975 |  |  | 
| 976 | 3 |     return _approximate_num_rowsets.load(std::memory_order_relaxed) - | 
| 977 | 3 |            _approximate_cumu_num_rowsets.load(std::memory_order_relaxed); | 
| 978 | 3 | } | 
| 979 |  |  | 
// Returns the cumulative-compaction score, which is currently just the value of
// `_approximate_cumu_num_deltas` (read with relaxed ordering).
| 980 | 1 | int64_t CloudTablet::get_cloud_cumu_compaction_score() const { | 
| 981 |  |     // TODO(plat1ko): Propose an algorithm that considers tablet's key type, number of delete rowsets, | 
| 982 |  |     //  number of tablet versions simultaneously. | 
| 983 | 1 |     return _approximate_cumu_num_deltas.load(std::memory_order_relaxed); | 
| 984 | 1 | } | 
| 985 |  |  | 
| 986 |  | // return a json string to show the compaction status of this tablet | 
| 987 | 33 | void CloudTablet::get_compaction_status(std::string* json_result) { | 
| 988 | 33 |     rapidjson::Document root; | 
| 989 | 33 |     root.SetObject(); | 
| 990 |  |  | 
| 991 | 33 |     rapidjson::Document path_arr; | 
| 992 | 33 |     path_arr.SetArray(); | 
| 993 |  |  | 
| 994 | 33 |     std::vector<RowsetSharedPtr> rowsets; | 
| 995 | 33 |     std::vector<RowsetSharedPtr> stale_rowsets; | 
| 996 | 33 |     { | 
| 997 | 33 |         std::shared_lock rdlock(_meta_lock); | 
| 998 | 33 |         rowsets.reserve(_rs_version_map.size()); | 
| 999 | 148 |         for (auto& it : _rs_version_map) { | 
| 1000 | 148 |             rowsets.push_back(it.second); | 
| 1001 | 148 |         } | 
| 1002 | 33 |         stale_rowsets.reserve(_stale_rs_version_map.size()); | 
| 1003 | 540 |         for (auto& it : _stale_rs_version_map) { | 
| 1004 | 540 |             stale_rowsets.push_back(it.second); | 
| 1005 | 540 |         } | 
| 1006 | 33 |     } | 
| 1007 | 33 |     std::sort(rowsets.begin(), rowsets.end(), Rowset::comparator); | 
| 1008 | 33 |     std::sort(stale_rowsets.begin(), stale_rowsets.end(), Rowset::comparator); | 
| 1009 |  |  | 
| 1010 |  |     // get snapshot version path json_doc | 
| 1011 | 33 |     _timestamped_version_tracker.get_stale_version_path_json_doc(path_arr); | 
| 1012 | 33 |     root.AddMember("cumulative point", _cumulative_point.load(), root.GetAllocator()); | 
| 1013 | 33 |     rapidjson::Value cumu_value; | 
| 1014 | 33 |     std::string format_str = ToStringFromUnixMillis(_last_cumu_compaction_failure_millis.load()); | 
| 1015 | 33 |     cumu_value.SetString(format_str.c_str(), cast_set<uint>(format_str.length()), | 
| 1016 | 33 |                          root.GetAllocator()); | 
| 1017 | 33 |     root.AddMember("last cumulative failure time", cumu_value, root.GetAllocator()); | 
| 1018 | 33 |     rapidjson::Value base_value; | 
| 1019 | 33 |     format_str = ToStringFromUnixMillis(_last_base_compaction_failure_millis.load()); | 
| 1020 | 33 |     base_value.SetString(format_str.c_str(), cast_set<uint>(format_str.length()), | 
| 1021 | 33 |                          root.GetAllocator()); | 
| 1022 | 33 |     root.AddMember("last base failure time", base_value, root.GetAllocator()); | 
| 1023 | 33 |     rapidjson::Value full_value; | 
| 1024 | 33 |     format_str = ToStringFromUnixMillis(_last_full_compaction_failure_millis.load()); | 
| 1025 | 33 |     full_value.SetString(format_str.c_str(), cast_set<uint>(format_str.length()), | 
| 1026 | 33 |                          root.GetAllocator()); | 
| 1027 | 33 |     root.AddMember("last full failure time", full_value, root.GetAllocator()); | 
| 1028 | 33 |     rapidjson::Value cumu_success_value; | 
| 1029 | 33 |     format_str = ToStringFromUnixMillis(_last_cumu_compaction_success_millis.load()); | 
| 1030 | 33 |     cumu_success_value.SetString(format_str.c_str(), cast_set<uint>(format_str.length()), | 
| 1031 | 33 |                                  root.GetAllocator()); | 
| 1032 | 33 |     root.AddMember("last cumulative success time", cumu_success_value, root.GetAllocator()); | 
| 1033 | 33 |     rapidjson::Value base_success_value; | 
| 1034 | 33 |     format_str = ToStringFromUnixMillis(_last_base_compaction_success_millis.load()); | 
| 1035 | 33 |     base_success_value.SetString(format_str.c_str(), cast_set<uint>(format_str.length()), | 
| 1036 | 33 |                                  root.GetAllocator()); | 
| 1037 | 33 |     root.AddMember("last base success time", base_success_value, root.GetAllocator()); | 
| 1038 | 33 |     rapidjson::Value full_success_value; | 
| 1039 | 33 |     format_str = ToStringFromUnixMillis(_last_full_compaction_success_millis.load()); | 
| 1040 | 33 |     full_success_value.SetString(format_str.c_str(), cast_set<uint>(format_str.length()), | 
| 1041 | 33 |                                  root.GetAllocator()); | 
| 1042 | 33 |     root.AddMember("last full success time", full_success_value, root.GetAllocator()); | 
| 1043 | 33 |     rapidjson::Value cumu_schedule_value; | 
| 1044 | 33 |     format_str = ToStringFromUnixMillis(_last_cumu_compaction_schedule_millis.load()); | 
| 1045 | 33 |     cumu_schedule_value.SetString(format_str.c_str(), cast_set<uint>(format_str.length()), | 
| 1046 | 33 |                                   root.GetAllocator()); | 
| 1047 | 33 |     root.AddMember("last cumulative schedule time", cumu_schedule_value, root.GetAllocator()); | 
| 1048 | 33 |     rapidjson::Value base_schedule_value; | 
| 1049 | 33 |     format_str = ToStringFromUnixMillis(_last_base_compaction_schedule_millis.load()); | 
| 1050 | 33 |     base_schedule_value.SetString(format_str.c_str(), cast_set<uint>(format_str.length()), | 
| 1051 | 33 |                                   root.GetAllocator()); | 
| 1052 | 33 |     root.AddMember("last base schedule time", base_schedule_value, root.GetAllocator()); | 
| 1053 | 33 |     rapidjson::Value full_schedule_value; | 
| 1054 | 33 |     format_str = ToStringFromUnixMillis(_last_full_compaction_schedule_millis.load()); | 
| 1055 | 33 |     full_schedule_value.SetString(format_str.c_str(), cast_set<uint>(format_str.length()), | 
| 1056 | 33 |                                   root.GetAllocator()); | 
| 1057 | 33 |     root.AddMember("last full schedule time", full_schedule_value, root.GetAllocator()); | 
| 1058 | 33 |     rapidjson::Value cumu_compaction_status_value; | 
| 1059 | 33 |     cumu_compaction_status_value.SetString(_last_cumu_compaction_status.c_str(), | 
| 1060 | 33 |                                            cast_set<uint>(_last_cumu_compaction_status.length()), | 
| 1061 | 33 |                                            root.GetAllocator()); | 
| 1062 | 33 |     root.AddMember("last cumulative status", cumu_compaction_status_value, root.GetAllocator()); | 
| 1063 | 33 |     rapidjson::Value base_compaction_status_value; | 
| 1064 | 33 |     base_compaction_status_value.SetString(_last_base_compaction_status.c_str(), | 
| 1065 | 33 |                                            cast_set<uint>(_last_base_compaction_status.length()), | 
| 1066 | 33 |                                            root.GetAllocator()); | 
| 1067 | 33 |     root.AddMember("last base status", base_compaction_status_value, root.GetAllocator()); | 
| 1068 | 33 |     rapidjson::Value full_compaction_status_value; | 
| 1069 | 33 |     full_compaction_status_value.SetString(_last_full_compaction_status.c_str(), | 
| 1070 | 33 |                                            cast_set<uint>(_last_full_compaction_status.length()), | 
| 1071 | 33 |                                            root.GetAllocator()); | 
| 1072 | 33 |     root.AddMember("last full status", full_compaction_status_value, root.GetAllocator()); | 
| 1073 | 33 |     rapidjson::Value exec_compaction_time; | 
| 1074 | 33 |     std::string num_str {std::to_string(exec_compaction_time_us.load())}; | 
| 1075 | 33 |     exec_compaction_time.SetString(num_str.c_str(), cast_set<uint>(num_str.length()), | 
| 1076 | 33 |                                    root.GetAllocator()); | 
| 1077 | 33 |     root.AddMember("exec compaction time us", exec_compaction_time, root.GetAllocator()); | 
| 1078 | 33 |     rapidjson::Value local_read_time; | 
| 1079 | 33 |     num_str = std::to_string(local_read_time_us.load()); | 
| 1080 | 33 |     local_read_time.SetString(num_str.c_str(), cast_set<uint>(num_str.length()), | 
| 1081 | 33 |                               root.GetAllocator()); | 
| 1082 | 33 |     root.AddMember("compaction local read time us", local_read_time, root.GetAllocator()); | 
| 1083 | 33 |     rapidjson::Value remote_read_time; | 
| 1084 | 33 |     num_str = std::to_string(remote_read_time_us.load()); | 
| 1085 | 33 |     remote_read_time.SetString(num_str.c_str(), cast_set<uint>(num_str.length()), | 
| 1086 | 33 |                                root.GetAllocator()); | 
| 1087 | 33 |     root.AddMember("compaction remote read time us", remote_read_time, root.GetAllocator()); | 
| 1088 |  |  | 
| 1089 |  |     // print all rowsets' version as an array | 
| 1090 | 33 |     rapidjson::Document versions_arr; | 
| 1091 | 33 |     rapidjson::Document missing_versions_arr; | 
| 1092 | 33 |     versions_arr.SetArray(); | 
| 1093 | 33 |     missing_versions_arr.SetArray(); | 
| 1094 | 33 |     int64_t last_version = -1; | 
| 1095 | 148 |     for (auto& rowset : rowsets) { | 
| 1096 | 148 |         const Version& ver = rowset->version(); | 
| 1097 | 148 |         if (ver.first != last_version + 1) { | 
| 1098 | 0 |             rapidjson::Value miss_value; | 
| 1099 | 0 |             miss_value.SetString(fmt::format("[{}-{}]", last_version + 1, ver.first - 1).c_str(), | 
| 1100 | 0 |                                  missing_versions_arr.GetAllocator()); | 
| 1101 | 0 |             missing_versions_arr.PushBack(miss_value, missing_versions_arr.GetAllocator()); | 
| 1102 | 0 |         } | 
| 1103 | 148 |         rapidjson::Value value; | 
| 1104 | 148 |         std::string version_str = rowset->get_rowset_info_str(); | 
| 1105 | 148 |         value.SetString(version_str.c_str(), cast_set<uint32_t>(version_str.length()), | 
| 1106 | 148 |                         versions_arr.GetAllocator()); | 
| 1107 | 148 |         versions_arr.PushBack(value, versions_arr.GetAllocator()); | 
| 1108 | 148 |         last_version = ver.second; | 
| 1109 | 148 |     } | 
| 1110 | 33 |     root.AddMember("rowsets", versions_arr, root.GetAllocator()); | 
| 1111 | 33 |     root.AddMember("missing_rowsets", missing_versions_arr, root.GetAllocator()); | 
| 1112 |  |  | 
| 1113 |  |     // print all stale rowsets' version as an array | 
| 1114 | 33 |     rapidjson::Document stale_versions_arr; | 
| 1115 | 33 |     stale_versions_arr.SetArray(); | 
| 1116 | 540 |     for (auto& rowset : stale_rowsets) { | 
| 1117 | 540 |         rapidjson::Value value; | 
| 1118 | 540 |         std::string version_str = rowset->get_rowset_info_str(); | 
| 1119 | 540 |         value.SetString(version_str.c_str(), cast_set<uint32_t>(version_str.length()), | 
| 1120 | 540 |                         stale_versions_arr.GetAllocator()); | 
| 1121 | 540 |         stale_versions_arr.PushBack(value, stale_versions_arr.GetAllocator()); | 
| 1122 | 540 |     } | 
| 1123 | 33 |     root.AddMember("stale_rowsets", stale_versions_arr, root.GetAllocator()); | 
| 1124 |  |  | 
| 1125 |  |     // add stale version rowsets | 
| 1126 | 33 |     root.AddMember("stale version path", path_arr, root.GetAllocator()); | 
| 1127 |  |  | 
| 1128 |  |     // to json string | 
| 1129 | 33 |     rapidjson::StringBuffer strbuf; | 
| 1130 | 33 |     rapidjson::PrettyWriter<rapidjson::StringBuffer> writer(strbuf); | 
| 1131 | 33 |     root.Accept(writer); | 
| 1132 | 33 |     *json_result = std::string(strbuf.GetString()); | 
| 1133 | 33 | } | 
| 1134 |  |  | 
| 1135 | 0 | void CloudTablet::set_cumulative_layer_point(int64_t new_point) { | 
| 1136 | 0 |     if (new_point == Tablet::K_INVALID_CUMULATIVE_POINT || new_point >= _cumulative_point) { | 
| 1137 | 0 |         _cumulative_point = new_point; | 
| 1138 | 0 |         return; | 
| 1139 | 0 |     } | 
| 1140 |  |     // cumulative point should only be reset to -1, or be increased | 
| 1141 |  |     // FIXME: could happen in currently unresolved race conditions | 
| 1142 | 0 |     LOG(WARNING) << "Unexpected cumulative point: " << new_point | 
| 1143 | 0 |                  << ", origin: " << _cumulative_point.load(); | 
| 1144 | 0 | } | 
| 1145 |  |  | 
| 1146 |  | Status CloudTablet::check_rowset_schema_for_build_index(std::vector<TColumn>& columns, | 
| 1147 | 10 |                                                         int schema_version) { | 
| 1148 | 10 |     std::map<std::string, TabletColumn> fe_col_map; | 
| 1149 | 14 |     for (int i = 0; i < columns.size(); i++) { | 
| 1150 | 4 |         fe_col_map[columns[i].column_name] = TabletColumn(columns[i]); | 
| 1151 | 4 |     } | 
| 1152 |  |  | 
| 1153 | 10 |     std::shared_lock rlock(_meta_lock); | 
| 1154 | 10 |     for (const auto& [version, rs] : _rs_version_map) { | 
| 1155 | 4 |         if (version.first == 0) { | 
| 1156 | 0 |             continue; | 
| 1157 | 0 |         } | 
| 1158 |  |  | 
| 1159 | 4 |         if (rs->tablet_schema()->schema_version() >= schema_version) { | 
| 1160 | 0 |             continue; | 
| 1161 | 0 |         } | 
| 1162 |  |  | 
| 1163 | 4 |         for (auto rs_col : rs->tablet_schema()->columns()) { | 
| 1164 | 4 |             auto find_ret = fe_col_map.find(rs_col->name()); | 
| 1165 | 4 |             if (find_ret == fe_col_map.end()) { | 
| 1166 | 1 |                 return Status::InternalError( | 
| 1167 | 1 |                         "check rowset meta failed:rowset's col is dropped in FE."); | 
| 1168 | 1 |             } | 
| 1169 |  |  | 
| 1170 | 3 |             if (rs_col->unique_id() != find_ret->second.unique_id()) { | 
| 1171 | 1 |                 return Status::InternalError("check rowset meta failed:col id not match."); | 
| 1172 | 1 |             } | 
| 1173 |  |  | 
| 1174 | 2 |             if (rs_col->type() != find_ret->second.type()) { | 
| 1175 | 1 |                 return Status::InternalError("check rowset meta failed:col type not match."); | 
| 1176 | 1 |             } | 
| 1177 | 2 |         } | 
| 1178 | 4 |     } | 
| 1179 |  |  | 
| 1180 | 7 |     return Status::OK(); | 
| 1181 | 10 | } | 
| 1182 |  |  | 
| 1183 |  | Result<RowsetSharedPtr> CloudTablet::pick_a_rowset_for_index_change(int schema_version, | 
| 1184 | 9 |                                                                     bool& is_base_rowset) { | 
| 1185 | 9 |     TEST_SYNC_POINT_RETURN_WITH_VALUE("CloudTablet::pick_a_rowset_for_index_change", | 
| 1186 | 2 |                                       Result<RowsetSharedPtr>(nullptr)); | 
| 1187 | 2 |     RowsetSharedPtr ret_rowset = nullptr; | 
| 1188 | 2 |     std::shared_lock rlock(_meta_lock); | 
| 1189 | 2 |     for (const auto& [version, rs] : _rs_version_map) { | 
| 1190 | 2 |         if (version.first == 0) { | 
| 1191 | 0 |             continue; | 
| 1192 | 0 |         } | 
| 1193 | 2 |         if (rs->num_rows() == 0) { | 
| 1194 | 1 |             VLOG_DEBUG << "[index_change]find empty rs, index change may " | 
| 1195 | 0 |                           "failed, id=" | 
| 1196 | 0 |                        << rs->rowset_id().to_string(); | 
| 1197 | 1 |         } | 
| 1198 |  |  | 
| 1199 | 2 |         if (rs->tablet_schema()->schema_version() >= schema_version) { | 
| 1200 | 2 |             VLOG_DEBUG << "[index_change] skip rowset " << rs->tablet_schema()->schema_version() | 
| 1201 | 0 |                        << "," << schema_version; | 
| 1202 | 2 |             continue; | 
| 1203 | 2 |         } | 
| 1204 |  |  | 
| 1205 | 0 |         if (ret_rowset == nullptr) { | 
| 1206 | 0 |             ret_rowset = rs; | 
| 1207 | 0 |             continue; | 
| 1208 | 0 |         } | 
| 1209 |  |  | 
| 1210 | 0 |         if (rs->start_version() > ret_rowset->start_version()) { | 
| 1211 | 0 |             ret_rowset = rs; | 
| 1212 | 0 |         } | 
| 1213 | 0 |     } | 
| 1214 |  |  | 
| 1215 | 2 |     if (ret_rowset != nullptr) { | 
| 1216 | 0 |         is_base_rowset = ret_rowset->version().first < _cumulative_point; | 
| 1217 | 0 |     } | 
| 1218 |  |  | 
| 1219 | 2 |     return ret_rowset; | 
| 1220 | 9 | } | 
| 1221 |  |  | 
| 1222 | 0 | std::vector<RowsetSharedPtr> CloudTablet::pick_candidate_rowsets_to_base_compaction() { | 
| 1223 | 0 |     std::vector<RowsetSharedPtr> candidate_rowsets; | 
| 1224 | 0 |     { | 
| 1225 | 0 |         std::shared_lock rlock(_meta_lock); | 
| 1226 | 0 |         for (const auto& [version, rs] : _rs_version_map) { | 
| 1227 | 0 |             if (version.first != 0 && version.first < _cumulative_point && | 
| 1228 | 0 |                 (_alter_version == -1 || version.second <= _alter_version)) { | 
| 1229 | 0 |                 candidate_rowsets.push_back(rs); | 
| 1230 | 0 |             } | 
| 1231 | 0 |         } | 
| 1232 | 0 |     } | 
| 1233 | 0 |     std::sort(candidate_rowsets.begin(), candidate_rowsets.end(), Rowset::comparator); | 
| 1234 | 0 |     return candidate_rowsets; | 
| 1235 | 0 | } | 
| 1236 |  |  | 
| 1237 | 0 | std::vector<RowsetSharedPtr> CloudTablet::pick_candidate_rowsets_to_full_compaction() { | 
| 1238 | 0 |     std::vector<RowsetSharedPtr> candidate_rowsets; | 
| 1239 | 0 |     { | 
| 1240 | 0 |         std::shared_lock rlock(_meta_lock); | 
| 1241 | 0 |         for (auto& [v, rs] : _rs_version_map) { | 
| 1242 |  |             // MUST NOT compact rowset [0-1] for some historical reasons (see cloud_schema_change) | 
| 1243 | 0 |             if (v.first != 0) { | 
| 1244 | 0 |                 candidate_rowsets.push_back(rs); | 
| 1245 | 0 |             } | 
| 1246 | 0 |         } | 
| 1247 | 0 |     } | 
| 1248 | 0 |     std::sort(candidate_rowsets.begin(), candidate_rowsets.end(), Rowset::comparator); | 
| 1249 | 0 |     return candidate_rowsets; | 
| 1250 | 0 | } | 
| 1251 |  |  | 
| 1252 | 0 | CalcDeleteBitmapExecutor* CloudTablet::calc_delete_bitmap_executor() { | 
| 1253 | 0 |     return _engine.calc_delete_bitmap_executor(); | 
| 1254 | 0 | } | 
| 1255 |  |  | 
| 1256 |  | Status CloudTablet::save_delete_bitmap(const TabletTxnInfo* txn_info, int64_t txn_id, | 
| 1257 |  |                                        DeleteBitmapPtr delete_bitmap, RowsetWriter* rowset_writer, | 
| 1258 |  |                                        const RowsetIdUnorderedSet& cur_rowset_ids, int64_t lock_id, | 
| 1259 | 0 |                                        int64_t next_visible_version) { | 
| 1260 | 0 |     RowsetSharedPtr rowset = txn_info->rowset; | 
| 1261 | 0 |     int64_t cur_version = rowset->start_version(); | 
| 1262 |  |     // update delete bitmap info, in order to avoid recalculation when trying again | 
| 1263 | 0 |     RETURN_IF_ERROR(_engine.txn_delete_bitmap_cache().update_tablet_txn_info( | 
| 1264 | 0 |             txn_id, tablet_id(), delete_bitmap, cur_rowset_ids, PublishStatus::PREPARE)); | 
| 1265 |  |  | 
| 1266 | 0 |     if (txn_info->partial_update_info && txn_info->partial_update_info->is_partial_update() && | 
| 1267 | 0 |         rowset_writer->num_rows() > 0) { | 
| 1268 | 0 |         DBUG_EXECUTE_IF("CloudTablet::save_delete_bitmap.update_tmp_rowset.error", { | 
| 1269 | 0 |             return Status::InternalError<false>("injected update_tmp_rowset error."); | 
| 1270 | 0 |         }); | 
| 1271 | 0 |         const auto& rowset_meta = rowset->rowset_meta(); | 
| 1272 | 0 |         RETURN_IF_ERROR(_engine.meta_mgr().update_tmp_rowset(*rowset_meta)); | 
| 1273 | 0 |     } | 
| 1274 |  |  | 
| 1275 | 0 |     RETURN_IF_ERROR(save_delete_bitmap_to_ms(cur_version, txn_id, delete_bitmap, lock_id, | 
| 1276 | 0 |                                              next_visible_version, rowset)); | 
| 1277 |  |  | 
| 1278 |  |     // store the delete bitmap with sentinel marks in txn_delete_bitmap_cache because if the txn is retried for some reason, | 
| 1279 |  |     // it will use the delete bitmap from txn_delete_bitmap_cache when re-calculating the delete bitmap, during which it will do | 
| 1280 |  |     // delete bitmap correctness check. If we store the new_delete_bitmap, the delete bitmap correctness check will fail | 
| 1281 | 0 |     RETURN_IF_ERROR(_engine.txn_delete_bitmap_cache().update_tablet_txn_info( | 
| 1282 | 0 |             txn_id, tablet_id(), delete_bitmap, cur_rowset_ids, PublishStatus::SUCCEED, | 
| 1283 | 0 |             txn_info->publish_info)); | 
| 1284 |  |  | 
| 1285 | 0 |     DBUG_EXECUTE_IF("CloudTablet::save_delete_bitmap.enable_sleep", { | 
| 1286 | 0 |         auto sleep_sec = dp->param<int>("sleep", 5); | 
| 1287 | 0 |         std::this_thread::sleep_for(std::chrono::seconds(sleep_sec)); | 
| 1288 | 0 |     }); | 
| 1289 |  | 
 | 
| 1290 | 0 |     DBUG_EXECUTE_IF("CloudTablet::save_delete_bitmap.injected_error", { | 
| 1291 | 0 |         auto retry = dp->param<bool>("retry", false); | 
| 1292 | 0 |         auto sleep_sec = dp->param<int>("sleep", 0); | 
| 1293 | 0 |         std::this_thread::sleep_for(std::chrono::seconds(sleep_sec)); | 
| 1294 | 0 |         if (retry) { // return DELETE_BITMAP_LOCK_ERROR to let it retry | 
| 1295 | 0 |             return Status::Error<ErrorCode::DELETE_BITMAP_LOCK_ERROR>( | 
| 1296 | 0 |                     "injected DELETE_BITMAP_LOCK_ERROR"); | 
| 1297 | 0 |         } else { | 
| 1298 | 0 |             return Status::InternalError<false>("injected non-retryable error"); | 
| 1299 | 0 |         } | 
| 1300 | 0 |     }); | 
| 1301 |  | 
 | 
| 1302 | 0 |     return Status::OK(); | 
| 1303 | 0 | } | 
| 1304 |  |  | 
| 1305 |  | Status CloudTablet::save_delete_bitmap_to_ms(int64_t cur_version, int64_t txn_id, | 
| 1306 |  |                                              DeleteBitmapPtr delete_bitmap, int64_t lock_id, | 
| 1307 | 0 |                                              int64_t next_visible_version, RowsetSharedPtr rowset) { | 
| 1308 | 0 |     DeleteBitmapPtr new_delete_bitmap = std::make_shared<DeleteBitmap>(tablet_id()); | 
| 1309 | 0 |     for (auto iter = delete_bitmap->delete_bitmap.begin(); | 
| 1310 | 0 |          iter != delete_bitmap->delete_bitmap.end(); ++iter) { | 
| 1311 |  |         // skip sentinel mark, which is used for delete bitmap correctness check | 
| 1312 | 0 |         if (std::get<1>(iter->first) != DeleteBitmap::INVALID_SEGMENT_ID) { | 
| 1313 | 0 |             new_delete_bitmap->merge( | 
| 1314 | 0 |                     {std::get<0>(iter->first), std::get<1>(iter->first), cur_version}, | 
| 1315 | 0 |                     iter->second); | 
| 1316 | 0 |         } | 
| 1317 | 0 |     } | 
| 1318 |  |     // lock_id != -1 means this is in an explict txn | 
| 1319 | 0 |     bool is_explicit_txn = (lock_id != -1); | 
| 1320 | 0 |     auto ms_lock_id = !is_explicit_txn ? txn_id : lock_id; | 
| 1321 | 0 |     std::optional<StorageResource> storage_resource; | 
| 1322 | 0 |     auto storage_resource_result = rowset->rowset_meta()->remote_storage_resource(); | 
| 1323 | 0 |     if (storage_resource_result) { | 
| 1324 | 0 |         storage_resource = *storage_resource_result.value(); | 
| 1325 | 0 |     } | 
| 1326 | 0 |     RETURN_IF_ERROR(_engine.meta_mgr().update_delete_bitmap( | 
| 1327 | 0 |             *this, ms_lock_id, LOAD_INITIATOR_ID, new_delete_bitmap.get(), new_delete_bitmap.get(), | 
| 1328 | 0 |             rowset->rowset_id().to_string(), storage_resource, | 
| 1329 | 0 |             config::delete_bitmap_store_write_version, txn_id, is_explicit_txn, | 
| 1330 | 0 |             next_visible_version)); | 
| 1331 | 0 |     return Status::OK(); | 
| 1332 | 0 | } | 
| 1333 |  |  | 
| 1334 | 0 | Versions CloudTablet::calc_missed_versions(int64_t spec_version, Versions existing_versions) const { | 
| 1335 | 0 |     DCHECK(spec_version > 0) << "invalid spec_version: " << spec_version; | 
| 1336 |  |  | 
| 1337 |  |     // sort the existing versions in ascending order | 
| 1338 | 0 |     std::sort(existing_versions.begin(), existing_versions.end(), | 
| 1339 | 0 |               [](const Version& a, const Version& b) { | 
| 1340 |  |                   // simple because 2 versions are certainly not overlapping | 
| 1341 | 0 |                   return a.first < b.first; | 
| 1342 | 0 |               }); | 
| 1343 |  |  | 
| 1344 |  |     // From the first version(=0), find the missing version until spec_version | 
| 1345 | 0 |     int64_t last_version = -1; | 
| 1346 | 0 |     Versions missed_versions; | 
| 1347 | 0 |     for (const Version& version : existing_versions) { | 
| 1348 | 0 |         if (version.first > last_version + 1) { | 
| 1349 |  |             // there is a hole between versions | 
| 1350 | 0 |             missed_versions.emplace_back(last_version + 1, std::min(version.first, spec_version)); | 
| 1351 | 0 |         } | 
| 1352 | 0 |         last_version = version.second; | 
| 1353 | 0 |         if (last_version >= spec_version) { | 
| 1354 | 0 |             break; | 
| 1355 | 0 |         } | 
| 1356 | 0 |     } | 
| 1357 | 0 |     if (last_version < spec_version) { | 
| 1358 |  |         // there is a hole between the last version and the specificed version. | 
| 1359 | 0 |         missed_versions.emplace_back(last_version + 1, spec_version); | 
| 1360 | 0 |     } | 
| 1361 | 0 |     return missed_versions; | 
| 1362 | 0 | } | 
| 1363 |  |  | 
| 1364 |  | Status CloudTablet::calc_delete_bitmap_for_compaction( | 
| 1365 |  |         const std::vector<RowsetSharedPtr>& input_rowsets, const RowsetSharedPtr& output_rowset, | 
| 1366 |  |         const RowIdConversion& rowid_conversion, ReaderType compaction_type, int64_t merged_rows, | 
| 1367 |  |         int64_t filtered_rows, int64_t initiator, DeleteBitmapPtr& output_rowset_delete_bitmap, | 
| 1368 | 0 |         bool allow_delete_in_cumu_compaction, int64_t& get_delete_bitmap_lock_start_time) { | 
| 1369 | 0 |     output_rowset_delete_bitmap = std::make_shared<DeleteBitmap>(tablet_id()); | 
| 1370 | 0 |     std::unique_ptr<RowLocationSet> missed_rows; | 
| 1371 | 0 |     if ((config::enable_missing_rows_correctness_check || | 
| 1372 | 0 |          config::enable_mow_compaction_correctness_check_core || | 
| 1373 | 0 |          config::enable_mow_compaction_correctness_check_fail) && | 
| 1374 | 0 |         !allow_delete_in_cumu_compaction && | 
| 1375 | 0 |         (compaction_type == ReaderType::READER_CUMULATIVE_COMPACTION || | 
| 1376 | 0 |          !config::enable_prune_delete_sign_when_base_compaction)) { | 
| 1377 |  |         // also check duplicate key for base compaction when config::enable_prune_delete_sign_when_base_compaction==false | 
| 1378 | 0 |         missed_rows = std::make_unique<RowLocationSet>(); | 
| 1379 | 0 |         LOG(INFO) << "RowLocation Set inited succ for tablet:" << tablet_id(); | 
| 1380 | 0 |     } | 
| 1381 |  | 
 | 
| 1382 | 0 |     std::unique_ptr<std::map<RowsetSharedPtr, RowLocationPairList>> location_map; | 
| 1383 | 0 |     if (config::enable_rowid_conversion_correctness_check && | 
| 1384 | 0 |         tablet_schema()->cluster_key_uids().empty()) { | 
| 1385 | 0 |         location_map = std::make_unique<std::map<RowsetSharedPtr, RowLocationPairList>>(); | 
| 1386 | 0 |         LOG(INFO) << "Location Map inited succ for tablet:" << tablet_id(); | 
| 1387 | 0 |     } | 
| 1388 |  |  | 
| 1389 |  |     // 1. calc delete bitmap for historical data | 
| 1390 | 0 |     RETURN_IF_ERROR(_engine.meta_mgr().sync_tablet_rowsets(this)); | 
| 1391 | 0 |     Version version = max_version(); | 
| 1392 | 0 |     std::size_t missed_rows_size = 0; | 
| 1393 | 0 |     calc_compaction_output_rowset_delete_bitmap( | 
| 1394 | 0 |             input_rowsets, rowid_conversion, 0, version.second + 1, missed_rows.get(), | 
| 1395 | 0 |             location_map.get(), tablet_meta()->delete_bitmap(), output_rowset_delete_bitmap.get()); | 
| 1396 | 0 |     if (missed_rows) { | 
| 1397 | 0 |         missed_rows_size = missed_rows->size(); | 
| 1398 | 0 |         if (!allow_delete_in_cumu_compaction) { | 
| 1399 | 0 |             if ((compaction_type == ReaderType::READER_CUMULATIVE_COMPACTION || | 
| 1400 | 0 |                  !config::enable_prune_delete_sign_when_base_compaction) && | 
| 1401 | 0 |                 tablet_state() == TABLET_RUNNING) { | 
| 1402 | 0 |                 if (merged_rows + filtered_rows >= 0 && | 
| 1403 | 0 |                     merged_rows + filtered_rows != missed_rows_size) { | 
| 1404 | 0 |                     std::string err_msg = fmt::format( | 
| 1405 | 0 |                             "cumulative compaction: the merged rows({}), the filtered rows({}) is " | 
| 1406 | 0 |                             "not equal to missed rows({}) in rowid conversion, tablet_id: {}, " | 
| 1407 | 0 |                             "table_id:{}", | 
| 1408 | 0 |                             merged_rows, filtered_rows, missed_rows_size, tablet_id(), table_id()); | 
| 1409 | 0 |                     LOG(WARNING) << err_msg; | 
| 1410 | 0 |                     if (config::enable_mow_compaction_correctness_check_core) { | 
| 1411 | 0 |                         CHECK(false) << err_msg; | 
| 1412 | 0 |                     } else if (config::enable_mow_compaction_correctness_check_fail) { | 
| 1413 | 0 |                         return Status::InternalError<false>(err_msg); | 
| 1414 | 0 |                     } else { | 
| 1415 | 0 |                         DCHECK(false) << err_msg; | 
| 1416 | 0 |                     } | 
| 1417 | 0 |                 } | 
| 1418 | 0 |             } | 
| 1419 | 0 |         } | 
| 1420 | 0 |     } | 
| 1421 | 0 |     if (location_map) { | 
| 1422 | 0 |         RETURN_IF_ERROR(check_rowid_conversion(output_rowset, *location_map)); | 
| 1423 | 0 |         location_map->clear(); | 
| 1424 | 0 |     } | 
| 1425 |  |  | 
| 1426 |  |     // 2. calc delete bitmap for incremental data | 
| 1427 | 0 |     int64_t t1 = MonotonicMicros(); | 
| 1428 | 0 |     RETURN_IF_ERROR(_engine.meta_mgr().get_delete_bitmap_update_lock( | 
| 1429 | 0 |             *this, COMPACTION_DELETE_BITMAP_LOCK_ID, initiator)); | 
| 1430 | 0 |     int64_t t2 = MonotonicMicros(); | 
| 1431 | 0 |     if (compaction_type == ReaderType::READER_CUMULATIVE_COMPACTION) { | 
| 1432 | 0 |         g_cu_compaction_get_delete_bitmap_lock_time_ms << (t2 - t1) / 1000; | 
| 1433 | 0 |     } else if (compaction_type == ReaderType::READER_BASE_COMPACTION) { | 
| 1434 | 0 |         g_base_compaction_get_delete_bitmap_lock_time_ms << (t2 - t1) / 1000; | 
| 1435 | 0 |     } | 
| 1436 | 0 |     get_delete_bitmap_lock_start_time = t2; | 
| 1437 | 0 |     RETURN_IF_ERROR(_engine.meta_mgr().sync_tablet_rowsets(this)); | 
| 1438 | 0 |     int64_t t3 = MonotonicMicros(); | 
| 1439 |  | 
 | 
| 1440 | 0 |     calc_compaction_output_rowset_delete_bitmap( | 
| 1441 | 0 |             input_rowsets, rowid_conversion, version.second, UINT64_MAX, missed_rows.get(), | 
| 1442 | 0 |             location_map.get(), tablet_meta()->delete_bitmap(), output_rowset_delete_bitmap.get()); | 
| 1443 | 0 |     int64_t t4 = MonotonicMicros(); | 
| 1444 | 0 |     if (location_map) { | 
| 1445 | 0 |         RETURN_IF_ERROR(check_rowid_conversion(output_rowset, *location_map)); | 
| 1446 | 0 |     } | 
| 1447 | 0 |     int64_t t5 = MonotonicMicros(); | 
| 1448 |  |  | 
| 1449 |  |     // 3. store delete bitmap | 
| 1450 | 0 |     DeleteBitmapPtr delete_bitmap_v2 = nullptr; | 
| 1451 | 0 |     auto delete_bitmap_size = output_rowset_delete_bitmap->delete_bitmap.size(); | 
| 1452 | 0 |     auto store_version = config::delete_bitmap_store_write_version; | 
| 1453 | 0 |     if (store_version == 2 || store_version == 3) { | 
| 1454 | 0 |         delete_bitmap_v2 = std::make_shared<DeleteBitmap>(*output_rowset_delete_bitmap); | 
| 1455 | 0 |         std::vector<std::pair<RowsetId, int64_t>> retained_rowsets_to_seg_num; | 
| 1456 | 0 |         { | 
| 1457 | 0 |             std::shared_lock rlock(get_header_lock()); | 
| 1458 | 0 |             for (const auto& [rowset_version, rowset_ptr] : rowset_map()) { | 
| 1459 | 0 |                 if (rowset_version.second < output_rowset->start_version()) { | 
| 1460 | 0 |                     retained_rowsets_to_seg_num.emplace_back( | 
| 1461 | 0 |                             std::make_pair(rowset_ptr->rowset_id(), rowset_ptr->num_segments())); | 
| 1462 | 0 |                 } | 
| 1463 | 0 |             } | 
| 1464 | 0 |         } | 
| 1465 | 0 |         if (config::enable_agg_delta_delete_bitmap_for_store_v2) { | 
| 1466 | 0 |             tablet_meta()->delete_bitmap().subset_and_agg( | 
| 1467 | 0 |                     retained_rowsets_to_seg_num, output_rowset->start_version(), | 
| 1468 | 0 |                     output_rowset->end_version(), delete_bitmap_v2.get()); | 
| 1469 | 0 |         } else { | 
| 1470 | 0 |             tablet_meta()->delete_bitmap().subset( | 
| 1471 | 0 |                     retained_rowsets_to_seg_num, output_rowset->start_version(), | 
| 1472 | 0 |                     output_rowset->end_version(), delete_bitmap_v2.get()); | 
| 1473 | 0 |         } | 
| 1474 | 0 |     } | 
| 1475 | 0 |     std::optional<StorageResource> storage_resource; | 
| 1476 | 0 |     auto storage_resource_result = output_rowset->rowset_meta()->remote_storage_resource(); | 
| 1477 | 0 |     if (storage_resource_result) { | 
| 1478 | 0 |         storage_resource = *storage_resource_result.value(); | 
| 1479 | 0 |     } | 
| 1480 | 0 |     auto st = _engine.meta_mgr().update_delete_bitmap( | 
| 1481 | 0 |             *this, -1, initiator, output_rowset_delete_bitmap.get(), delete_bitmap_v2.get(), | 
| 1482 | 0 |             output_rowset->rowset_id().to_string(), storage_resource, store_version); | 
| 1483 | 0 |     int64_t t6 = MonotonicMicros(); | 
| 1484 | 0 |     LOG(INFO) << "calc_delete_bitmap_for_compaction, tablet_id=" << tablet_id() | 
| 1485 | 0 |               << ", get lock cost " << (t2 - t1) << " us, sync rowsets cost " << (t3 - t2) | 
| 1486 | 0 |               << " us, calc delete bitmap cost " << (t4 - t3) << " us, check rowid conversion cost " | 
| 1487 | 0 |               << (t5 - t4) << " us, store delete bitmap cost " << (t6 - t5) | 
| 1488 | 0 |               << " us, st=" << st.to_string() << ". store_version=" << store_version | 
| 1489 | 0 |               << ", calculated delete bitmap size=" << delete_bitmap_size | 
| 1490 | 0 |               << ", update delete bitmap size=" | 
| 1491 | 0 |               << output_rowset_delete_bitmap->delete_bitmap.size(); | 
| 1492 | 0 |     return st; | 
| 1493 | 0 | } | 
| 1494 |  |  | 
| 1495 |  | void CloudTablet::agg_delete_bitmap_for_compaction( | 
| 1496 |  |         int64_t start_version, int64_t end_version, const std::vector<RowsetSharedPtr>& pre_rowsets, | 
| 1497 |  |         DeleteBitmapPtr& new_delete_bitmap, | 
| 1498 | 0 |         std::map<std::string, int64_t>& pre_rowset_to_versions) { | 
| 1499 | 0 |     for (auto& rowset : pre_rowsets) { | 
| 1500 | 0 |         for (uint32_t seg_id = 0; seg_id < rowset->num_segments(); ++seg_id) { | 
| 1501 | 0 |             auto d = tablet_meta()->delete_bitmap().get_agg_without_cache( | 
| 1502 | 0 |                     {rowset->rowset_id(), seg_id, end_version}, start_version); | 
| 1503 | 0 |             if (d->isEmpty()) { | 
| 1504 | 0 |                 continue; | 
| 1505 | 0 |             } | 
| 1506 | 0 |             VLOG_DEBUG << "agg delete bitmap for tablet_id=" << tablet_id() | 
| 1507 | 0 |                        << ", rowset_id=" << rowset->rowset_id() << ", seg_id=" << seg_id | 
| 1508 | 0 |                        << ", rowset_version=" << rowset->version().to_string() | 
| 1509 | 0 |                        << ". compaction start_version=" << start_version | 
| 1510 | 0 |                        << ", end_version=" << end_version | 
| 1511 | 0 |                        << ". delete_bitmap cardinality=" << d->cardinality(); | 
| 1512 | 0 |             DeleteBitmap::BitmapKey end_key {rowset->rowset_id(), seg_id, end_version}; | 
| 1513 | 0 |             new_delete_bitmap->set(end_key, *d); | 
| 1514 | 0 |             pre_rowset_to_versions[rowset->rowset_id().to_string()] = rowset->version().second; | 
| 1515 | 0 |         } | 
| 1516 | 0 |     } | 
| 1517 | 0 | } | 
| 1518 |  |  | 
| 1519 | 0 | Status CloudTablet::sync_meta() { | 
| 1520 | 0 |     if (!config::enable_file_cache) { | 
| 1521 | 0 |         return Status::OK(); | 
| 1522 | 0 |     } | 
| 1523 |  |  | 
| 1524 | 0 |     TabletMetaSharedPtr tablet_meta; | 
| 1525 | 0 |     auto st = _engine.meta_mgr().get_tablet_meta(tablet_id(), &tablet_meta); | 
| 1526 | 0 |     if (!st.ok()) { | 
| 1527 | 0 |         if (st.is<ErrorCode::NOT_FOUND>()) { | 
| 1528 | 0 |             clear_cache(); | 
| 1529 | 0 |         } | 
| 1530 | 0 |         return st; | 
| 1531 | 0 |     } | 
| 1532 |  |  | 
| 1533 | 0 |     auto new_ttl_seconds = tablet_meta->ttl_seconds(); | 
| 1534 | 0 |     if (_tablet_meta->ttl_seconds() != new_ttl_seconds) { | 
| 1535 | 0 |         _tablet_meta->set_ttl_seconds(new_ttl_seconds); | 
| 1536 | 0 |         int64_t cur_time = UnixSeconds(); | 
| 1537 | 0 |         std::shared_lock rlock(_meta_lock); | 
| 1538 | 0 |         for (auto& [_, rs] : _rs_version_map) { | 
| 1539 | 0 |             for (int seg_id = 0; seg_id < rs->num_segments(); ++seg_id) { | 
| 1540 | 0 |                 int64_t new_expiration_time = | 
| 1541 | 0 |                         new_ttl_seconds + rs->rowset_meta()->newest_write_timestamp(); | 
| 1542 | 0 |                 new_expiration_time = new_expiration_time > cur_time ? new_expiration_time : 0; | 
| 1543 | 0 |                 auto file_key = Segment::file_cache_key(rs->rowset_id().to_string(), seg_id); | 
| 1544 | 0 |                 auto* file_cache = io::FileCacheFactory::instance()->get_by_path(file_key); | 
| 1545 | 0 |                 file_cache->modify_expiration_time(file_key, new_expiration_time); | 
| 1546 | 0 |             } | 
| 1547 | 0 |         } | 
| 1548 | 0 |     } | 
| 1549 |  | 
 | 
| 1550 | 0 |     auto new_compaction_policy = tablet_meta->compaction_policy(); | 
| 1551 | 0 |     if (_tablet_meta->compaction_policy() != new_compaction_policy) { | 
| 1552 | 0 |         _tablet_meta->set_compaction_policy(new_compaction_policy); | 
| 1553 | 0 |     } | 
| 1554 | 0 |     auto new_time_series_compaction_goal_size_mbytes = | 
| 1555 | 0 |             tablet_meta->time_series_compaction_goal_size_mbytes(); | 
| 1556 | 0 |     if (_tablet_meta->time_series_compaction_goal_size_mbytes() != | 
| 1557 | 0 |         new_time_series_compaction_goal_size_mbytes) { | 
| 1558 | 0 |         _tablet_meta->set_time_series_compaction_goal_size_mbytes( | 
| 1559 | 0 |                 new_time_series_compaction_goal_size_mbytes); | 
| 1560 | 0 |     } | 
| 1561 | 0 |     auto new_time_series_compaction_file_count_threshold = | 
| 1562 | 0 |             tablet_meta->time_series_compaction_file_count_threshold(); | 
| 1563 | 0 |     if (_tablet_meta->time_series_compaction_file_count_threshold() != | 
| 1564 | 0 |         new_time_series_compaction_file_count_threshold) { | 
| 1565 | 0 |         _tablet_meta->set_time_series_compaction_file_count_threshold( | 
| 1566 | 0 |                 new_time_series_compaction_file_count_threshold); | 
| 1567 | 0 |     } | 
| 1568 | 0 |     auto new_time_series_compaction_time_threshold_seconds = | 
| 1569 | 0 |             tablet_meta->time_series_compaction_time_threshold_seconds(); | 
| 1570 | 0 |     if (_tablet_meta->time_series_compaction_time_threshold_seconds() != | 
| 1571 | 0 |         new_time_series_compaction_time_threshold_seconds) { | 
| 1572 | 0 |         _tablet_meta->set_time_series_compaction_time_threshold_seconds( | 
| 1573 | 0 |                 new_time_series_compaction_time_threshold_seconds); | 
| 1574 | 0 |     } | 
| 1575 | 0 |     auto new_time_series_compaction_empty_rowsets_threshold = | 
| 1576 | 0 |             tablet_meta->time_series_compaction_empty_rowsets_threshold(); | 
| 1577 | 0 |     if (_tablet_meta->time_series_compaction_empty_rowsets_threshold() != | 
| 1578 | 0 |         new_time_series_compaction_empty_rowsets_threshold) { | 
| 1579 | 0 |         _tablet_meta->set_time_series_compaction_empty_rowsets_threshold( | 
| 1580 | 0 |                 new_time_series_compaction_empty_rowsets_threshold); | 
| 1581 | 0 |     } | 
| 1582 | 0 |     auto new_time_series_compaction_level_threshold = | 
| 1583 | 0 |             tablet_meta->time_series_compaction_level_threshold(); | 
| 1584 | 0 |     if (_tablet_meta->time_series_compaction_level_threshold() != | 
| 1585 | 0 |         new_time_series_compaction_level_threshold) { | 
| 1586 | 0 |         _tablet_meta->set_time_series_compaction_level_threshold( | 
| 1587 | 0 |                 new_time_series_compaction_level_threshold); | 
| 1588 | 0 |     } | 
| 1589 |  | 
 | 
| 1590 | 0 |     return Status::OK(); | 
| 1591 | 0 | } | 
| 1592 |  |  | 
| 1593 | 0 | void CloudTablet::build_tablet_report_info(TTabletInfo* tablet_info) { | 
| 1594 | 0 |     std::shared_lock rdlock(_meta_lock); | 
| 1595 | 0 |     tablet_info->__set_total_version_count(_tablet_meta->version_count()); | 
| 1596 | 0 |     tablet_info->__set_tablet_id(_tablet_meta->tablet_id()); | 
| 1597 |  |     // Currently, this information will not be used by the cloud report, | 
| 1598 |  |     // but it may be used in the future. | 
| 1599 | 0 | } | 
| 1600 |  |  | 
| 1601 |  | Status CloudTablet::check_delete_bitmap_cache(int64_t txn_id, | 
| 1602 | 0 |                                               DeleteBitmap* expected_delete_bitmap) { | 
| 1603 | 0 |     DeleteBitmapPtr cached_delete_bitmap; | 
| 1604 | 0 |     CloudStorageEngine& engine = ExecEnv::GetInstance()->storage_engine().to_cloud(); | 
| 1605 | 0 |     Status st = engine.txn_delete_bitmap_cache().get_delete_bitmap( | 
| 1606 | 0 |             txn_id, tablet_id(), &cached_delete_bitmap, nullptr, nullptr); | 
| 1607 | 0 |     if (st.ok()) { | 
| 1608 | 0 |         bool res = (expected_delete_bitmap->cardinality() == cached_delete_bitmap->cardinality()); | 
| 1609 | 0 |         auto msg = fmt::format( | 
| 1610 | 0 |                 "delete bitmap cache check failed, cur_cardinality={}, cached_cardinality={}" | 
| 1611 | 0 |                 "txn_id={}, tablet_id={}", | 
| 1612 | 0 |                 expected_delete_bitmap->cardinality(), cached_delete_bitmap->cardinality(), txn_id, | 
| 1613 | 0 |                 tablet_id()); | 
| 1614 | 0 |         if (!res) { | 
| 1615 | 0 |             DCHECK(res) << msg; | 
| 1616 | 0 |             return Status::InternalError<false>(msg); | 
| 1617 | 0 |         } | 
| 1618 | 0 |     } | 
| 1619 | 0 |     return Status::OK(); | 
| 1620 | 0 | } | 
| 1621 |  |  | 
| 1622 | 34 | WarmUpState CloudTablet::get_rowset_warmup_state(RowsetId rowset_id) { | 
| 1623 | 34 |     std::shared_lock rlock(_meta_lock); | 
| 1624 | 34 |     if (!_rowset_warm_up_states.contains(rowset_id)) { | 
| 1625 | 1 |         return {.trigger_source = WarmUpTriggerSource::NONE, .progress = WarmUpProgress::NONE}; | 
| 1626 | 1 |     } | 
| 1627 | 33 |     auto& warmup_info = _rowset_warm_up_states[rowset_id]; | 
| 1628 | 33 |     warmup_info.update_state(); | 
| 1629 | 33 |     return warmup_info.state; | 
| 1630 | 34 | } | 
| 1631 |  |  | 
| 1632 |  | bool CloudTablet::add_rowset_warmup_state(const RowsetMeta& rowset, WarmUpTriggerSource source, | 
| 1633 | 34 |                                           std::chrono::steady_clock::time_point start_tp) { | 
| 1634 | 34 |     std::lock_guard wlock(_meta_lock); | 
| 1635 | 34 |     return add_rowset_warmup_state_unlocked(rowset, source, start_tp); | 
| 1636 | 34 | } | 
| 1637 |  |  | 
| 1638 |  | bool CloudTablet::update_rowset_warmup_state_inverted_idx_num(WarmUpTriggerSource source, | 
| 1639 | 5 |                                                               RowsetId rowset_id, int64_t delta) { | 
| 1640 | 5 |     std::lock_guard wlock(_meta_lock); | 
| 1641 | 5 |     return update_rowset_warmup_state_inverted_idx_num_unlocked(source, rowset_id, delta); | 
| 1642 | 5 | } | 
| 1643 |  |  | 
| 1644 |  | bool CloudTablet::update_rowset_warmup_state_inverted_idx_num_unlocked(WarmUpTriggerSource source, | 
| 1645 |  |                                                                        RowsetId rowset_id, | 
| 1646 | 5 |                                                                        int64_t delta) { | 
| 1647 | 5 |     auto it = _rowset_warm_up_states.find(rowset_id); | 
| 1648 | 5 |     if (it == _rowset_warm_up_states.end()) { | 
| 1649 | 0 |         return false; | 
| 1650 | 0 |     } | 
| 1651 | 5 |     if (it->second.state.trigger_source != source) { | 
| 1652 |  |         // Only the same trigger source can update the state | 
| 1653 | 2 |         return false; | 
| 1654 | 2 |     } | 
| 1655 | 3 |     it->second.num_inverted_idx += delta; | 
| 1656 | 3 |     return true; | 
| 1657 | 5 | } | 
| 1658 |  |  | 
| 1659 |  | bool CloudTablet::add_rowset_warmup_state_unlocked(const RowsetMeta& rowset, | 
| 1660 |  |                                                    WarmUpTriggerSource source, | 
| 1661 | 34 |                                                    std::chrono::steady_clock::time_point start_tp) { | 
| 1662 | 34 |     auto rowset_id = rowset.rowset_id(); | 
| 1663 |  |  | 
| 1664 |  |     // Check if rowset already has warmup state | 
| 1665 | 34 |     if (_rowset_warm_up_states.contains(rowset_id)) { | 
| 1666 | 10 |         auto existing_state = _rowset_warm_up_states[rowset_id].state; | 
| 1667 |  |  | 
| 1668 |  |         // For job-triggered warmup (one-time and periodic warmup), allow it to proceed | 
| 1669 |  |         // except when there's already another job-triggered warmup in progress | 
| 1670 | 10 |         if (source == WarmUpTriggerSource::JOB) { | 
| 1671 | 5 |             if (existing_state.trigger_source == WarmUpTriggerSource::JOB && | 
| 1672 | 5 |                 existing_state.progress == WarmUpProgress::DOING) { | 
| 1673 |  |                 // Same job type already in progress, skip to avoid duplicate warmup | 
| 1674 | 1 |                 return false; | 
| 1675 | 1 |             } | 
| 1676 | 5 |         } else { | 
| 1677 |  |             // For non-job warmup (EVENT_DRIVEN, SYNC_ROWSET), skip if any warmup exists | 
| 1678 | 5 |             return false; | 
| 1679 | 5 |         } | 
| 1680 | 10 |     } | 
| 1681 |  |  | 
| 1682 | 28 |     if (source == WarmUpTriggerSource::JOB) { | 
| 1683 | 9 |         g_file_cache_warm_up_rowset_triggered_by_job_num << 1; | 
| 1684 | 19 |     } else if (source == WarmUpTriggerSource::SYNC_ROWSET) { | 
| 1685 | 6 |         g_file_cache_warm_up_rowset_triggered_by_sync_rowset_num << 1; | 
| 1686 | 13 |     } else if (source == WarmUpTriggerSource::EVENT_DRIVEN) { | 
| 1687 | 13 |         g_file_cache_warm_up_rowset_triggered_by_event_driven_num << 1; | 
| 1688 | 13 |     } | 
| 1689 | 28 |     _rowset_warm_up_states[rowset_id] = { | 
| 1690 | 28 |             .state = {.trigger_source = source, | 
| 1691 | 28 |                       .progress = (rowset.num_segments() == 0 ? WarmUpProgress::DONE | 
| 1692 | 28 |                                                               : WarmUpProgress::DOING)}, | 
| 1693 | 28 |             .num_segments = rowset.num_segments(), | 
| 1694 | 28 |             .start_tp = start_tp}; | 
| 1695 | 28 |     return true; | 
| 1696 | 34 | } | 
| 1697 |  |  | 
| 1698 | 51 | void CloudTablet::RowsetWarmUpInfo::update_state() { | 
| 1699 | 51 |     if (has_finished()) { | 
| 1700 | 14 |         g_file_cache_warm_up_rowset_complete_num << 1; | 
| 1701 | 14 |         auto cost = std::chrono::duration_cast<std::chrono::milliseconds>( | 
| 1702 | 14 |                             std::chrono::steady_clock::now() - start_tp) | 
| 1703 | 14 |                             .count(); | 
| 1704 | 14 |         g_file_cache_warm_up_rowset_all_segments_latency << cost; | 
| 1705 | 14 |         state.progress = WarmUpProgress::DONE; | 
| 1706 | 14 |     } | 
| 1707 | 51 | } | 
| 1708 |  |  | 
| 1709 |  | WarmUpState CloudTablet::complete_rowset_segment_warmup(WarmUpTriggerSource trigger_source, | 
| 1710 |  |                                                         RowsetId rowset_id, Status status, | 
| 1711 |  |                                                         int64_t segment_num, | 
| 1712 | 21 |                                                         int64_t inverted_idx_num) { | 
| 1713 | 21 |     std::lock_guard wlock(_meta_lock); | 
| 1714 | 21 |     auto it = _rowset_warm_up_states.find(rowset_id); | 
| 1715 | 21 |     if (it == _rowset_warm_up_states.end()) { | 
| 1716 | 1 |         return {.trigger_source = WarmUpTriggerSource::NONE, .progress = WarmUpProgress::NONE}; | 
| 1717 | 1 |     } | 
| 1718 | 20 |     auto& warmup_info = it->second; | 
| 1719 | 20 |     if (warmup_info.state.trigger_source != trigger_source) { | 
| 1720 |  |         // Only the same trigger source can update the state | 
| 1721 | 2 |         return warmup_info.state; | 
| 1722 | 2 |     } | 
| 1723 | 18 |     VLOG_DEBUG << "complete rowset segment warmup for rowset " << rowset_id << ", " << status; | 
| 1724 | 18 |     if (segment_num > 0) { | 
| 1725 | 16 |         g_file_cache_warm_up_segment_complete_num << segment_num; | 
| 1726 | 16 |         if (!status.ok()) { | 
| 1727 | 1 |             g_file_cache_warm_up_segment_failed_num << segment_num; | 
| 1728 | 1 |         } | 
| 1729 | 16 |     } | 
| 1730 | 18 |     if (inverted_idx_num > 0) { | 
| 1731 | 2 |         g_file_cache_warm_up_inverted_idx_complete_num << inverted_idx_num; | 
| 1732 | 2 |         if (!status.ok()) { | 
| 1733 | 0 |             g_file_cache_warm_up_inverted_idx_failed_num << inverted_idx_num; | 
| 1734 | 0 |         } | 
| 1735 | 2 |     } | 
| 1736 | 18 |     warmup_info.done(segment_num, inverted_idx_num); | 
| 1737 | 18 |     return warmup_info.state; | 
| 1738 | 20 | } | 
| 1739 |  |  | 
| 1740 | 200 | bool CloudTablet::is_rowset_warmed_up(const RowsetId& rowset_id) const { | 
| 1741 | 200 |     auto it = _rowset_warm_up_states.find(rowset_id); | 
| 1742 | 200 |     if (it == _rowset_warm_up_states.end()) { | 
| 1743 | 58 |         return false; | 
| 1744 | 58 |     } | 
| 1745 | 142 |     return it->second.state.progress == WarmUpProgress::DONE; | 
| 1746 | 200 | } | 
| 1747 |  |  | 
| 1748 | 598 | void CloudTablet::add_warmed_up_rowset(const RowsetId& rowset_id) { | 
| 1749 | 598 |     _rowset_warm_up_states[rowset_id] = { | 
| 1750 | 598 |             .state = {.trigger_source = WarmUpTriggerSource::SYNC_ROWSET, | 
| 1751 | 598 |                       .progress = WarmUpProgress::DONE}, | 
| 1752 | 598 |             .num_segments = 1, | 
| 1753 | 598 |             .start_tp = std::chrono::steady_clock::now()}; | 
| 1754 | 598 | } | 
| 1755 |  |  | 
| 1756 |  | #include "common/compile_check_end.h" | 
| 1757 |  | } // namespace doris |