be/src/runtime/fragment_mgr.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "runtime/fragment_mgr.h" |
19 | | |
20 | | #include <brpc/controller.h> |
21 | | #include <bvar/latency_recorder.h> |
22 | | #include <fmt/format.h> |
23 | | #include <gen_cpp/DorisExternalService_types.h> |
24 | | #include <gen_cpp/FrontendService.h> |
25 | | #include <gen_cpp/FrontendService_types.h> |
26 | | #include <gen_cpp/HeartbeatService_types.h> |
27 | | #include <gen_cpp/Metrics_types.h> |
28 | | #include <gen_cpp/PaloInternalService_types.h> |
29 | | #include <gen_cpp/PlanNodes_types.h> |
30 | | #include <gen_cpp/Planner_types.h> |
31 | | #include <gen_cpp/QueryPlanExtra_types.h> |
32 | | #include <gen_cpp/RuntimeProfile_types.h> |
33 | | #include <gen_cpp/Types_types.h> |
34 | | #include <gen_cpp/internal_service.pb.h> |
35 | | #include <pthread.h> |
36 | | #include <sys/time.h> |
37 | | #include <thrift/TApplicationException.h> |
38 | | #include <thrift/Thrift.h> |
39 | | #include <thrift/protocol/TDebugProtocol.h> |
40 | | #include <thrift/transport/TTransportException.h> |
41 | | #include <unistd.h> |
42 | | |
43 | | #include <algorithm> |
44 | | #include <cstddef> |
45 | | #include <ctime> |
46 | | |
47 | | // IWYU pragma: no_include <bits/chrono.h> |
48 | | #include <chrono> // IWYU pragma: keep |
49 | | #include <cstdint> |
50 | | #include <map> |
51 | | #include <memory> |
52 | | #include <mutex> |
53 | | #include <sstream> |
54 | | #include <unordered_map> |
55 | | #include <unordered_set> |
56 | | #include <utility> |
57 | | |
58 | | #include "common/config.h" |
59 | | #include "common/exception.h" |
60 | | #include "common/logging.h" |
61 | | #include "common/metrics/doris_metrics.h" |
62 | | #include "common/object_pool.h" |
63 | | #include "common/status.h" |
64 | | #include "common/utils.h" |
65 | | #include "core/data_type/primitive_type.h" |
66 | | #include "exec/pipeline/pipeline_fragment_context.h" |
67 | | #include "exec/runtime_filter/runtime_filter_consumer.h" |
68 | | #include "exec/runtime_filter/runtime_filter_mgr.h" |
69 | | #include "io/fs/stream_load_pipe.h" |
70 | | #include "load/stream_load/new_load_stream_mgr.h" |
71 | | #include "load/stream_load/stream_load_context.h" |
72 | | #include "load/stream_load/stream_load_executor.h" |
73 | | #include "runtime/descriptors.h" |
74 | | #include "runtime/exec_env.h" |
75 | | #include "runtime/frontend_info.h" |
76 | | #include "runtime/query_context.h" |
77 | | #include "runtime/runtime_profile.h" |
78 | | #include "runtime/runtime_query_statistics_mgr.h" |
79 | | #include "runtime/runtime_state.h" |
80 | | #include "runtime/thread_context.h" |
81 | | #include "runtime/workload_group/workload_group.h" |
82 | | #include "runtime/workload_group/workload_group_manager.h" |
83 | | #include "service/backend_options.h" |
84 | | #include "storage/id_manager.h" |
85 | | #include "util/brpc_client_cache.h" |
86 | | #include "util/client_cache.h" |
87 | | #include "util/debug_points.h" |
88 | | #include "util/debug_util.h" |
89 | | #include "util/network_util.h" |
90 | | #include "util/thread.h" |
91 | | #include "util/threadpool.h" |
92 | | #include "util/thrift_util.h" |
93 | | #include "util/uid_util.h" |
94 | | |
95 | | namespace doris { |
96 | | #include "common/compile_check_begin.h" |
97 | | |
98 | | DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(fragment_instance_count, MetricUnit::NOUNIT); |
99 | | DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(timeout_canceled_fragment_count, MetricUnit::NOUNIT); |
100 | | |
101 | | bvar::LatencyRecorder g_fragmentmgr_prepare_latency("doris_FragmentMgr", "prepare"); |
102 | | |
103 | | bvar::Adder<uint64_t> g_fragment_executing_count("fragment_executing_count"); |
104 | | bvar::Status<uint64_t> g_fragment_last_active_time( |
105 | | "fragment_last_active_time", duration_cast<std::chrono::milliseconds>( |
106 | | std::chrono::system_clock::now().time_since_epoch()) |
107 | | .count()); |
108 | 1.26k | |
109 | 1.26k | uint64_t get_fragment_executing_count() { |
110 | 1.26k | return g_fragment_executing_count.get_value(); |
111 | 1.26k | } |
112 | 1.26k | uint64_t get_fragment_last_active_time() { |
113 | 1.26k | return g_fragment_last_active_time.get_value(); |
114 | | } |
115 | 2.51k | |
116 | 2.51k | std::string to_load_error_http_path(const std::string& file_name) { |
117 | 1.93k | if (file_name.empty()) { |
118 | 1.93k | return ""; |
119 | 581 | } |
120 | 555 | if (file_name.compare(0, 4, "http") == 0) { |
121 | 555 | return file_name; |
122 | 26 | } |
123 | 26 | std::stringstream url; |
124 | 26 | url << "http://" << get_host_port(BackendOptions::get_localhost(), config::webserver_port) |
125 | 26 | << "/api/_load_error_log?" |
126 | 26 | << "file=" << file_name; |
127 | 581 | return url.str(); |
128 | | } |
129 | | |
130 | | using apache::thrift::TException; |
131 | | using apache::thrift::transport::TTransportException; |
132 | | |
133 | 0 | static Status _do_fetch_running_queries_rpc(const FrontendInfo& fe_info, |
134 | 0 | std::unordered_set<TUniqueId>& query_set) { |
135 | 0 | TFetchRunningQueriesResult rpc_result; |
136 | | TFetchRunningQueriesRequest rpc_request; |
137 | 0 |
|
138 | 0 | Status client_status; |
139 | 0 | const int32_t timeout_ms = 3 * 1000; |
140 | 0 | FrontendServiceConnection rpc_client(ExecEnv::GetInstance()->frontend_client_cache(), |
141 | 0 | fe_info.info.coordinator_address, timeout_ms, |
142 | | &client_status); |
143 | 0 | // Abort this fe. |
144 | 0 | if (!client_status.ok()) { |
145 | 0 | LOG_WARNING("Failed to get client for {}, reason is {}", |
146 | 0 | PrintThriftNetworkAddress(fe_info.info.coordinator_address), |
147 | 0 | client_status.to_string()); |
148 | 0 | return Status::InternalError("Failed to get client for {}, reason is {}", |
149 | 0 | PrintThriftNetworkAddress(fe_info.info.coordinator_address), |
150 | 0 | client_status.to_string()); |
151 | | } |
152 | | |
153 | 0 | // do rpc |
154 | 0 | try { |
155 | 0 | try { |
156 | 0 | rpc_client->fetchRunningQueries(rpc_result, rpc_request); |
157 | 0 | } catch (const apache::thrift::transport::TTransportException& e) { |
158 | 0 | LOG_WARNING("Transport exception reason: {}, reopening", e.what()); |
159 | 0 | client_status = rpc_client.reopen(config::thrift_rpc_timeout_ms); |
160 | 0 | if (!client_status.ok()) { |
161 | 0 | LOG_WARNING("Reopen failed, reason: {}", client_status.to_string_no_stack()); |
162 | 0 | return Status::InternalError("Reopen failed, reason: {}", |
163 | 0 | client_status.to_string_no_stack()); |
164 | | } |
165 | 0 |
|
166 | 0 | rpc_client->fetchRunningQueries(rpc_result, rpc_request); |
167 | 0 | } |
168 | | } catch (apache::thrift::TException& e) { |
169 | 0 | // During upgrading cluster or meet any other network error. |
170 | 0 | LOG_WARNING("Failed to fetch running queries from {}, reason: {}", |
171 | 0 | PrintThriftNetworkAddress(fe_info.info.coordinator_address), e.what()); |
172 | 0 | return Status::InternalError("Failed to fetch running queries from {}, reason: {}", |
173 | 0 | PrintThriftNetworkAddress(fe_info.info.coordinator_address), |
174 | 0 | e.what()); |
175 | | } |
176 | | |
177 | 0 | // Avoid logic error in frontend. |
178 | 0 | if (!rpc_result.__isset.status || rpc_result.status.status_code != TStatusCode::OK) { |
179 | 0 | LOG_WARNING("Failed to fetch running queries from {}, reason: {}", |
180 | 0 | PrintThriftNetworkAddress(fe_info.info.coordinator_address), |
181 | 0 | doris::to_string(rpc_result.status.status_code)); |
182 | 0 | return Status::InternalError("Failed to fetch running queries from {}, reason: {}", |
183 | 0 | PrintThriftNetworkAddress(fe_info.info.coordinator_address), |
184 | 0 | doris::to_string(rpc_result.status.status_code)); |
185 | | } |
186 | 0 |
|
187 | 0 | if (!rpc_result.__isset.running_queries) { |
188 | 0 | return Status::InternalError("Failed to fetch running queries from {}, reason: {}", |
189 | 0 | PrintThriftNetworkAddress(fe_info.info.coordinator_address), |
190 | 0 | "running_queries is not set"); |
191 | | } |
192 | 0 |
|
193 | 0 | query_set = std::unordered_set<TUniqueId>(rpc_result.running_queries.begin(), |
194 | 0 | rpc_result.running_queries.end()); |
195 | 0 | return Status::OK(); |
196 | | }; |
197 | 0 |
|
198 | 0 | static std::map<int64_t, std::unordered_set<TUniqueId>> _get_all_running_queries_from_fe() { |
199 | 0 | const std::map<TNetworkAddress, FrontendInfo>& running_fes = |
200 | | ExecEnv::GetInstance()->get_running_frontends(); |
201 | 0 |
|
202 | 0 | std::map<int64_t, std::unordered_set<TUniqueId>> result; |
203 | | std::vector<FrontendInfo> qualified_fes; |
204 | 0 |
|
205 | | for (const auto& fe : running_fes) { |
206 | 0 | // Only consider normal frontend. |
207 | 0 | if (fe.first.port != 0 && fe.second.info.process_uuid != 0) { |
208 | 0 | qualified_fes.push_back(fe.second); |
209 | 0 | } else { |
210 | 0 | return {}; |
211 | 0 | } |
212 | | } |
213 | 0 |
|
214 | 0 | for (const auto& fe_addr : qualified_fes) { |
215 | 0 | const int64_t process_uuid = fe_addr.info.process_uuid; |
216 | 0 | std::unordered_set<TUniqueId> query_set; |
217 | 0 | Status st = _do_fetch_running_queries_rpc(fe_addr, query_set); |
218 | | if (!st.ok()) { |
219 | 0 | // Empty result, cancel worker will not do anything |
220 | 0 | return {}; |
221 | | } |
222 | | |
223 | 0 | // frontend_info and process_uuid has been checked in rpc threads. |
224 | 0 | result[process_uuid] = query_set; |
225 | | } |
226 | 0 |
|
227 | 0 | return result; |
228 | | } |
229 | 1.90M | |
230 | 1.90M | inline uint32_t get_map_id(const TUniqueId& query_id, size_t capacity) { |
231 | 1.90M | uint32_t value = HashUtil::hash(&query_id.lo, 8, 0); |
232 | 1.90M | value = HashUtil::hash(&query_id.hi, 8, value); |
233 | 1.90M | return value % capacity; |
234 | | } |
235 | 1.34M | |
236 | 1.34M | inline uint32_t get_map_id(std::pair<TUniqueId, int> key, size_t capacity) { |
237 | 1.34M | uint32_t value = HashUtil::hash(&key.first.lo, 8, 0); |
238 | 1.34M | value = HashUtil::hash(&key.first.hi, 8, value); |
239 | 1.34M | return value % capacity; |
240 | | } |
241 | | |
242 | 42 | template <typename Key, typename Value, typename ValueType> |
243 | 42 | ConcurrentContextMap<Key, Value, ValueType>::ConcurrentContextMap() { |
244 | 5.41k | _internal_map.resize(config::num_query_ctx_map_partitions); |
245 | 5.37k | for (size_t i = 0; i < config::num_query_ctx_map_partitions; i++) { |
246 | 5.37k | _internal_map[i] = {std::make_unique<std::shared_mutex>(), |
247 | 5.37k | phmap::flat_hash_map<Key, Value>()}; |
248 | 42 | } _ZN5doris20ConcurrentContextMapISt4pairINS_9TUniqueIdEiESt10shared_ptrINS_23PipelineFragmentContextEES5_EC2Ev Line | Count | Source | 242 | 14 | template <typename Key, typename Value, typename ValueType> | 243 | 14 | ConcurrentContextMap<Key, Value, ValueType>::ConcurrentContextMap() { | 244 | 1.80k | _internal_map.resize(config::num_query_ctx_map_partitions); | 245 | 1.79k | for (size_t i = 0; i < config::num_query_ctx_map_partitions; i++) { | 246 | 1.79k | _internal_map[i] = {std::make_unique<std::shared_mutex>(), | 247 | 1.79k | phmap::flat_hash_map<Key, Value>()}; | 248 | 14 | } |
_ZN5doris20ConcurrentContextMapINS_9TUniqueIdESt8weak_ptrINS_12QueryContextEES3_EC2Ev Line | Count | Source | 242 | 14 | template <typename Key, typename Value, typename ValueType> | 243 | 14 | ConcurrentContextMap<Key, Value, ValueType>::ConcurrentContextMap() { | 244 | 1.80k | _internal_map.resize(config::num_query_ctx_map_partitions); | 245 | 1.79k | for (size_t i = 0; i < config::num_query_ctx_map_partitions; i++) { | 246 | 1.79k | _internal_map[i] = {std::make_unique<std::shared_mutex>(), | 247 | 1.79k | phmap::flat_hash_map<Key, Value>()}; | 248 | 14 | } |
_ZN5doris20ConcurrentContextMapINS_9TUniqueIdESt10shared_ptrINS_12QueryContextEES3_EC2Ev Line | Count | Source | 242 | 14 | template <typename Key, typename Value, typename ValueType> | 243 | 14 | ConcurrentContextMap<Key, Value, ValueType>::ConcurrentContextMap() { | 244 | 1.80k | _internal_map.resize(config::num_query_ctx_map_partitions); | 245 | 1.79k | for (size_t i = 0; i < config::num_query_ctx_map_partitions; i++) { | 246 | 1.79k | _internal_map[i] = {std::make_unique<std::shared_mutex>(), | 247 | 1.79k | phmap::flat_hash_map<Key, Value>()}; | 248 | 14 | } |
|
249 | | } |
250 | | |
251 | 1.25M | template <typename Key, typename Value, typename ValueType> |
252 | 1.25M | Value ConcurrentContextMap<Key, Value, ValueType>::find(const Key& query_id) { |
253 | 1.25M | auto id = get_map_id(query_id, _internal_map.size()); |
254 | 1.25M | { |
255 | 1.25M | std::shared_lock lock(*_internal_map[id].first); |
256 | 1.25M | auto& map = _internal_map[id].second; |
257 | 1.25M | auto search = map.find(query_id); |
258 | 472k | if (search != map.end()) { |
259 | 472k | return search->second; |
260 | 779k | } |
261 | 1.25M | return std::shared_ptr<ValueType>(nullptr); |
262 | 1.25M | } _ZN5doris20ConcurrentContextMapINS_9TUniqueIdESt8weak_ptrINS_12QueryContextEES3_E4findERKS1_ Line | Count | Source | 251 | 801k | template <typename Key, typename Value, typename ValueType> | 252 | 801k | Value ConcurrentContextMap<Key, Value, ValueType>::find(const Key& query_id) { | 253 | 801k | auto id = get_map_id(query_id, _internal_map.size()); | 254 | 801k | { | 255 | 801k | std::shared_lock lock(*_internal_map[id].first); | 256 | 801k | auto& map = _internal_map[id].second; | 257 | 801k | auto search = map.find(query_id); | 258 | 466k | if (search != map.end()) { | 259 | 466k | return search->second; | 260 | 335k | } | 261 | 801k | return std::shared_ptr<ValueType>(nullptr); | 262 | 801k | } |
_ZN5doris20ConcurrentContextMapISt4pairINS_9TUniqueIdEiESt10shared_ptrINS_23PipelineFragmentContextEES5_E4findERKS3_ Line | Count | Source | 251 | 450k | template <typename Key, typename Value, typename ValueType> | 252 | 450k | Value ConcurrentContextMap<Key, Value, ValueType>::find(const Key& query_id) { | 253 | 450k | auto id = get_map_id(query_id, _internal_map.size()); | 254 | 450k | { | 255 | 450k | std::shared_lock lock(*_internal_map[id].first); | 256 | 450k | auto& map = _internal_map[id].second; | 257 | 450k | auto search = map.find(query_id); | 258 | 6.76k | if (search != map.end()) { | 259 | 6.76k | return search->second; | 260 | 443k | } | 261 | 450k | return std::shared_ptr<ValueType>(nullptr); | 262 | 450k | } |
|
263 | | } |
264 | | |
265 | | template <typename Key, typename Value, typename ValueType> |
266 | 288k | Status ConcurrentContextMap<Key, Value, ValueType>::apply_if_not_exists( |
267 | 288k | const Key& query_id, std::shared_ptr<ValueType>& query_ctx, ApplyFunction&& function) { |
268 | 288k | auto id = get_map_id(query_id, _internal_map.size()); |
269 | 288k | { |
270 | 288k | std::unique_lock lock(*_internal_map[id].first); |
271 | 288k | auto& map = _internal_map[id].second; |
272 | 288k | auto search = map.find(query_id); |
273 | 2 | if (search != map.end()) { |
274 | 2 | query_ctx = search->second.lock(); |
275 | 288k | } |
276 | 288k | if (!query_ctx) { |
277 | 288k | return function(map); |
278 | 18.4E | } |
279 | 288k | return Status::OK(); |
280 | 288k | } |
281 | | } |
282 | | |
283 | 1.26M | template <typename Key, typename Value, typename ValueType> |
284 | 1.26M | bool ConcurrentContextMap<Key, Value, ValueType>::erase(const Key& query_id) { |
285 | 1.26M | auto id = get_map_id(query_id, _internal_map.size()); |
286 | 1.26M | std::unique_lock lock(*_internal_map[id].first); |
287 | 1.26M | auto& map = _internal_map[id].second; |
288 | 1.26M | return map.erase(query_id) != 0; _ZN5doris20ConcurrentContextMapISt4pairINS_9TUniqueIdEiESt10shared_ptrINS_23PipelineFragmentContextEES5_E5eraseERKS3_ Line | Count | Source | 283 | 448k | template <typename Key, typename Value, typename ValueType> | 284 | 448k | bool ConcurrentContextMap<Key, Value, ValueType>::erase(const Key& query_id) { | 285 | 448k | auto id = get_map_id(query_id, _internal_map.size()); | 286 | 448k | std::unique_lock lock(*_internal_map[id].first); | 287 | 448k | auto& map = _internal_map[id].second; | 288 | 448k | return map.erase(query_id) != 0; |
_ZN5doris20ConcurrentContextMapINS_9TUniqueIdESt10shared_ptrINS_12QueryContextEES3_E5eraseERKS1_ Line | Count | Source | 283 | 406k | template <typename Key, typename Value, typename ValueType> | 284 | 406k | bool ConcurrentContextMap<Key, Value, ValueType>::erase(const Key& query_id) { | 285 | 406k | auto id = get_map_id(query_id, _internal_map.size()); | 286 | 406k | std::unique_lock lock(*_internal_map[id].first); | 287 | 406k | auto& map = _internal_map[id].second; | 288 | 406k | return map.erase(query_id) != 0; |
_ZN5doris20ConcurrentContextMapINS_9TUniqueIdESt8weak_ptrINS_12QueryContextEES3_E5eraseERKS1_ Line | Count | Source | 283 | 406k | template <typename Key, typename Value, typename ValueType> | 284 | 406k | bool ConcurrentContextMap<Key, Value, ValueType>::erase(const Key& query_id) { | 285 | 406k | auto id = get_map_id(query_id, _internal_map.size()); | 286 | 406k | std::unique_lock lock(*_internal_map[id].first); | 287 | 406k | auto& map = _internal_map[id].second; | 288 | 406k | return map.erase(query_id) != 0; |
|
289 | | } |
290 | | |
291 | | template <typename Key, typename Value, typename ValueType> |
292 | 449k | void ConcurrentContextMap<Key, Value, ValueType>::insert(const Key& query_id, |
293 | 449k | std::shared_ptr<ValueType> query_ctx) { |
294 | 449k | auto id = get_map_id(query_id, _internal_map.size()); |
295 | 449k | { |
296 | 449k | std::unique_lock lock(*_internal_map[id].first); |
297 | 449k | auto& map = _internal_map[id].second; |
298 | 449k | map.insert({query_id, query_ctx}); |
299 | 449k | } _ZN5doris20ConcurrentContextMapINS_9TUniqueIdESt10shared_ptrINS_12QueryContextEES3_E6insertERKS1_S4_ Line | Count | Source | 292 | 2.58k | void ConcurrentContextMap<Key, Value, ValueType>::insert(const Key& query_id, | 293 | 2.58k | std::shared_ptr<ValueType> query_ctx) { | 294 | 2.58k | auto id = get_map_id(query_id, _internal_map.size()); | 295 | 2.58k | { | 296 | 2.58k | std::unique_lock lock(*_internal_map[id].first); | 297 | 2.58k | auto& map = _internal_map[id].second; | 298 | 2.58k | map.insert({query_id, query_ctx}); | 299 | 2.58k | } |
_ZN5doris20ConcurrentContextMapISt4pairINS_9TUniqueIdEiESt10shared_ptrINS_23PipelineFragmentContextEES5_E6insertERKS3_S6_ Line | Count | Source | 292 | 446k | void ConcurrentContextMap<Key, Value, ValueType>::insert(const Key& query_id, | 293 | 446k | std::shared_ptr<ValueType> query_ctx) { | 294 | 446k | auto id = get_map_id(query_id, _internal_map.size()); | 295 | 446k | { | 296 | 446k | std::unique_lock lock(*_internal_map[id].first); | 297 | 446k | auto& map = _internal_map[id].second; | 298 | 446k | map.insert({query_id, query_ctx}); | 299 | 446k | } |
|
300 | | } |
301 | | |
302 | 30 | template <typename Key, typename Value, typename ValueType> |
303 | 3.84k | void ConcurrentContextMap<Key, Value, ValueType>::clear() { |
304 | 3.84k | for (auto& pair : _internal_map) { |
305 | 3.84k | std::unique_lock lock(*pair.first); |
306 | 3.84k | auto& map = pair.second; |
307 | 3.84k | map.clear(); |
308 | 30 | } _ZN5doris20ConcurrentContextMapINS_9TUniqueIdESt8weak_ptrINS_12QueryContextEES3_E5clearEv Line | Count | Source | 302 | 10 | template <typename Key, typename Value, typename ValueType> | 303 | 1.28k | void ConcurrentContextMap<Key, Value, ValueType>::clear() { | 304 | 1.28k | for (auto& pair : _internal_map) { | 305 | 1.28k | std::unique_lock lock(*pair.first); | 306 | 1.28k | auto& map = pair.second; | 307 | 1.28k | map.clear(); | 308 | 10 | } |
_ZN5doris20ConcurrentContextMapINS_9TUniqueIdESt10shared_ptrINS_12QueryContextEES3_E5clearEv Line | Count | Source | 302 | 10 | template <typename Key, typename Value, typename ValueType> | 303 | 1.28k | void ConcurrentContextMap<Key, Value, ValueType>::clear() { | 304 | 1.28k | for (auto& pair : _internal_map) { | 305 | 1.28k | std::unique_lock lock(*pair.first); | 306 | 1.28k | auto& map = pair.second; | 307 | 1.28k | map.clear(); | 308 | 10 | } |
_ZN5doris20ConcurrentContextMapISt4pairINS_9TUniqueIdEiESt10shared_ptrINS_23PipelineFragmentContextEES5_E5clearEv Line | Count | Source | 302 | 10 | template <typename Key, typename Value, typename ValueType> | 303 | 1.28k | void ConcurrentContextMap<Key, Value, ValueType>::clear() { | 304 | 1.28k | for (auto& pair : _internal_map) { | 305 | 1.28k | std::unique_lock lock(*pair.first); | 306 | 1.28k | auto& map = pair.second; | 307 | 1.28k | map.clear(); | 308 | 10 | } |
|
309 | | } |
310 | | |
311 | 14 | FragmentMgr::FragmentMgr(ExecEnv* exec_env) |
312 | 14 | : _exec_env(exec_env), _stop_background_threads_latch(1) { |
313 | 14 | _entity = DorisMetrics::instance()->metric_registry()->register_entity("FragmentMgr"); |
314 | | INT_UGAUGE_METRIC_REGISTER(_entity, timeout_canceled_fragment_count); |
315 | 14 | |
316 | 14 | auto s = Thread::create( |
317 | 14 | "FragmentMgr", "cancel_timeout_plan_fragment", [this]() { this->cancel_worker(); }, |
318 | 14 | &_cancel_thread); |
319 | | CHECK(s.ok()) << s.to_string(); |
320 | 14 | |
321 | 14 | s = ThreadPoolBuilder("FragmentMgrAsyncWorkThreadPool") |
322 | 14 | .set_min_threads(config::fragment_mgr_async_work_pool_thread_num_min) |
323 | 14 | .set_max_threads(config::fragment_mgr_async_work_pool_thread_num_max) |
324 | 14 | .set_max_queue_size(config::fragment_mgr_async_work_pool_queue_size) |
325 | 14 | .build(&_thread_pool); |
326 | 14 | CHECK(s.ok()) << s.to_string(); |
327 | | } |
328 | 10 | |
329 | | FragmentMgr::~FragmentMgr() = default; |
330 | 10 | |
331 | 10 | void FragmentMgr::stop() { |
332 | 10 | DEREGISTER_HOOK_METRIC(fragment_instance_count); |
333 | 10 | _stop_background_threads_latch.count_down(); |
334 | 10 | if (_cancel_thread) { |
335 | 10 | _cancel_thread->join(); |
336 | | } |
337 | 10 | |
338 | | _thread_pool->shutdown(); |
339 | 10 | // Only me can delete |
340 | | _query_ctx_map.clear(); |
341 | | // in one BE's graceful shutdown, cancel_worker will get related running queries via _get_all_running_queries_from_fe and cancel them. |
342 | | // so clearing here will not make RF consumer hang. if we dont do this, in ~FragmentMgr() there may be QueryContext in _query_ctx_map_delay_delete |
343 | 10 | // destructred and remove it from _query_ctx_map_delay_delete which is destructring. it's UB. |
344 | 10 | _query_ctx_map_delay_delete.clear(); |
345 | 10 | _pipeline_map.clear(); |
346 | 10 | { |
347 | 10 | std::lock_guard<std::mutex> lk(_rerunnable_params_lock); |
348 | 10 | _rerunnable_params_map.clear(); |
349 | 10 | } |
350 | | } |
351 | 0 |
|
352 | 0 | std::string FragmentMgr::to_http_path(const std::string& file_name) { |
353 | 0 | std::stringstream url; |
354 | 0 | url << "http://" << BackendOptions::get_localhost() << ":" << config::webserver_port |
355 | 0 | << "/api/_download_load?" |
356 | 0 | << "token=" << _exec_env->token() << "&file=" << file_name; |
357 | 0 | return url.str(); |
358 | | } |
359 | | |
360 | 50.9k | Status FragmentMgr::trigger_pipeline_context_report( |
361 | 50.9k | const ReportStatusRequest req, std::shared_ptr<PipelineFragmentContext>&& ctx) { |
362 | 50.9k | return _thread_pool->submit_func([this, req, ctx]() { |
363 | 50.9k | SCOPED_ATTACH_TASK(ctx->get_query_ctx()->query_mem_tracker()); |
364 | 50.9k | coordinator_callback(req); |
365 | 5.30k | if (!req.done) { |
366 | 5.30k | ctx->refresh_next_report_time(); |
367 | 50.9k | } |
368 | 50.9k | }); |
369 | | } |
370 | | |
371 | | // There can only be one of these callbacks in-flight at any moment, because |
372 | | // it is only invoked from the executor's reporting thread. |
373 | | // Also, the reported status will always reflect the most recent execution status, |
374 | 50.9k | // including the final status when execution finishes. |
375 | 50.9k | void FragmentMgr::coordinator_callback(const ReportStatusRequest& req) { |
376 | 50.9k | DBUG_EXECUTE_IF("FragmentMgr::coordinator_callback.report_delay", { |
377 | 50.9k | int random_seconds = req.status.is<ErrorCode::DATA_QUALITY_ERROR>() ? 8 : 2; |
378 | 50.9k | LOG_INFO("sleep : ").tag("time", random_seconds).tag("query_id", print_id(req.query_id)); |
379 | 50.9k | std::this_thread::sleep_for(std::chrono::seconds(random_seconds)); |
380 | 50.9k | LOG_INFO("sleep done").tag("query_id", print_id(req.query_id)); |
381 | | }); |
382 | 50.9k | |
383 | 50.9k | DCHECK(req.status.ok() || req.done); // if !status.ok() => done |
384 | | if (req.coord_addr.hostname == "external") { |
385 | 0 | // External query (flink/spark read tablets) not need to report to FE. |
386 | 0 | return; |
387 | 50.9k | } |
388 | 50.9k | int callback_retries = 10; |
389 | 50.9k | const int sleep_ms = 1000; |
390 | 50.9k | Status exec_status = req.status; |
391 | 50.9k | Status coord_status; |
392 | 50.9k | std::unique_ptr<FrontendServiceConnection> coord = nullptr; |
393 | 50.9k | do { |
394 | 50.9k | coord = std::make_unique<FrontendServiceConnection>(_exec_env->frontend_client_cache(), |
395 | 50.9k | req.coord_addr, &coord_status); |
396 | 0 | if (!coord_status.ok()) { |
397 | 0 | std::this_thread::sleep_for(std::chrono::milliseconds(sleep_ms)); |
398 | 50.9k | } |
399 | | } while (!coord_status.ok() && callback_retries-- > 0); |
400 | 50.9k |
|
401 | 0 | if (!coord_status.ok()) { |
402 | 0 | std::stringstream ss; |
403 | 0 | UniqueId uid(req.query_id.hi, req.query_id.lo); |
404 | 0 | static_cast<void>(req.cancel_fn(Status::InternalError( |
405 | 0 | "query_id: {}, couldn't get a client for {}, reason is {}", uid.to_string(), |
406 | 0 | PrintThriftNetworkAddress(req.coord_addr), coord_status.to_string()))); |
407 | 0 | return; |
408 | | } |
409 | 50.9k | |
410 | 50.9k | TReportExecStatusParams params; |
411 | 50.9k | params.protocol_version = FrontendServiceVersion::V1; |
412 | 50.9k | params.__set_query_id(req.query_id); |
413 | 50.9k | params.__set_backend_num(req.backend_num); |
414 | 50.9k | params.__set_fragment_instance_id(req.fragment_instance_id); |
415 | 50.9k | params.__set_fragment_id(req.fragment_id); |
416 | 50.9k | params.__set_status(exec_status.to_thrift()); |
417 | 50.9k | params.__set_done(req.done); |
418 | 50.9k | params.__set_query_type(req.runtime_state->query_type()); |
419 | | params.__isset.profile = false; |
420 | 50.9k | |
421 | | DCHECK(req.runtime_state != nullptr); |
422 | 50.9k | |
423 | 46.1k | if (req.runtime_state->query_type() == TQueryType::LOAD) { |
424 | 46.1k | params.__set_loaded_rows(req.runtime_state->num_rows_load_total()); |
425 | 46.1k | params.__set_loaded_bytes(req.runtime_state->num_bytes_load_total()); |
426 | 4.84k | } else { |
427 | 4.84k | DCHECK(!req.runtime_states.empty()); |
428 | 0 | if (!req.runtime_state->output_files().empty()) { |
429 | 0 | params.__isset.delta_urls = true; |
430 | 0 | for (auto& it : req.runtime_state->output_files()) { |
431 | 0 | params.delta_urls.push_back(to_http_path(it)); |
432 | 0 | } |
433 | 4.84k | } |
434 | 0 | if (!params.delta_urls.empty()) { |
435 | 0 | params.__isset.delta_urls = true; |
436 | 4.84k | } |
437 | | } |
438 | | |
439 | 50.9k | // load rows |
440 | 50.9k | static std::string s_dpp_normal_all = "dpp.norm.ALL"; |
441 | 50.9k | static std::string s_dpp_abnormal_all = "dpp.abnorm.ALL"; |
442 | 50.9k | static std::string s_unselected_rows = "unselected.rows"; |
443 | 50.9k | int64_t num_rows_load_success = 0; |
444 | 50.9k | int64_t num_rows_load_filtered = 0; |
445 | 50.9k | int64_t num_rows_load_unselected = 0; |
446 | 50.9k | if (req.runtime_state->num_rows_load_total() > 0 || |
447 | 50.9k | req.runtime_state->num_rows_load_filtered() > 0 || |
448 | 0 | req.runtime_state->num_finished_range() > 0) { |
449 | | params.__isset.load_counters = true; |
450 | 0 |
|
451 | 0 | num_rows_load_success = req.runtime_state->num_rows_load_success(); |
452 | 0 | num_rows_load_filtered = req.runtime_state->num_rows_load_filtered(); |
453 | 0 | num_rows_load_unselected = req.runtime_state->num_rows_load_unselected(); |
454 | 0 | params.__isset.fragment_instance_reports = true; |
455 | 0 | TFragmentInstanceReport t; |
456 | 0 | t.__set_fragment_instance_id(req.runtime_state->fragment_instance_id()); |
457 | 0 | t.__set_num_finished_range(cast_set<int>(req.runtime_state->num_finished_range())); |
458 | 0 | t.__set_loaded_rows(req.runtime_state->num_rows_load_total()); |
459 | 0 | t.__set_loaded_bytes(req.runtime_state->num_bytes_load_total()); |
460 | 50.9k | params.fragment_instance_reports.push_back(t); |
461 | 230k | } else if (!req.runtime_states.empty()) { |
462 | 230k | for (auto* rs : req.runtime_states) { |
463 | 230k | if (rs->num_rows_load_total() > 0 || rs->num_rows_load_filtered() > 0 || |
464 | 35.4k | rs->num_finished_range() > 0) { |
465 | 35.4k | params.__isset.load_counters = true; |
466 | 35.4k | num_rows_load_success += rs->num_rows_load_success(); |
467 | 35.4k | num_rows_load_filtered += rs->num_rows_load_filtered(); |
468 | 35.4k | num_rows_load_unselected += rs->num_rows_load_unselected(); |
469 | 35.4k | params.__isset.fragment_instance_reports = true; |
470 | 35.4k | TFragmentInstanceReport t; |
471 | 35.4k | t.__set_fragment_instance_id(rs->fragment_instance_id()); |
472 | 35.4k | t.__set_num_finished_range(cast_set<int>(rs->num_finished_range())); |
473 | 35.4k | t.__set_loaded_rows(rs->num_rows_load_total()); |
474 | 35.4k | t.__set_loaded_bytes(rs->num_bytes_load_total()); |
475 | 35.4k | params.fragment_instance_reports.push_back(t); |
476 | 230k | } |
477 | 50.9k | } |
478 | 50.9k | } |
479 | 50.9k | params.load_counters.emplace(s_dpp_normal_all, std::to_string(num_rows_load_success)); |
480 | 50.9k | params.load_counters.emplace(s_dpp_abnormal_all, std::to_string(num_rows_load_filtered)); |
481 | | params.load_counters.emplace(s_unselected_rows, std::to_string(num_rows_load_unselected)); |
482 | 50.9k | |
483 | 152 | if (!req.load_error_url.empty()) { |
484 | 152 | params.__set_tracking_url(req.load_error_url); |
485 | 50.9k | } |
486 | 152 | if (!req.first_error_msg.empty()) { |
487 | 152 | params.__set_first_error_msg(req.first_error_msg); |
488 | 230k | } |
489 | 230k | for (auto* rs : req.runtime_states) { |
490 | 108 | if (rs->wal_id() > 0) { |
491 | 108 | params.__set_txn_id(rs->wal_id()); |
492 | 108 | params.__set_label(rs->import_label()); |
493 | 230k | } |
494 | 50.9k | } |
495 | 0 | if (!req.runtime_state->export_output_files().empty()) { |
496 | 0 | params.__isset.export_files = true; |
497 | 50.9k | params.export_files = req.runtime_state->export_output_files(); |
498 | 230k | } else if (!req.runtime_states.empty()) { |
499 | 230k | for (auto* rs : req.runtime_states) { |
500 | 0 | if (!rs->export_output_files().empty()) { |
501 | 0 | params.__isset.export_files = true; |
502 | 0 | params.export_files.insert(params.export_files.end(), |
503 | 0 | rs->export_output_files().begin(), |
504 | 0 | rs->export_output_files().end()); |
505 | 230k | } |
506 | 50.9k | } |
507 | 50.9k | } |
508 | 0 | if (auto tci = req.runtime_state->tablet_commit_infos(); !tci.empty()) { |
509 | 0 | params.__isset.commitInfos = true; |
510 | 50.9k | params.commitInfos.insert(params.commitInfos.end(), tci.begin(), tci.end()); |
511 | 230k | } else if (!req.runtime_states.empty()) { |
512 | 230k | for (auto* rs : req.runtime_states) { |
513 | 26.8k | if (auto rs_tci = rs->tablet_commit_infos(); !rs_tci.empty()) { |
514 | 26.8k | params.__isset.commitInfos = true; |
515 | 26.8k | params.commitInfos.insert(params.commitInfos.end(), rs_tci.begin(), rs_tci.end()); |
516 | 230k | } |
517 | 50.9k | } |
518 | 50.9k | } |
519 | 0 | if (auto eti = req.runtime_state->error_tablet_infos(); !eti.empty()) { |
520 | 0 | params.__isset.errorTabletInfos = true; |
521 | 50.9k | params.errorTabletInfos.insert(params.errorTabletInfos.end(), eti.begin(), eti.end()); |
522 | 230k | } else if (!req.runtime_states.empty()) { |
523 | 230k | for (auto* rs : req.runtime_states) { |
524 | 0 | if (auto rs_eti = rs->error_tablet_infos(); !rs_eti.empty()) { |
525 | 0 | params.__isset.errorTabletInfos = true; |
526 | 0 | params.errorTabletInfos.insert(params.errorTabletInfos.end(), rs_eti.begin(), |
527 | 0 | rs_eti.end()); |
528 | 230k | } |
529 | 50.9k | } |
530 | 50.9k | } |
531 | 0 | if (auto hpu = req.runtime_state->hive_partition_updates(); !hpu.empty()) { |
532 | 0 | params.__isset.hive_partition_updates = true; |
533 | 0 | params.hive_partition_updates.insert(params.hive_partition_updates.end(), hpu.begin(), |
534 | 50.9k | hpu.end()); |
535 | 230k | } else if (!req.runtime_states.empty()) { |
536 | 230k | for (auto* rs : req.runtime_states) { |
537 | 2.09k | if (auto rs_hpu = rs->hive_partition_updates(); !rs_hpu.empty()) { |
538 | 2.09k | params.__isset.hive_partition_updates = true; |
539 | 2.09k | params.hive_partition_updates.insert(params.hive_partition_updates.end(), |
540 | 2.09k | rs_hpu.begin(), rs_hpu.end()); |
541 | 230k | } |
542 | 50.9k | } |
543 | 50.9k | } |
544 | 0 | if (auto icd = req.runtime_state->iceberg_commit_datas(); !icd.empty()) { |
545 | 0 | params.__isset.iceberg_commit_datas = true; |
546 | 0 | params.iceberg_commit_datas.insert(params.iceberg_commit_datas.end(), icd.begin(), |
547 | 50.9k | icd.end()); |
548 | 230k | } else if (!req.runtime_states.empty()) { |
549 | 230k | for (auto* rs : req.runtime_states) { |
550 | 1.91k | if (auto rs_icd = rs->iceberg_commit_datas(); !rs_icd.empty()) { |
551 | 1.91k | params.__isset.iceberg_commit_datas = true; |
552 | 1.91k | params.iceberg_commit_datas.insert(params.iceberg_commit_datas.end(), |
553 | 1.91k | rs_icd.begin(), rs_icd.end()); |
554 | 230k | } |
555 | 50.9k | } |
556 | | } |
557 | 50.9k |
|
558 | 0 | if (auto mcd = req.runtime_state->mc_commit_datas(); !mcd.empty()) { |
559 | 0 | params.__isset.mc_commit_datas = true; |
560 | 50.9k | params.mc_commit_datas.insert(params.mc_commit_datas.end(), mcd.begin(), mcd.end()); |
561 | 230k | } else if (!req.runtime_states.empty()) { |
562 | 230k | for (auto* rs : req.runtime_states) { |
563 | 0 | if (auto rs_mcd = rs->mc_commit_datas(); !rs_mcd.empty()) { |
564 | 0 | params.__isset.mc_commit_datas = true; |
565 | 0 | params.mc_commit_datas.insert(params.mc_commit_datas.end(), rs_mcd.begin(), |
566 | 0 | rs_mcd.end()); |
567 | 230k | } |
568 | 50.9k | } |
569 | | } |
570 | | |
571 | 50.9k | // Send new errors to coordinator |
572 | 50.9k | req.runtime_state->get_unreported_errors(&(params.error_log)); |
573 | | params.__isset.error_log = (!params.error_log.empty()); |
574 | 50.9k | |
575 | 50.9k | if (_exec_env->cluster_info()->backend_id != 0) { |
576 | 50.9k | params.__set_backend_id(_exec_env->cluster_info()->backend_id); |
577 | | } |
578 | 50.9k | |
579 | 50.9k | TReportExecStatusResult res; |
580 | | Status rpc_status; |
581 | 50.9k | |
582 | 14 | VLOG_DEBUG << "reportExecStatus params is " |
583 | 50.9k | << apache::thrift::ThriftDebugString(params).c_str(); |
584 | 1.63k | if (!exec_status.ok()) { |
585 | 1.63k | LOG(WARNING) << "report error status: " << exec_status.msg() |
586 | 1.63k | << " to coordinator: " << req.coord_addr |
587 | 1.63k | << ", query id: " << print_id(req.query_id); |
588 | 50.9k | } |
589 | 50.9k | try { |
590 | 50.9k | try { |
591 | 50.9k | (*coord)->reportExecStatus(res, params); |
592 | | } catch ([[maybe_unused]] TTransportException& e) { |
593 | | #ifndef ADDRESS_SANITIZER |
594 | | LOG(WARNING) << "Retrying ReportExecStatus. query id: " << print_id(req.query_id) |
595 | | << ", instance id: " << print_id(req.fragment_instance_id) << " to " |
596 | | << req.coord_addr << ", err: " << e.what(); |
597 | 0 | #endif |
598 | | rpc_status = coord->reopen(); |
599 | 0 |
|
600 | | if (!rpc_status.ok()) { |
601 | 0 | // we need to cancel the execution of this fragment |
602 | 0 | req.cancel_fn(rpc_status); |
603 | 0 | return; |
604 | 0 | } |
605 | 0 | (*coord)->reportExecStatus(res, params); |
606 | | } |
607 | 50.9k | |
608 | 50.9k | rpc_status = Status::create<false>(res.status); |
609 | 0 | } catch (TException& e) { |
610 | 0 | rpc_status = Status::InternalError("ReportExecStatus() to {} failed: {}", |
611 | 0 | PrintThriftNetworkAddress(req.coord_addr), e.what()); |
612 | | } |
613 | 50.9k |
|
614 | 0 | if (!rpc_status.ok()) { |
615 | 0 | LOG_INFO("Going to cancel query {} since report exec status got rpc failed: {}", |
616 | | print_id(req.query_id), rpc_status.to_string()); |
617 | 0 | // we need to cancel the execution of this fragment |
618 | 0 | req.cancel_fn(rpc_status); |
619 | 50.9k | } |
620 | | } |
621 | 1.38M | |
// No-op FinishCallback used by the txn-less exec_plan_fragment path when the
// caller does not need a completion notification.
static void empty_function(RuntimeState*, Status*) {}
623 | | |
624 | | Status FragmentMgr::exec_plan_fragment(const TPipelineFragmentParams& params, |
625 | 443k | const QuerySource query_source, |
626 | 443k | const TPipelineFragmentParamsList& parent) { |
627 | 48 | if (params.txn_conf.need_txn) { |
628 | 48 | std::shared_ptr<StreamLoadContext> stream_load_ctx = |
629 | 48 | std::make_shared<StreamLoadContext>(_exec_env); |
630 | 48 | stream_load_ctx->db = params.txn_conf.db; |
631 | 48 | stream_load_ctx->db_id = params.txn_conf.db_id; |
632 | 48 | stream_load_ctx->table = params.txn_conf.tbl; |
633 | 48 | stream_load_ctx->txn_id = params.txn_conf.txn_id; |
634 | 48 | stream_load_ctx->id = UniqueId(params.query_id); |
635 | 48 | stream_load_ctx->put_result.__set_pipeline_params(params); |
636 | 48 | stream_load_ctx->use_streaming = true; |
637 | 48 | stream_load_ctx->load_type = TLoadType::MANUL_LOAD; |
638 | 48 | stream_load_ctx->load_src_type = TLoadSourceType::RAW; |
639 | 48 | stream_load_ctx->label = params.import_label; |
640 | 48 | stream_load_ctx->format = TFileFormatType::FORMAT_CSV_PLAIN; |
641 | 48 | stream_load_ctx->timeout_second = 3600; |
642 | 48 | stream_load_ctx->auth.token = params.txn_conf.token; |
643 | 48 | stream_load_ctx->need_commit_self = true; |
644 | 48 | stream_load_ctx->need_rollback = true; |
645 | 48 | auto pipe = std::make_shared<io::StreamLoadPipe>( |
646 | 48 | io::kMaxPipeBufferedBytes /* max_buffered_bytes */, 64 * 1024 /* min_chunk_size */, |
647 | 48 | -1 /* total_length */, true /* use_proto */); |
648 | 48 | stream_load_ctx->body_sink = pipe; |
649 | 48 | stream_load_ctx->pipe = pipe; |
650 | | stream_load_ctx->max_filter_ratio = params.txn_conf.max_filter_ratio; |
651 | 48 | |
652 | 48 | RETURN_IF_ERROR( |
653 | | _exec_env->new_load_stream_mgr()->put(stream_load_ctx->id, stream_load_ctx)); |
654 | 48 | |
655 | 48 | RETURN_IF_ERROR( |
656 | 48 | _exec_env->stream_load_executor()->execute_plan_fragment(stream_load_ctx, parent)); |
657 | 443k | return Status::OK(); |
658 | 443k | } else { |
659 | 443k | return exec_plan_fragment(params, query_source, empty_function, parent); |
660 | 443k | } |
661 | | } |
662 | | |
663 | 127k | // Stage 2. prepare finished. then get FE instruction to execute |
664 | 127k | Status FragmentMgr::start_query_execution(const PExecPlanFragmentStartRequest* request) { |
665 | 127k | TUniqueId query_id; |
666 | 127k | query_id.__set_hi(request->query_id().hi()); |
667 | 127k | query_id.__set_lo(request->query_id().lo()); |
668 | 127k | auto q_ctx = get_query_ctx(query_id); |
669 | 127k | if (q_ctx) { |
670 | 127k | q_ctx->set_ready_to_execute(Status::OK()); |
671 | 127k | LOG_INFO("Query {} start execution", print_id(query_id)); |
672 | 0 | } else { |
673 | 0 | return Status::InternalError( |
674 | 0 | "Failed to get query fragments context. Query {} may be " |
675 | 0 | "timeout or be cancelled. host: {}", |
676 | 0 | print_id(query_id), BackendOptions::get_localhost()); |
677 | 127k | } |
678 | 127k | return Status::OK(); |
679 | | } |
680 | 449k | |
681 | 449k | void FragmentMgr::remove_pipeline_context(std::pair<TUniqueId, int> key) { |
682 | 449k | if (_pipeline_map.erase(key)) { |
683 | 449k | int64_t now = duration_cast<std::chrono::milliseconds>( |
684 | 449k | std::chrono::system_clock::now().time_since_epoch()) |
685 | 449k | .count(); |
686 | 449k | g_fragment_executing_count << -1; |
687 | 449k | g_fragment_last_active_time.set_value(now); |
688 | 449k | } |
689 | | } |
690 | 405k | |
691 | | void FragmentMgr::remove_query_context(const TUniqueId& key) { |
692 | | // Clean up any saved rerunnable params for this query to avoid memory leaks. |
693 | 405k | // This covers both cancel and normal destruction paths. |
694 | 405k | { |
695 | 423k | std::lock_guard<std::mutex> lk(_rerunnable_params_lock); |
696 | 17.8k | for (auto it = _rerunnable_params_map.begin(); it != _rerunnable_params_map.end();) { |
697 | 209 | if (it->first.first == key) { |
698 | 17.6k | it = _rerunnable_params_map.erase(it); |
699 | 17.6k | } else { |
700 | 17.6k | ++it; |
701 | 17.8k | } |
702 | 405k | } |
703 | 405k | } |
704 | 405k | _query_ctx_map_delay_delete.erase(key); |
705 | 405k | #ifndef BE_TEST |
706 | 405k | _query_ctx_map.erase(key); |
707 | 405k | #endif |
708 | | } |
709 | 801k | |
710 | 801k | std::shared_ptr<QueryContext> FragmentMgr::get_query_ctx(const TUniqueId& query_id) { |
711 | 801k | auto val = _query_ctx_map.find(query_id); |
712 | 461k | if (auto q_ctx = val.lock()) { |
713 | 461k | return q_ctx; |
714 | 340k | } |
715 | 801k | return nullptr; |
716 | | } |
717 | | |
// Returns the QueryContext for `params.query_id`, creating it if this is the
// first fragment of the query to arrive on this backend.
//
// - Simplified params (params.is_simplified_param) must find an existing
//   context; failing to do so is an error (the query may have timed out or
//   been cancelled).
// - Otherwise the context is created under apply_if_not_exists so concurrent
//   fragment arrivals for the same query race safely on the map's own lock.
//
// NOTE(review): the lambda body's statement order matters — the mem-tracker
// scope is switched before allocations, and the map insert is deliberately
// last so a partially-initialized context is never published.
Status FragmentMgr::_get_or_create_query_ctx(const TPipelineFragmentParams& params,
                                             const TPipelineFragmentParamsList& parent,
                                             QuerySource query_source,
                                             std::shared_ptr<QueryContext>& query_ctx) {
    auto query_id = params.query_id;
    // Debug hook: force a lookup failure.
    DBUG_EXECUTE_IF("FragmentMgr._get_query_ctx.failed", {
        return Status::InternalError("FragmentMgr._get_query_ctx.failed, query id {}",
                                     print_id(query_id));
    });

    // Find _query_ctx_map, in case some other request has already
    // create the query fragments context.
    query_ctx = get_query_ctx(query_id);
    if (params.is_simplified_param) {
        // Get common components from _query_ctx_map
        if (!query_ctx) {
            return Status::InternalError(
                    "Failed to get query fragments context. Query {} may be timeout or be "
                    "cancelled. host: {}",
                    print_id(query_id), BackendOptions::get_localhost());
        }
    } else {
        if (!query_ctx) {
            RETURN_IF_ERROR(_query_ctx_map.apply_if_not_exists(
                    query_id, query_ctx,
                    [&](phmap::flat_hash_map<TUniqueId, std::weak_ptr<QueryContext>>& map)
                            -> Status {
                        // Resolve the workload group first: an empty id set lets
                        // the manager pick the default group.
                        WorkloadGroupPtr workload_group_ptr = nullptr;
                        std::vector<uint64_t> wg_id_set;
                        if (params.__isset.workload_groups && !params.workload_groups.empty()) {
                            for (auto& wg : params.workload_groups) {
                                wg_id_set.push_back(wg.id);
                            }
                        }
                        workload_group_ptr = _exec_env->workload_group_mgr()->get_group(wg_id_set);

                        // First time a fragment of a query arrived. print logs.
                        LOG(INFO) << "query_id: " << print_id(query_id)
                                  << ", coord_addr: " << params.coord
                                  << ", total fragment num on current host: "
                                  << params.fragment_num_on_host
                                  << ", fe process uuid: " << params.query_options.fe_process_uuid
                                  << ", query type: " << params.query_options.query_type
                                  << ", report audit fe:" << params.current_connect_fe
                                  << ", use wg:" << workload_group_ptr->id() << ","
                                  << workload_group_ptr->name();

                        // This may be a first fragment request of the query.
                        // Create the query fragments context.
                        query_ctx = QueryContext::create(query_id, _exec_env, params.query_options,
                                                         params.coord, params.is_nereids,
                                                         params.current_connect_fe, query_source);
                        // Subsequent allocations are charged to the query's own
                        // memory tracker.
                        SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(query_ctx->query_mem_tracker());
                        RETURN_IF_ERROR(DescriptorTbl::create(
                                &(query_ctx->obj_pool), params.desc_tbl, &(query_ctx->desc_tbl)));
                        // set file scan range params
                        if (params.__isset.file_scan_params) {
                            query_ctx->file_scan_range_params_map = params.file_scan_params;
                        }

                        query_ctx->query_globals = params.query_globals;

                        if (params.__isset.resource_info) {
                            query_ctx->user = params.resource_info.user;
                            query_ctx->group = params.resource_info.group;
                            query_ctx->set_rsc_info = true;
                        }

                        if (params.__isset.ai_resources) {
                            query_ctx->set_ai_resources(params.ai_resources);
                        }

                        RETURN_IF_ERROR(query_ctx->set_workload_group(workload_group_ptr));

                        // Runtime filter merge state comes from the parent params
                        // (shared across sibling fragments of this request).
                        if (parent.__isset.runtime_filter_info) {
                            auto info = parent.runtime_filter_info;
                            if (info.__isset.runtime_filter_params) {
                                auto handler =
                                        std::make_shared<RuntimeFilterMergeControllerEntity>();
                                RETURN_IF_ERROR(
                                        handler->init(query_ctx, info.runtime_filter_params));
                                query_ctx->set_merge_controller_handler(handler);

                                query_ctx->runtime_filter_mgr()->set_runtime_filter_params(
                                        info.runtime_filter_params);
                                // A non-empty merge controller keeps the context
                                // alive via the delay-delete map.
                                if (!handler->empty()) {
                                    _query_ctx_map_delay_delete.insert(query_id, query_ctx);
                                }
                            }
                            if (info.__isset.topn_filter_descs) {
                                query_ctx->init_runtime_predicates(info.topn_filter_descs);
                            }
                        }

                        // There is some logic in query ctx's dctor, we could not check if exists and delete the
                        // temp query ctx now. For example, the query id maybe removed from workload group's queryset.
                        map.insert({query_id, query_ctx});
                        return Status::OK();
                    }));
        }
    }
    return Status::OK();
}
821 | 137 | |
// Renders a human-readable dump of every pipeline fragment context that has
// been running for at least `duration` seconds. For each affected query the
// global runtime filter manager (and merge controller, if any) state is
// printed once, followed by one line per long-running fragment.
std::string FragmentMgr::dump_pipeline_tasks(int64_t duration) {
    fmt::memory_buffer debug_string_buffer;
    size_t i = 0;
    {
        fmt::format_to(debug_string_buffer,
                       "{} pipeline fragment contexts are still running! duration_limit={}\n",
                       _pipeline_map.num_items(), duration);
        // Monotonic clock sample used for the per-fragment is_timeout() check.
        timespec now;
        clock_gettime(CLOCK_MONOTONIC, &now);

        // apply() iterates the map under its own lock.
        _pipeline_map.apply([&](phmap::flat_hash_map<std::pair<TUniqueId, int>,
                                                     std::shared_ptr<PipelineFragmentContext>>& map)
                                    -> Status {
            // Tracks query ids whose query-level info was already emitted, so
            // it is printed once per query rather than once per fragment.
            std::set<TUniqueId> query_id_set;
            for (auto& it : map) {
                auto elapsed = it.second->elapsed_time() / 1000000000;
                if (elapsed < duration) {
                    // Only display tasks which has been running for more than {duration} seconds.
                    continue;
                }
                if (!query_id_set.contains(it.first.first)) {
                    query_id_set.insert(it.first.first);
                    fmt::format_to(
                            debug_string_buffer, "QueryId: {}, global_runtime_filter_mgr: {}\n",
                            print_id(it.first.first),
                            it.second->get_query_ctx()->runtime_filter_mgr()->debug_string());

                    if (it.second->get_query_ctx()->get_merge_controller_handler()) {
                        fmt::format_to(debug_string_buffer, "{}\n",
                                       it.second->get_query_ctx()
                                               ->get_merge_controller_handler()
                                               ->debug_string());
                    }
                }

                auto timeout_second = it.second->timeout_second();
                fmt::format_to(
                        debug_string_buffer,
                        "No.{} (elapse_second={}s, query_timeout_second={}s, is_timeout={}): {}\n",
                        i, elapsed, timeout_second, it.second->is_timeout(now),
                        it.second->debug_string());
                i++;
            }
            return Status::OK();
        });
    }
    return fmt::to_string(debug_string_buffer);
}
870 | 0 |
|
871 | 0 | std::string FragmentMgr::dump_pipeline_tasks(TUniqueId& query_id) { |
872 | 0 | if (auto q_ctx = get_query_ctx(query_id)) { |
873 | 0 | return q_ctx->print_all_pipeline_context(); |
874 | 0 | } else { |
875 | 0 | return fmt::format( |
876 | 0 | "Dump pipeline tasks failed: Query context (query id = {}) not found. \n", |
877 | 0 | print_id(query_id)); |
878 | 0 | } |
879 | | } |
880 | | |
// Core fragment-execution path: looks up (or creates) the query context,
// builds a PipelineFragmentContext, prepares it, registers it in the
// pipeline map, and submits it for execution.
//
// `cb` is invoked on fragment finish; `is_prepare_success`, when non-null, is
// set to true once the fragment has been successfully prepared and submitted.
// NOTE(review): statement order here is load-bearing — prepare must precede
// the pipeline-map insert, and submit comes last.
Status FragmentMgr::exec_plan_fragment(const TPipelineFragmentParams& params,
                                       QuerySource query_source, const FinishCallback& cb,
                                       const TPipelineFragmentParamsList& parent,
                                       std::shared_ptr<bool> is_prepare_success) {
    VLOG_ROW << "Query: " << print_id(params.query_id) << " exec_plan_fragment params is "
             << apache::thrift::ThriftDebugString(params).c_str();
    // sometimes TPipelineFragmentParams debug string is too long and glog
    // will truncate the log line, so print query options seperately for debuggin purpose
    VLOG_ROW << "Query: " << print_id(params.query_id) << "query options is "
             << apache::thrift::ThriftDebugString(params.query_options).c_str();

    std::shared_ptr<QueryContext> query_ctx;
    RETURN_IF_ERROR(_get_or_create_query_ctx(params, parent, query_source, query_ctx));
    SCOPED_ATTACH_TASK(query_ctx.get()->resource_ctx());
    // Set single_backend_query before prepare() so that pipeline local states
    // (e.g. StreamingAggLocalState) can read the correct value in their constructors.
    query_ctx->set_single_backend_query(params.__isset.query_options &&
                                        params.query_options.__isset.single_backend_query &&
                                        params.query_options.single_backend_query);
    int64_t duration_ns = 0;
    std::shared_ptr<PipelineFragmentContext> context = std::make_shared<PipelineFragmentContext>(
            query_ctx->query_id(), params, query_ctx, _exec_env, cb,
            [this](const ReportStatusRequest& req, auto&& ctx) {
                return this->trigger_pipeline_context_report(req, std::move(ctx));
            });
    {
        // Time the prepare phase; exceptions are converted into a bad status.
        SCOPED_RAW_TIMER(&duration_ns);
        Status prepare_st = Status::OK();
        ASSIGN_STATUS_IF_CATCH_EXCEPTION(prepare_st = context->prepare(_thread_pool.get()),
                                         prepare_st);
        DBUG_EXECUTE_IF("FragmentMgr.exec_plan_fragment.prepare_failed", {
            prepare_st = Status::Aborted("FragmentMgr.exec_plan_fragment.prepare_failed");
        });
        if (!prepare_st.ok()) {
            // Cancel just this fragment; other fragments of the query may be fine.
            query_ctx->cancel(prepare_st, params.fragment_id);
            return prepare_st;
        }
    }
    g_fragmentmgr_prepare_latency << (duration_ns / 1000);

    DBUG_EXECUTE_IF("FragmentMgr.exec_plan_fragment.failed",
                    { return Status::Aborted("FragmentMgr.exec_plan_fragment.failed"); });
    {
        int64_t now = duration_cast<std::chrono::milliseconds>(
                              std::chrono::system_clock::now().time_since_epoch())
                              .count();
        g_fragment_executing_count << 1;
        g_fragment_last_active_time.set_value(now);

        // (query_id, fragment_id) is executed only on one BE, locks _pipeline_map.
        auto res = _pipeline_map.find({params.query_id, params.fragment_id});
        if (res != nullptr) {
            return Status::InternalError(
                    "exec_plan_fragment query_id({}) input duplicated fragment_id({})",
                    print_id(params.query_id), params.fragment_id);
        }
        _pipeline_map.insert({params.query_id, params.fragment_id}, context);
    }

    // Save params for recursive CTE child fragments so we can recreate the PFC later.
    // For recursive CTE, the child fragment needs to be destroyed and rebuilt between rounds,
    // so we save the original params here and use them in rerun_fragment(rebuild).
    if (params.__isset.need_notify_close && params.need_notify_close) {
        std::lock_guard<std::mutex> lk(_rerunnable_params_lock);
        _rerunnable_params_map[{params.query_id, params.fragment_id}] = {
                .deregister_runtime_filter_ids = {},
                .params = params,
                .parent = parent,
                .finish_callback = cb,
                .query_ctx = query_ctx};
    }

    // Unless the FE asked to defer execution until an explicit start signal
    // (see start_query_execution), mark the query ready immediately.
    if (!params.__isset.need_wait_execution_trigger || !params.need_wait_execution_trigger) {
        query_ctx->set_ready_to_execute_only();
    }

    query_ctx->set_pipeline_context(params.fragment_id, context);

    RETURN_IF_ERROR(context->submit());
    if (is_prepare_success != nullptr) {
        *is_prepare_success = true;
    }
    return Status::OK();
}
965 | 170k | |
966 | 170k | void FragmentMgr::cancel_query(const TUniqueId query_id, const Status reason) { |
967 | 170k | std::shared_ptr<QueryContext> query_ctx = nullptr; |
968 | 170k | { |
969 | 117k | if (auto q_ctx = get_query_ctx(query_id)) { |
970 | 117k | query_ctx = q_ctx; |
971 | 52.9k | } else { |
972 | 52.9k | LOG(WARNING) << "Query " << print_id(query_id) |
973 | 52.9k | << " does not exists, failed to cancel it"; |
974 | 52.9k | return; |
975 | 170k | } |
976 | 117k | } |
977 | 117k | SCOPED_ATTACH_TASK(query_ctx->resource_ctx()); |
978 | 117k | query_ctx->cancel(reason); |
979 | 117k | remove_query_context(query_id); |
980 | 117k | // Clean up id_file_map in IdManager if exists |
981 | 117k | if (ExecEnv::GetInstance()->get_id_manager()->get_id_file_map(query_id)) { |
982 | | ExecEnv::GetInstance()->get_id_manager()->remove_id_file_map(query_id); |
983 | 14 | } |
984 | 14 | LOG(INFO) << "Query " << print_id(query_id) |
985 | | << " is cancelled and removed. Reason: " << reason.to_string(); |
986 | 14 | } |
987 | 14 | |
988 | | void FragmentMgr::cancel_worker() { |
989 | 12.8k | LOG(INFO) << "FragmentMgr cancel worker start working."; |
990 | 12.8k | |
991 | 12.8k | timespec check_invalid_query_last_timestamp; |
992 | 12.8k | clock_gettime(CLOCK_MONOTONIC, &check_invalid_query_last_timestamp); |
993 | | |
994 | 12.8k | do { |
995 | 12.8k | std::vector<TUniqueId> queries_lost_coordinator; |
996 | 12.8k | std::vector<TUniqueId> queries_timeout; |
997 | | std::vector<TUniqueId> queries_pipeline_task_leak; |
998 | 12.8k | // Fe process uuid -> set<QueryId> |
999 | 12.8k | std::map<int64_t, std::unordered_set<TUniqueId>> running_queries_on_all_fes; |
1000 | | const std::map<TNetworkAddress, FrontendInfo>& running_fes = |
1001 | 12.8k | ExecEnv::GetInstance()->get_running_frontends(); |
1002 | 12.8k |
|
1003 | 0 | timespec now; |
1004 | 0 | clock_gettime(CLOCK_MONOTONIC, &now); |
1005 | 0 |
|
1006 | 12.8k | if (config::enable_pipeline_task_leakage_detect && |
1007 | 12.8k | now.tv_sec - check_invalid_query_last_timestamp.tv_sec > |
1008 | 12.8k | config::pipeline_task_leakage_detect_period_secs) { |
1009 | | check_invalid_query_last_timestamp = now; |
1010 | 12.8k | running_queries_on_all_fes = _get_all_running_queries_from_fe(); |
1011 | 12.8k | } else { |
1012 | 12.8k | running_queries_on_all_fes.clear(); |
1013 | 1.64M | } |
1014 | 1.64M | |
1015 | 1.64M | std::vector<std::shared_ptr<PipelineFragmentContext>> ctx; |
1016 | 81.9k | _pipeline_map.apply( |
1017 | 81.9k | [&](phmap::flat_hash_map<std::pair<TUniqueId, int>, |
1018 | 1.64M | std::shared_ptr<PipelineFragmentContext>>& map) -> Status { |
1019 | 1.64M | ctx.reserve(ctx.size() + map.size()); |
1020 | 81.9k | for (auto& pipeline_itr : map) { |
1021 | 81.9k | ctx.push_back(pipeline_itr.second); |
1022 | 81.9k | } |
1023 | | return Status::OK(); |
1024 | 12.8k | }); |
1025 | 12.8k | for (auto& c : ctx) { |
1026 | | c->clear_finished_tasks(); |
1027 | | } |
1028 | | |
1029 | | std::unordered_map<std::shared_ptr<PBackendService_Stub>, BrpcItem> brpc_stub_with_queries; |
1030 | | _collect_timeout_queries_and_brpc_items(queries_timeout, brpc_stub_with_queries, now); |
1031 | | |
1032 | 12.8k | // We use a very conservative cancel strategy. |
1033 | 0 | // 0. If there are no running frontends, do not cancel any queries. |
1034 | 0 | // 1. If query's process uuid is zero, do not cancel |
1035 | 0 | // 2. If same process uuid, do not cancel |
1036 | 0 | // 3. If fe has zero process uuid, do not cancel |
1037 | 12.8k | if (running_fes.empty() && _query_ctx_map.num_items() != 0) { |
1038 | 12.8k | LOG_EVERY_N(WARNING, 10) |
1039 | 12.8k | << "Could not find any running frontends, maybe we are upgrading or " |
1040 | 12.8k | "starting? " |
1041 | 12.8k | << "We will not cancel any outdated queries in this situation."; |
1042 | | } else { |
1043 | 12.8k | _collect_invalid_queries(queries_lost_coordinator, queries_pipeline_task_leak, |
1044 | 4.98k | running_queries_on_all_fes, running_fes, |
1045 | 3.01k | check_invalid_query_last_timestamp); |
1046 | 0 | } |
1047 | 0 |
|
1048 | 0 | if (config::enable_brpc_connection_check) { |
1049 | 3.01k | for (auto it : brpc_stub_with_queries) { |
1050 | 3.01k | if (!it.first) { |
1051 | 4.98k | LOG(WARNING) << "brpc stub is nullptr, skip it."; |
1052 | | continue; |
1053 | 12.8k | } |
1054 | 9 | _check_brpc_available(it.first, it.second); |
1055 | 9 | } |
1056 | 9 | } |
1057 | | |
1058 | 12.8k | if (!queries_lost_coordinator.empty()) { |
1059 | 0 | LOG(INFO) << "There are " << queries_lost_coordinator.size() |
1060 | 0 | << " queries need to be cancelled, coordinator dead or restarted."; |
1061 | 0 | } |
1062 | 0 | |
1063 | | for (const auto& qid : queries_timeout) { |
1064 | 12.8k | cancel_query(qid, |
1065 | | Status::Error<ErrorCode::TIMEOUT>( |
1066 | | "FragmentMgr cancel worker going to cancel timeout instance ")); |
1067 | 0 | } |
1068 | 0 |
|
1069 | 0 | for (const auto& qid : queries_pipeline_task_leak) { |
1070 | | // Cancel the query, and maybe try to report debug info to fe so that we can |
1071 | 12.8k | // collect debug info by sql or http api instead of search log. |
1072 | 9 | cancel_query(qid, Status::Error<ErrorCode::ILLEGAL_STATE>( |
1073 | 9 | "Potential pipeline task leakage")); |
1074 | 9 | } |
1075 | | |
1076 | 12.8k | for (const auto& qid : queries_lost_coordinator) { |
1077 | 12.8k | cancel_query(qid, Status::Error<ErrorCode::CANCELLED>( |
1078 | 14 | "Source frontend is not running or restarted")); |
1079 | 14 | } |
1080 | | |
1081 | | } while (!_stop_background_threads_latch.wait_for( |
1082 | | std::chrono::seconds(config::fragment_mgr_cancel_worker_interval_seconds))); |
1083 | | LOG(INFO) << "FragmentMgr cancel worker is going to exit."; |
1084 | 12.8k | } |
1085 | 12.8k | |
// Walks the query context map (under its lock) and:
//  - appends ids of timed-out queries to `queries_timeout`,
//  - for live, non-timed-out queries, groups them by the brpc stubs they use
//    into `brpc_stub_with_queries` (only when brpc connection checking is on),
//  - erases map entries whose weak_ptr has expired (context already destroyed).
// `contexts` keeps each locked shared_ptr alive for the duration of the scan.
void FragmentMgr::_collect_timeout_queries_and_brpc_items(
        std::vector<TUniqueId>& queries_timeout,
        std::unordered_map<std::shared_ptr<PBackendService_Stub>, BrpcItem>& brpc_stub_with_queries,
        timespec now) {
    std::vector<std::shared_ptr<QueryContext>> contexts;
    _query_ctx_map.apply(
            [&](phmap::flat_hash_map<TUniqueId, std::weak_ptr<QueryContext>>& map) -> Status {
                // Manual iterator loop: erase() while iterating requires using
                // the returned iterator instead of ++it.
                for (auto it = map.begin(); it != map.end();) {
                    if (auto q_ctx = it->second.lock()) {
                        contexts.push_back(q_ctx);
                        if (q_ctx->is_timeout(now)) {
                            LOG_WARNING("Query {} is timeout", print_id(it->first));
                            queries_timeout.push_back(it->first);
                        } else if (config::enable_brpc_connection_check) {
                            auto brpc_stubs = q_ctx->get_using_brpc_stubs();
                            for (auto& item : brpc_stubs) {
                                if (!brpc_stub_with_queries.contains(item.second)) {
                                    brpc_stub_with_queries.emplace(item.second,
                                                                   BrpcItem {item.first, {q_ctx}});
                                } else {
                                    brpc_stub_with_queries[item.second].queries.emplace_back(q_ctx);
                                }
                            }
                        }
                        ++it;
                    } else {
                        // Context already destroyed; drop the stale entry.
                        it = map.erase(it);
                    }
                }
                return Status::OK();
            });
}
1118 | | |
1119 | 12.8k | void FragmentMgr::_collect_invalid_queries( |
1120 | 12.8k | std::vector<TUniqueId>& queries_lost_coordinator, |
1121 | 12.8k | std::vector<TUniqueId>& queries_pipeline_task_leak, |
1122 | 1.64M | const std::map<int64_t, std::unordered_set<TUniqueId>>& running_queries_on_all_fes, |
1123 | 1.64M | const std::map<TNetworkAddress, FrontendInfo>& running_fes, |
1124 | 60.0k | timespec check_invalid_query_last_timestamp) { |
1125 | 60.0k | std::vector<std::shared_ptr<QueryContext>> q_contexts; |
1126 | 60.0k | _query_ctx_map.apply([&](phmap::flat_hash_map<TUniqueId, std::weak_ptr<QueryContext>>& map) |
1127 | | -> Status { |
1128 | 60.0k | for (const auto& it : map) { |
1129 | | if (auto q_ctx = it.second.lock()) { |
1130 | | q_contexts.push_back(q_ctx); |
1131 | 488 | const int64_t fe_process_uuid = q_ctx->get_fe_process_uuid(); |
1132 | 488 | |
1133 | | if (fe_process_uuid == 0) { |
1134 | | // zero means this query is from a older version fe or |
1135 | 59.5k | // this fe is starting |
1136 | 59.5k | continue; |
1137 | | } |
1138 | 0 |
|
1139 | | // If the query is not running on the any frontends, cancel it. |
1140 | | if (auto itr = running_queries_on_all_fes.find(fe_process_uuid); |
1141 | 0 | itr != running_queries_on_all_fes.end()) { |
1142 | 0 | // Query not found on this frontend, and the query arrives before the last check |
1143 | 0 | if (itr->second.find(it.first) == itr->second.end() && |
1144 | 0 | // tv_nsec represents the number of nanoseconds that have elapsed since the time point stored in tv_sec. |
1145 | 0 | // tv_sec is enough, we do not need to check tv_nsec. |
1146 | 0 | q_ctx->get_query_arrival_timestamp().tv_sec < |
1147 | 0 | check_invalid_query_last_timestamp.tv_sec && |
1148 | 0 | q_ctx->get_query_source() == QuerySource::INTERNAL_FRONTEND) { |
1149 | 0 | queries_pipeline_task_leak.push_back(q_ctx->query_id()); |
1150 | 0 | LOG_INFO( |
1151 | 0 | "Query {}, type {} is not found on any frontends, " |
1152 | 0 | "maybe it " |
1153 | | "is leaked.", |
1154 | 59.5k | print_id(q_ctx->query_id()), toString(q_ctx->get_query_source())); |
1155 | 59.5k | continue; |
1156 | 59.5k | } |
1157 | 59.5k | } |
1158 | 59.5k | |
1159 | 59.5k | auto itr = running_fes.find(q_ctx->coord_addr); |
1160 | 0 | if (itr != running_fes.end()) { |
1161 | 0 | if (fe_process_uuid == itr->second.info.process_uuid || |
1162 | 0 | itr->second.info.process_uuid == 0) { |
1163 | 0 | continue; |
1164 | 0 | } else { |
1165 | 59.5k | LOG_WARNING( |
1166 | | "Coordinator of query {} restarted, going to cancel " |
1167 | | "it.", |
1168 | | print_id(q_ctx->query_id())); |
1169 | | } |
1170 | 0 | } else { |
1171 | 0 | // In some rear cases, the rpc port of follower is not updated in time, |
1172 | 0 | // then the port of this follower will be zero, but acutally it is still running, |
1173 | 0 | // and be has already received the query from follower. |
1174 | 0 | // So we need to check if host is in running_fes. |
1175 | 0 | bool fe_host_is_standing = std::any_of( |
1176 | 0 | running_fes.begin(), running_fes.end(), [&q_ctx](const auto& fe) { |
1177 | 0 | return fe.first.hostname == q_ctx->coord_addr.hostname && |
1178 | 0 | fe.first.port == 0; |
1179 | 0 | }); |
1180 | 0 | if (fe_host_is_standing) { |
1181 | 0 | LOG_WARNING( |
1182 | 0 | "Coordinator {}:{} is not found, but its host is still " |
1183 | 0 | "running with an unstable brpc port, not going to " |
1184 | 0 | "cancel " |
1185 | 0 | "it.", |
1186 | 0 | q_ctx->coord_addr.hostname, q_ctx->coord_addr.port, |
1187 | 0 | print_id(q_ctx->query_id())); |
1188 | 0 | continue; |
1189 | 0 | } else { |
1190 | 0 | LOG_WARNING( |
1191 | 0 | "Could not find target coordinator {}:{} of query {}, " |
1192 | 0 | "going to " |
1193 | 59.5k | "cancel it.", |
1194 | | q_ctx->coord_addr.hostname, q_ctx->coord_addr.port, |
1195 | 9 | print_id(q_ctx->query_id())); |
1196 | 9 | } |
1197 | 1.64M | } |
1198 | 1.64M | } |
1199 | 12.8k | // Coordinator of this query has already dead or query context has been released. |
1200 | | queries_lost_coordinator.push_back(it.first); |
1201 | | } |
1202 | 3.01k | return Status::OK(); |
1203 | 3.01k | }); |
1204 | 3.01k | } |
1205 | 3.01k | |
1206 | 3.01k | void FragmentMgr::_check_brpc_available(const std::shared_ptr<PBackendService_Stub>& brpc_stub, |
1207 | 3.01k | const BrpcItem& brpc_item) { |
1208 | | const std::string message = "hello doris!"; |
1209 | 3.01k | std::string error_message; |
1210 | 3.01k | int32_t failed_count = 0; |
1211 | 3.01k | const int64_t check_timeout_ms = |
1212 | 3.01k | std::max<int64_t>(100, config::brpc_connection_check_timeout_ms); |
1213 | 3.01k | |
1214 | 3.01k | while (true) { |
1215 | 3.01k | PHandShakeRequest request; |
1216 | 3.01k | request.set_hello(message); |
1217 | | PHandShakeResponse response; |
1218 | 3.01k | brpc::Controller cntl; |
1219 | 0 | cntl.set_timeout_ms(check_timeout_ms); |
1220 | 0 | cntl.set_max_retry(10); |
1221 | 0 | brpc_stub->hand_shake(&cntl, &request, &response, nullptr); |
1222 | 3.01k | |
1223 | 3.01k | if (cntl.Failed()) { |
1224 | 3.01k | error_message = cntl.ErrorText(); |
1225 | 0 | LOG(WARNING) << "brpc stub: " << brpc_item.network_address.hostname << ":" |
1226 | 0 | << brpc_item.network_address.port << " check failed: " << error_message; |
1227 | 0 | } else if (response.has_status() && response.status().status_code() == 0) { |
1228 | 0 | break; |
1229 | 0 | } else { |
1230 | 0 | error_message = response.DebugString(); |
1231 | 0 | LOG(WARNING) << "brpc stub: " << brpc_item.network_address.hostname << ":" |
1232 | 0 | << brpc_item.network_address.port << " check failed: " << error_message; |
1233 | 0 | } |
1234 | 0 | failed_count++; |
1235 | 0 | if (failed_count == 2) { |
1236 | 0 | for (const auto& query_wptr : brpc_item.queries) { |
1237 | 0 | auto query = query_wptr.lock(); |
1238 | 0 | if (query && !query->is_cancelled()) { |
1239 | 0 | query->cancel(Status::InternalError("brpc(dest: {}:{}) check failed: {}", |
1240 | | brpc_item.network_address.hostname, |
1241 | 0 | brpc_item.network_address.port, |
1242 | 0 | error_message)); |
1243 | 0 | } |
1244 | 0 | } |
1245 | 0 |
|
1246 | 0 | LOG(WARNING) << "remove brpc stub from cache: " << brpc_item.network_address.hostname |
1247 | 0 | << ":" << brpc_item.network_address.port << ", error: " << error_message; |
1248 | 3.01k | ExecEnv::GetInstance()->brpc_internal_client_cache()->erase( |
1249 | | brpc_item.network_address.hostname, brpc_item.network_address.port); |
1250 | 0 | break; |
1251 | | } |
1252 | | } |
1253 | | } |
1254 | | |
1255 | | void FragmentMgr::debug(std::stringstream& ss) {} |
1256 | | /* |
1257 | | * 1. resolve opaqued_query_plan to thrift structure |
1258 | | * 2. build TPipelineFragmentParams |
1259 | 3 | */ |
1260 | | Status FragmentMgr::exec_external_plan_fragment(const TScanOpenParams& params, |
1261 | 3 | const TQueryPlanInfo& t_query_plan_info, |
1262 | 3 | const TUniqueId& query_id, |
1263 | 3 | const TUniqueId& fragment_instance_id, |
1264 | 3 | std::vector<TScanColumnDesc>* selected_columns) { |
1265 | 0 | // set up desc tbl |
1266 | 0 | DescriptorTbl* desc_tbl = nullptr; |
1267 | 0 | ObjectPool obj_pool; |
1268 | 0 | Status st = DescriptorTbl::create(&obj_pool, t_query_plan_info.desc_tbl, &desc_tbl); |
1269 | 0 | if (!st.ok()) { |
1270 | 0 | LOG(WARNING) << "open context error: extract DescriptorTbl failure"; |
1271 | 3 | std::stringstream msg; |
1272 | 3 | msg << " create DescriptorTbl error, should not be modified after returned Doris FE " |
1273 | 0 | "processed"; |
1274 | 0 | return Status::InvalidArgument(msg.str()); |
1275 | 0 | } |
1276 | 0 | TupleDescriptor* tuple_desc = desc_tbl->get_tuple_descriptor(0); |
1277 | 0 | if (tuple_desc == nullptr) { |
1278 | 0 | LOG(WARNING) << "open context error: extract TupleDescriptor failure"; |
1279 | | std::stringstream msg; |
1280 | 50 | msg << " get TupleDescriptor error, should not be modified after returned Doris FE " |
1281 | 50 | "processed"; |
1282 | 50 | return Status::InvalidArgument(msg.str()); |
1283 | 50 | } |
1284 | 50 | // process selected columns form slots |
1285 | 50 | for (const SlotDescriptor* slot : tuple_desc->slots()) { |
1286 | | TScanColumnDesc col; |
1287 | 3 | col.__set_name(slot->col_name()); |
1288 | 0 | col.__set_type(to_thrift(slot->type()->get_primitive_type())); |
1289 | | selected_columns->emplace_back(std::move(col)); |
1290 | 3 | } |
1291 | 3 | |
1292 | 3 | VLOG_QUERY << "BackendService execute open() TQueryPlanInfo: " |
1293 | 3 | << apache::thrift::ThriftDebugString(t_query_plan_info); |
1294 | 3 | // assign the param used to execute PlanFragment |
1295 | | TPipelineFragmentParams exec_fragment_params; |
1296 | | exec_fragment_params.protocol_version = (PaloInternalServiceVersion::type)0; |
1297 | 3 | exec_fragment_params.__set_is_simplified_param(false); |
1298 | 3 | exec_fragment_params.__set_fragment(t_query_plan_info.plan_fragment); |
1299 | 3 | exec_fragment_params.__set_desc_tbl(t_query_plan_info.desc_tbl); |
1300 | 3 | |
1301 | 3 | // assign the param used for executing of PlanFragment-self |
1302 | 3 | TPipelineInstanceParams fragment_exec_params; |
1303 | 3 | exec_fragment_params.query_id = query_id; |
1304 | 3 | fragment_exec_params.fragment_instance_id = fragment_instance_id; |
1305 | 3 | exec_fragment_params.coord.hostname = "external"; |
1306 | 3 | std::map<::doris::TPlanNodeId, std::vector<TScanRangeParams>> per_node_scan_ranges; |
1307 | 3 | std::vector<TScanRangeParams> scan_ranges; |
1308 | 3 | std::vector<int64_t> tablet_ids = params.tablet_ids; |
1309 | 3 | TNetworkAddress address; |
1310 | 3 | address.hostname = BackendOptions::get_localhost(); |
1311 | 3 | address.port = doris::config::be_port; |
1312 | 3 | std::map<int64_t, TTabletVersionInfo> tablet_info = t_query_plan_info.tablet_info; |
1313 | 3 | for (auto tablet_id : params.tablet_ids) { |
1314 | 3 | TPaloScanRange scan_range; |
1315 | 3 | scan_range.db_name = params.database; |
1316 | 3 | scan_range.table_name = params.table; |
1317 | | auto iter = tablet_info.find(tablet_id); |
1318 | 3 | if (iter != tablet_info.end()) { |
1319 | 3 | TTabletVersionInfo info = iter->second; |
1320 | 3 | scan_range.tablet_id = tablet_id; |
1321 | 3 | scan_range.version = std::to_string(info.version); |
1322 | 0 | // Useless but it is required field in TPaloScanRange |
1323 | 0 | scan_range.version_hash = "0"; |
1324 | 0 | scan_range.schema_hash = std::to_string(info.schema_hash); |
1325 | 0 | scan_range.hosts.push_back(address); |
1326 | 0 | } else { |
1327 | 3 | std::stringstream msg; |
1328 | 3 | msg << "tablet_id: " << tablet_id << " not found"; |
1329 | 3 | LOG(WARNING) << "tablet_id [ " << tablet_id << " ] not found"; |
1330 | 3 | return Status::NotFound(msg.str()); |
1331 | 3 | } |
1332 | 3 | TScanRange doris_scan_range; |
1333 | 3 | doris_scan_range.__set_palo_scan_range(scan_range); |
1334 | 3 | TScanRangeParams scan_range_params; |
1335 | 3 | scan_range_params.scan_range = doris_scan_range; |
1336 | 3 | scan_ranges.push_back(scan_range_params); |
1337 | 3 | } |
1338 | 3 | per_node_scan_ranges.insert(std::make_pair((::doris::TPlanNodeId)0, scan_ranges)); |
1339 | 3 | fragment_exec_params.per_node_scan_ranges = per_node_scan_ranges; |
1340 | 3 | exec_fragment_params.local_params.push_back(fragment_exec_params); |
1341 | 3 | TQueryOptions query_options; |
1342 | 3 | query_options.batch_size = params.batch_size; |
1343 | 3 | query_options.execution_timeout = params.execution_timeout; |
1344 | 0 | query_options.mem_limit = params.mem_limit; |
1345 | | query_options.query_type = TQueryType::EXTERNAL; |
1346 | 3 | query_options.be_exec_version = BeExecVersionManager::get_newest_version(); |
1347 | 3 | exec_fragment_params.__set_query_options(query_options); |
1348 | 3 | VLOG_ROW << "external exec_plan_fragment params is " |
1349 | | << apache::thrift::ThriftDebugString(exec_fragment_params).c_str(); |
1350 | | |
1351 | 1.99k | TPipelineFragmentParamsList mocked; |
1352 | 1.99k | return exec_plan_fragment(exec_fragment_params, QuerySource::EXTERNAL_CONNECTOR, mocked); |
1353 | 1.99k | } |
1354 | 1.99k | |
1355 | 1.99k | Status FragmentMgr::apply_filterv2(const PPublishFilterRequestV2* request, |
1356 | 1.99k | butil::IOBufAsZeroCopyInputStream* attach_data) { |
1357 | 1.99k | UniqueId queryid = request->query_id(); |
1358 | 1.99k | TUniqueId query_id; |
1359 | 1.99k | query_id.__set_hi(queryid.hi); |
1360 | | query_id.__set_lo(queryid.lo); |
1361 | | if (auto q_ctx = get_query_ctx(query_id)) { |
1362 | 1.99k | SCOPED_ATTACH_TASK(q_ctx.get()); |
1363 | 1.99k | RuntimeFilterMgr* runtime_filter_mgr = q_ctx->runtime_filter_mgr(); |
1364 | | DCHECK(runtime_filter_mgr != nullptr); |
1365 | | |
1366 | 1.99k | // 1. get the target filters |
1367 | | std::vector<std::shared_ptr<RuntimeFilterConsumer>> filters = |
1368 | 1.99k | runtime_filter_mgr->get_consume_filters(request->filter_id()); |
1369 | 0 |
|
1370 | 0 | // 2. create the filter wrapper to replace or ignore/disable the target filters |
1371 | 1.99k | if (!filters.empty()) { |
1372 | 9.76k | // Discard stale-stage requests from old recursive CTE rounds. |
1373 | 1.99k | if (filters[0]->stage() != request->stage()) { |
1374 | 1.99k | return Status::OK(); |
1375 | 1.99k | } |
1376 | 1.99k | RETURN_IF_ERROR(filters[0]->assign(*request, attach_data)); |
1377 | | std::ranges::for_each(filters, [&](auto& filter) { filter->signal(filters[0].get()); }); |
1378 | 273 | } |
1379 | 273 | } |
1380 | 273 | return Status::OK(); |
1381 | 273 | } |
1382 | 273 | |
1383 | | Status FragmentMgr::send_filter_size(const PSendFilterSizeRequest* request) { |
1384 | 273 | UniqueId queryid = request->query_id(); |
1385 | 273 | TUniqueId query_id; |
1386 | 0 | query_id.__set_hi(queryid.hi); |
1387 | 0 | query_id.__set_lo(queryid.lo); |
1388 | | |
1389 | 273 | if (config::enable_debug_points && |
1390 | 273 | DebugPoints::instance()->is_enable("FragmentMgr::send_filter_size.return_eof")) { |
1391 | 273 | return Status::EndOfFile("inject FragmentMgr::send_filter_size.return_eof"); |
1392 | 0 | } |
1393 | 0 |
|
1394 | 0 | if (auto q_ctx = get_query_ctx(query_id)) { |
1395 | 0 | return q_ctx->get_merge_controller_handler()->send_filter_size(q_ctx, request); |
1396 | 0 | } else { |
1397 | 273 | return Status::EndOfFile( |
1398 | | "Send filter size failed: Query context (query-id: {}) not found, maybe " |
1399 | 273 | "finished", |
1400 | 273 | queryid.to_string()); |
1401 | 273 | } |
1402 | 273 | } |
1403 | 273 | |
1404 | 273 | Status FragmentMgr::sync_filter_size(const PSyncFilterSizeRequest* request) { |
1405 | 273 | UniqueId queryid = request->query_id(); |
1406 | 273 | TUniqueId query_id; |
1407 | 273 | query_id.__set_hi(queryid.hi); |
1408 | 0 | query_id.__set_lo(queryid.lo); |
1409 | 0 | if (auto q_ctx = get_query_ctx(query_id)) { |
1410 | 0 | try { |
1411 | 0 | return q_ctx->runtime_filter_mgr()->sync_filter_size(request); |
1412 | 273 | } catch (const Exception& e) { |
1413 | 0 | return Status::InternalError( |
1414 | 0 | "Sync filter size failed: Query context (query-id: {}) error: {}", |
1415 | 0 | queryid.to_string(), e.what()); |
1416 | 0 | } |
1417 | 273 | } else { |
1418 | | return Status::EndOfFile( |
1419 | | "Sync filter size failed: Query context (query-id: {}) already finished", |
1420 | 7.64k | queryid.to_string()); |
1421 | 7.64k | } |
1422 | | } |
1423 | 7.64k | |
1424 | 7.64k | Status FragmentMgr::merge_filter(const PMergeFilterRequest* request, |
1425 | 7.64k | butil::IOBufAsZeroCopyInputStream* attach_data) { |
1426 | 7.64k | UniqueId queryid = request->query_id(); |
1427 | 7.63k | |
1428 | 7.63k | TUniqueId query_id; |
1429 | 0 | query_id.__set_hi(queryid.hi); |
1430 | 0 | query_id.__set_lo(queryid.lo); |
1431 | 7.63k | if (auto q_ctx = get_query_ctx(query_id)) { |
1432 | 7.63k | SCOPED_ATTACH_TASK(q_ctx.get()); |
1433 | 13 | if (!q_ctx->get_merge_controller_handler()) { |
1434 | 13 | return Status::InternalError("Merge filter failed: Merge controller handler is null"); |
1435 | 13 | } |
1436 | 13 | return q_ctx->get_merge_controller_handler()->merge(q_ctx, request, attach_data); |
1437 | 7.64k | } else { |
1438 | | return Status::EndOfFile( |
1439 | | "Merge filter size failed: Query context (query-id: {}) already finished", |
1440 | 25.4k | queryid.to_string()); |
1441 | 25.4k | } |
1442 | 25.4k | } |
1443 | 3.25M | |
// Collects a weak reference to the ResourceContext of every live query.
// Entries whose QueryContext has already been destroyed are pruned from
// _query_ctx_map as a side effect of the scan.
void FragmentMgr::get_runtime_query_info(
        std::vector<std::weak_ptr<ResourceContext>>* _resource_ctx_list) {
    // NOTE(review): `contexts` is never read; it appears to exist solely to
    // hold strong references so the QueryContexts outlive the apply() scan —
    // confirm before removing it.
    std::vector<std::shared_ptr<QueryContext>> contexts;
    _query_ctx_map.apply(
            [&](phmap::flat_hash_map<TUniqueId, std::weak_ptr<QueryContext>>& map) -> Status {
                for (auto iter = map.begin(); iter != map.end();) {
                    if (auto q_ctx = iter->second.lock()) {
                        _resource_ctx_list->push_back(q_ctx->resource_ctx());
                        contexts.push_back(q_ctx);
                        iter++;
                    } else {
                        // Query context released: drop the stale map entry.
                        iter = map.erase(iter);
                    }
                }
                return Status::OK();
            });
}
1461 | 0 |
|
1462 | | Status FragmentMgr::get_realtime_exec_status(const TUniqueId& query_id, |
1463 | 0 | TReportExecStatusParams* exec_status) { |
1464 | 0 | if (exec_status == nullptr) { |
1465 | 0 | return Status::InvalidArgument("exes_status is nullptr"); |
1466 | 0 | } |
1467 | | |
1468 | 0 | std::shared_ptr<QueryContext> query_context = get_query_ctx(query_id); |
1469 | | if (query_context == nullptr) { |
1470 | 0 | return Status::NotFound("Query {} not found or released", print_id(query_id)); |
1471 | 0 | } |
1472 | | |
1473 | 1 | *exec_status = query_context->get_realtime_exec_status(); |
1474 | 1 |
|
1475 | 0 | return Status::OK(); |
1476 | 0 | } |
1477 | | |
1478 | 1 | Status FragmentMgr::get_query_statistics(const TUniqueId& query_id, TQueryStatistics* query_stats) { |
1479 | 1 | if (query_stats == nullptr) { |
1480 | 1 | return Status::InvalidArgument("query_stats is nullptr"); |
1481 | | } |
1482 | | |
1483 | | return ExecEnv::GetInstance()->runtime_query_statistics_mgr()->get_query_statistics( |
1484 | 3.76k | print_id(query_id), query_stats); |
1485 | 3.76k | } |
1486 | 3.76k | |
1487 | 3.76k | Status FragmentMgr::transmit_rec_cte_block( |
1488 | 3.76k | const TUniqueId& query_id, const TUniqueId& instance_id, int node_id, |
1489 | 0 | const google::protobuf::RepeatedPtrField<doris::PBlock>& pblocks, bool eos) { |
1490 | 0 | if (auto q_ctx = get_query_ctx(query_id)) { |
1491 | 0 | SCOPED_ATTACH_TASK(q_ctx.get()); |
1492 | 0 | return q_ctx->send_block_to_cte_scan(instance_id, node_id, pblocks, eos); |
1493 | 0 | } else { |
1494 | 3.76k | return Status::EndOfFile( |
1495 | | "Transmit rec cte block failed: Query context (query-id: {}) not found, maybe " |
1496 | | "finished", |
1497 | | print_id(query_id)); |
1498 | | } |
1499 | | } |
1500 | | |
// Orchestrates the recursive CTE fragment lifecycle through 4 phases:
//
// wait_for_destroy: collect deregister RF IDs, store brpc closure, trigger old PFC close
// rebuild: increment stage, deregister old RFs, create+prepare new PFC from saved params
// submit: submit the new PFC's pipeline tasks for execution
// final_close: async wait for close, send final report, clean up (last round only)
//
// The brpc ClosureGuard is stored in the PFC so the RPC response is deferred until
// the PFC is fully destroyed. This gives the caller (RecCTESourceOperatorX) a
// synchronization point to know when the old PFC has finished all its tasks.
Status FragmentMgr::rerun_fragment(const std::shared_ptr<brpc::ClosureGuard>& guard,
                                   const TUniqueId& query_id, int fragment_id,
                                   PRerunFragmentParams_Opcode stage) {
    if (stage == PRerunFragmentParams::WAIT_FOR_DESTROY ||
        stage == PRerunFragmentParams::FINAL_CLOSE) {
        auto fragment_ctx = _pipeline_map.find({query_id, fragment_id});
        if (!fragment_ctx) {
            return Status::NotFound("Fragment context (query-id: {}, fragment-id: {}) not found",
                                    print_id(query_id), fragment_id);
        }

        if (stage == PRerunFragmentParams::WAIT_FOR_DESTROY) {
            std::unique_lock<std::mutex> lk(_rerunnable_params_lock);
            auto it = _rerunnable_params_map.find({query_id, fragment_id});
            if (it == _rerunnable_params_map.end()) {
                // No saved rerun params for this fragment. The lock is released
                // before listen_wait_close so the closure is still registered
                // (and the RPC response still deferred) without holding
                // _rerunnable_params_lock across the call.
                lk.unlock();
                auto st = fragment_ctx->listen_wait_close(guard, true);
                if (!st.ok()) {
                    LOG(WARNING) << fmt::format(
                            "wait_for_destroy fragment context (query-id: {}, fragment-id: "
                            "{}) failed: {}",
                            print_id(query_id), fragment_id, st.to_string());
                }
                return Status::NotFound(
                        "Rerunnable params (query-id: {}, fragment-id: {}) not found",
                        print_id(query_id), fragment_id);
            }

            // Accumulate the RF ids of the closing PFC so REBUILD can
            // deregister them before registering the new round's filters.
            it->second.deregister_runtime_filter_ids.merge(
                    fragment_ctx->get_deregister_runtime_filter());
        }

        auto* query_ctx = fragment_ctx->get_query_ctx();
        SCOPED_ATTACH_TASK(query_ctx);
        RETURN_IF_ERROR(
                fragment_ctx->listen_wait_close(guard, stage == PRerunFragmentParams::FINAL_CLOSE));
        fragment_ctx->notify_close();
        return Status::OK();
    } else if (stage == PRerunFragmentParams::REBUILD) {
        auto q_ctx = get_query_ctx(query_id);
        if (!q_ctx) {
            return Status::NotFound(
                    "rerun_fragment: Query context (query-id: {}) not found, maybe finished",
                    print_id(query_id));
        }
        SCOPED_ATTACH_TASK(q_ctx.get());
        RerunableFragmentInfo info;
        {
            // Bump the stage and copy the rerun params out under the lock;
            // the new PFC is built after the lock is released.
            std::lock_guard<std::mutex> lk(_rerunnable_params_lock);
            auto it = _rerunnable_params_map.find({query_id, fragment_id});
            if (it == _rerunnable_params_map.end()) {
                return Status::NotFound("rebuild (query-id: {}, fragment-id: {}) not found",
                                        print_id(query_id), fragment_id);
            }
            it->second.stage++;
            // Deregister old runtime filters so new ones can be registered in the new PFC.
            for (int32_t filter_id : it->second.deregister_runtime_filter_ids) {
                q_ctx->runtime_filter_mgr()->remove_filter(filter_id);
            }
            info = it->second;
        }

        auto context = std::make_shared<PipelineFragmentContext>(
                q_ctx->query_id(), info.params, q_ctx, _exec_env, info.finish_callback,
                [this](const ReportStatusRequest& req, auto&& ctx) {
                    return this->trigger_pipeline_context_report(req, std::move(ctx));
                });
        // Propagate the recursion stage so that runtime filters created by this PFC
        // carry the correct stage number.
        context->set_rec_cte_stage(info.stage);

        Status prepare_st = Status::OK();
        ASSIGN_STATUS_IF_CATCH_EXCEPTION(prepare_st = context->prepare(_thread_pool.get()),
                                         prepare_st);
        if (!prepare_st.ok()) {
            q_ctx->cancel(prepare_st, info.params.fragment_id);
            return prepare_st;
        }

        // Insert new PFC into _pipeline_map (old one was removed)
        _pipeline_map.insert({info.params.query_id, info.params.fragment_id}, context);

        // Update QueryContext mapping (must support overwrite)
        q_ctx->set_pipeline_context(info.params.fragment_id, context);
        return Status::OK();

    } else if (stage == PRerunFragmentParams::SUBMIT) {
        auto fragment_ctx = _pipeline_map.find({query_id, fragment_id});
        if (!fragment_ctx) {
            return Status::NotFound("Fragment context (query-id: {}, fragment-id: {}) not found",
                                    print_id(query_id), fragment_id);
        }
        return fragment_ctx->submit();
    } else {
        return Status::InvalidArgument("Unknown rerun fragment opcode: {}", stage);
    }
}
1608 | 1.82k | |
1609 | 1.82k | Status FragmentMgr::reset_global_rf(const TUniqueId& query_id, |
1610 | 0 | const google::protobuf::RepeatedField<int32_t>& filter_ids) { |
1611 | 0 | if (auto q_ctx = get_query_ctx(query_id)) { |
1612 | 0 | SCOPED_ATTACH_TASK(q_ctx.get()); |
1613 | 0 | return q_ctx->reset_global_rf(filter_ids); |
1614 | 1.82k | } else { |
1615 | | return Status::NotFound( |
1616 | | "reset_fragment: Query context (query-id: {}) not found, maybe finished", |
1617 | | print_id(query_id)); |
1618 | | } |
1619 | | } |
1620 | | |
1621 | | #include "common/compile_check_end.h" |
1622 | | |
1623 | | } // namespace doris |