be/src/exec/pipeline/dependency.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #ifdef __APPLE__ |
21 | | #include <netinet/in.h> |
22 | | #include <sys/_types/_u_int.h> |
23 | | #endif |
24 | | |
25 | | #include <concurrentqueue.h> |
26 | | #include <gen_cpp/internal_service.pb.h> |
27 | | #include <sqltypes.h> |
28 | | |
29 | | #include <atomic> |
30 | | #include <functional> |
31 | | #include <memory> |
32 | | #include <mutex> |
33 | | #include <thread> |
34 | | #include <utility> |
35 | | |
36 | | #include "common/config.h" |
37 | | #include "common/logging.h" |
38 | | #include "core/block/block.h" |
39 | | #include "core/types.h" |
40 | | #include "exec/common/agg_context.h" |
41 | | #include "exec/common/agg_utils.h" |
42 | | #include "exec/common/join_utils.h" |
43 | | #include "exec/common/set_utils.h" |
44 | | #include "exec/operator/data_queue.h" |
45 | | #include "exec/operator/join/process_hash_table_probe.h" |
46 | | #include "exec/sort/partition_sorter.h" |
47 | | #include "exec/sort/sorter.h" |
48 | | #include "exec/spill/spill_file.h" |
49 | | #include "runtime/runtime_profile_counter_names.h" |
50 | | #include "util/brpc_closure.h" |
51 | | #include "util/stack_util.h" |
52 | | |
53 | | namespace doris { |
54 | | class AggFnEvaluator; |
55 | | class VSlotRef; |
56 | | } // namespace doris |
57 | | |
58 | | namespace doris { |
59 | | #include "common/compile_check_begin.h" |
60 | | class Dependency; |
61 | | class PipelineTask; |
62 | | struct BasicSharedState; |
63 | | using DependencySPtr = std::shared_ptr<Dependency>; |
64 | | class LocalExchangeSourceLocalState; |
65 | | |
66 | | static constexpr auto SLOW_DEPENDENCY_THRESHOLD = 60 * 1000L * 1000L * 1000L; |
67 | | static constexpr auto TIME_UNIT_DEPENDENCY_LOG = 30 * 1000L * 1000L * 1000L; |
68 | | static_assert(TIME_UNIT_DEPENDENCY_LOG < SLOW_DEPENDENCY_THRESHOLD); |
69 | | |
70 | | struct BasicSharedState { |
71 | | ENABLE_FACTORY_CREATOR(BasicSharedState) |
72 | | |
73 | | template <class TARGET> |
74 | 96.5k | TARGET* cast() { |
75 | 96.5k | DCHECK(dynamic_cast<TARGET*>(this)) |
76 | 0 | << " Mismatch type! Current type is " << typeid(*this).name() |
77 | 0 | << " and expect type is" << typeid(TARGET).name(); |
78 | 96.5k | return reinterpret_cast<TARGET*>(this); |
79 | 96.5k | } _ZN5doris16BasicSharedState4castINS_19HashJoinSharedStateEEEPT_v Line | Count | Source | 74 | 96.0k | TARGET* cast() { | 75 | 96.0k | DCHECK(dynamic_cast<TARGET*>(this)) | 76 | 0 | << " Mismatch type! Current type is " << typeid(*this).name() | 77 | 0 | << " and expect type is" << typeid(TARGET).name(); | 78 | 96.0k | return reinterpret_cast<TARGET*>(this); | 79 | 96.0k | } |
_ZN5doris16BasicSharedState4castINS_30PartitionedHashJoinSharedStateEEEPT_v Line | Count | Source | 74 | 3 | TARGET* cast() { | 75 | 3 | DCHECK(dynamic_cast<TARGET*>(this)) | 76 | 0 | << " Mismatch type! Current type is " << typeid(*this).name() | 77 | 0 | << " and expect type is" << typeid(TARGET).name(); | 78 | 3 | return reinterpret_cast<TARGET*>(this); | 79 | 3 | } |
_ZN5doris16BasicSharedState4castINS_15SortSharedStateEEEPT_v Line | Count | Source | 74 | 49 | TARGET* cast() { | 75 | 49 | DCHECK(dynamic_cast<TARGET*>(this)) | 76 | 0 | << " Mismatch type! Current type is " << typeid(*this).name() | 77 | 0 | << " and expect type is" << typeid(TARGET).name(); | 78 | 49 | return reinterpret_cast<TARGET*>(this); | 79 | 49 | } |
_ZN5doris16BasicSharedState4castINS_20SpillSortSharedStateEEEPT_v Line | Count | Source | 74 | 28 | TARGET* cast() { | 75 | 28 | DCHECK(dynamic_cast<TARGET*>(this)) | 76 | 0 | << " Mismatch type! Current type is " << typeid(*this).name() | 77 | 0 | << " and expect type is" << typeid(TARGET).name(); | 78 | 28 | return reinterpret_cast<TARGET*>(this); | 79 | 28 | } |
Unexecuted instantiation: _ZN5doris16BasicSharedState4castINS_25NestedLoopJoinSharedStateEEEPT_v _ZN5doris16BasicSharedState4castINS_19AnalyticSharedStateEEEPT_v Line | Count | Source | 74 | 18 | TARGET* cast() { | 75 | 18 | DCHECK(dynamic_cast<TARGET*>(this)) | 76 | 0 | << " Mismatch type! Current type is " << typeid(*this).name() | 77 | 0 | << " and expect type is" << typeid(TARGET).name(); | 78 | 18 | return reinterpret_cast<TARGET*>(this); | 79 | 18 | } |
_ZN5doris16BasicSharedState4castINS_14AggSharedStateEEEPT_v Line | Count | Source | 74 | 84 | TARGET* cast() { | 75 | 84 | DCHECK(dynamic_cast<TARGET*>(this)) | 76 | 0 | << " Mismatch type! Current type is " << typeid(*this).name() | 77 | 0 | << " and expect type is" << typeid(TARGET).name(); | 78 | 84 | return reinterpret_cast<TARGET*>(this); | 79 | 84 | } |
_ZN5doris16BasicSharedState4castINS_25PartitionedAggSharedStateEEEPT_v Line | Count | Source | 74 | 30 | TARGET* cast() { | 75 | 30 | DCHECK(dynamic_cast<TARGET*>(this)) | 76 | 0 | << " Mismatch type! Current type is " << typeid(*this).name() | 77 | 0 | << " and expect type is" << typeid(TARGET).name(); | 78 | 30 | return reinterpret_cast<TARGET*>(this); | 79 | 30 | } |
_ZN5doris16BasicSharedState4castINS_16UnionSharedStateEEEPT_v Line | Count | Source | 74 | 4 | TARGET* cast() { | 75 | 4 | DCHECK(dynamic_cast<TARGET*>(this)) | 76 | 0 | << " Mismatch type! Current type is " << typeid(*this).name() | 77 | 0 | << " and expect type is" << typeid(TARGET).name(); | 78 | 4 | return reinterpret_cast<TARGET*>(this); | 79 | 4 | } |
_ZN5doris16BasicSharedState4castINS_28PartitionSortNodeSharedStateEEEPT_v Line | Count | Source | 74 | 204 | TARGET* cast() { | 75 | 204 | DCHECK(dynamic_cast<TARGET*>(this)) | 76 | 0 | << " Mismatch type! Current type is " << typeid(*this).name() | 77 | 0 | << " and expect type is" << typeid(TARGET).name(); | 78 | 204 | return reinterpret_cast<TARGET*>(this); | 79 | 204 | } |
Unexecuted instantiation: _ZN5doris16BasicSharedState4castINS_20MultiCastSharedStateEEEPT_v _ZN5doris16BasicSharedState4castINS_14SetSharedStateEEEPT_v Line | Count | Source | 74 | 39 | TARGET* cast() { | 75 | 39 | DCHECK(dynamic_cast<TARGET*>(this)) | 76 | 0 | << " Mismatch type! Current type is " << typeid(*this).name() | 77 | 0 | << " and expect type is" << typeid(TARGET).name(); | 78 | 39 | return reinterpret_cast<TARGET*>(this); | 79 | 39 | } |
Unexecuted instantiation: _ZN5doris16BasicSharedState4castINS_24LocalExchangeSharedStateEEEPT_v _ZN5doris16BasicSharedState4castIS0_EEPT_v Line | Count | Source | 74 | 16 | TARGET* cast() { | 75 | 16 | DCHECK(dynamic_cast<TARGET*>(this)) | 76 | 0 | << " Mismatch type! Current type is " << typeid(*this).name() | 77 | 0 | << " and expect type is" << typeid(TARGET).name(); | 78 | 16 | return reinterpret_cast<TARGET*>(this); | 79 | 16 | } |
_ZN5doris16BasicSharedState4castINS_20DataQueueSharedStateEEEPT_v Line | Count | Source | 74 | 6 | TARGET* cast() { | 75 | 6 | DCHECK(dynamic_cast<TARGET*>(this)) | 76 | 0 | << " Mismatch type! Current type is " << typeid(*this).name() | 77 | 0 | << " and expect type is" << typeid(TARGET).name(); | 78 | 6 | return reinterpret_cast<TARGET*>(this); | 79 | 6 | } |
Unexecuted instantiation: _ZN5doris16BasicSharedState4castINS_17RecCTESharedStateEEEPT_v |
80 | | template <class TARGET> |
81 | | const TARGET* cast() const { |
82 | | DCHECK(dynamic_cast<const TARGET*>(this)) |
83 | | << " Mismatch type! Current type is " << typeid(*this).name() |
84 | | << " and expect type is" << typeid(TARGET).name(); |
85 | | return reinterpret_cast<const TARGET*>(this); |
86 | | } |
87 | | std::vector<DependencySPtr> source_deps; |
88 | | std::vector<DependencySPtr> sink_deps; |
89 | | int id = 0; |
90 | | std::set<int> related_op_ids; |
91 | | |
92 | 72.4k | virtual ~BasicSharedState() = default; |
93 | | |
94 | | void create_source_dependencies(int num_sources, int operator_id, int node_id, |
95 | | const std::string& name); |
96 | | Dependency* create_source_dependency(int operator_id, int node_id, const std::string& name); |
97 | | |
98 | | Dependency* create_sink_dependency(int dest_id, int node_id, const std::string& name); |
99 | 24 | std::vector<DependencySPtr> get_dep_by_channel_id(int channel_id) { |
100 | 24 | DCHECK_LT(channel_id, source_deps.size()); |
101 | 24 | return {source_deps[channel_id]}; |
102 | 24 | } |
103 | | }; |
104 | | |
105 | | class Dependency : public std::enable_shared_from_this<Dependency> { |
106 | | public: |
107 | | ENABLE_FACTORY_CREATOR(Dependency); |
108 | | Dependency(int id, int node_id, std::string name, bool ready = false) |
109 | 485k | : _id(id), _node_id(node_id), _name(std::move(name)), _ready(ready) {} |
110 | 485k | virtual ~Dependency() = default; |
111 | | |
112 | 0 | [[nodiscard]] int id() const { return _id; } |
113 | 96.5k | [[nodiscard]] virtual std::string name() const { return _name; } |
114 | 4 | BasicSharedState* shared_state() { return _shared_state; } |
115 | 144k | void set_shared_state(BasicSharedState* shared_state) { _shared_state = shared_state; } |
116 | | virtual std::string debug_string(int indentation_level = 0); |
117 | 826M | bool ready() const { return _ready; } |
118 | | |
119 | | // Start the watcher. We use it to count how long this dependency block the current pipeline task. |
120 | 25 | void start_watcher() { _watcher.start(); } |
121 | 96.1k | [[nodiscard]] int64_t watcher_elapse_time() { return _watcher.elapsed_time(); } |
122 | | |
123 | | // Which dependency current pipeline task is blocked by. `nullptr` if this dependency is ready. |
124 | | [[nodiscard]] Dependency* is_blocked_by(std::shared_ptr<PipelineTask> task = nullptr); |
125 | | // Notify downstream pipeline tasks this dependency is ready. |
126 | | void set_ready(); |
127 | 48.6k | void set_ready_to_read(int channel_id = 0) { |
128 | 48.6k | DCHECK_LT(channel_id, _shared_state->source_deps.size()) << debug_string(); |
129 | 48.6k | _shared_state->source_deps[channel_id]->set_ready(); |
130 | 48.6k | } |
131 | 0 | void set_ready_to_write() { |
132 | 0 | DCHECK_EQ(_shared_state->sink_deps.size(), 1) << debug_string(); |
133 | 0 | _shared_state->sink_deps.front()->set_ready(); |
134 | 0 | } |
135 | | |
136 | | // Notify downstream pipeline tasks this dependency is blocked. |
137 | 1.90k | void block() { |
138 | 1.90k | if (_always_ready) { |
139 | 11 | return; |
140 | 11 | } |
141 | 1.89k | std::unique_lock<std::mutex> lc(_always_ready_lock); |
142 | 1.89k | if (_always_ready) { |
143 | 0 | return; |
144 | 0 | } |
145 | 1.89k | _ready = false; |
146 | 1.89k | } |
147 | | |
148 | 141 | void set_always_ready() { |
149 | 141 | if (_always_ready) { |
150 | 39 | return; |
151 | 39 | } |
152 | 102 | std::unique_lock<std::mutex> lc(_always_ready_lock); |
153 | 102 | if (_always_ready) { |
154 | 0 | return; |
155 | 0 | } |
156 | 102 | _always_ready = true; |
157 | 102 | set_ready(); |
158 | 102 | } |
159 | | |
160 | | protected: |
161 | | void _add_block_task(std::shared_ptr<PipelineTask> task); |
162 | | |
163 | | const int _id; |
164 | | const int _node_id; |
165 | | const std::string _name; |
166 | | std::atomic<bool> _ready; |
167 | | |
168 | | BasicSharedState* _shared_state = nullptr; |
169 | | MonotonicStopWatch _watcher; |
170 | | |
171 | | std::mutex _task_lock; |
172 | | std::vector<std::weak_ptr<PipelineTask>> _blocked_task; |
173 | | |
174 | | // If `_always_ready` is true, `block()` will never block tasks. |
175 | | std::atomic<bool> _always_ready = false; |
176 | | std::mutex _always_ready_lock; |
177 | | }; |
178 | | |
179 | | struct FakeSharedState final : public BasicSharedState { |
180 | | ENABLE_FACTORY_CREATOR(FakeSharedState) |
181 | | }; |
182 | | |
183 | | class CountedFinishDependency final : public Dependency { |
184 | | public: |
185 | | using SharedState = FakeSharedState; |
186 | | CountedFinishDependency(int id, int node_id, std::string name) |
187 | 96.0k | : Dependency(id, node_id, std::move(name), true) {} |
188 | | |
189 | 8 | void add(uint32_t count = 1) { |
190 | 8 | std::unique_lock<std::mutex> l(_mtx); |
191 | 8 | if (!_counter) { |
192 | 7 | block(); |
193 | 7 | } |
194 | 8 | _counter += count; |
195 | 8 | } |
196 | | |
197 | 7 | void sub() { |
198 | 7 | std::unique_lock<std::mutex> l(_mtx); |
199 | 7 | _counter--; |
200 | 7 | if (!_counter) { |
201 | 6 | set_ready(); |
202 | 6 | } |
203 | 7 | } |
204 | | |
205 | | std::string debug_string(int indentation_level = 0) override; |
206 | | |
207 | | private: |
208 | | std::mutex _mtx; |
209 | | uint32_t _counter = 0; |
210 | | }; |
211 | | |
212 | | struct RuntimeFilterTimerQueue; |
213 | | class RuntimeFilterTimer { |
214 | | public: |
215 | | RuntimeFilterTimer(int64_t registration_time, int32_t wait_time_ms, |
216 | | std::shared_ptr<Dependency> parent, bool force_wait_timeout = false) |
217 | 2 | : _parent(std::move(parent)), |
218 | 2 | _registration_time(registration_time), |
219 | 2 | _wait_time_ms(wait_time_ms), |
220 | 2 | _force_wait_timeout(force_wait_timeout) {} |
221 | | |
222 | | // Called by runtime filter producer. |
223 | | void call_ready(); |
224 | | |
225 | | // Called by RuntimeFilterTimerQueue which is responsible for checking if this rf is timeout. |
226 | | void call_timeout(); |
227 | | |
228 | 2 | int64_t registration_time() const { return _registration_time; } |
229 | 2 | int32_t wait_time_ms() const { return _wait_time_ms; } |
230 | | |
231 | | void set_local_runtime_filter_dependencies( |
232 | 0 | const std::vector<std::shared_ptr<Dependency>>& deps) { |
233 | 0 | _local_runtime_filter_dependencies = deps; |
234 | 0 | } |
235 | | |
236 | | bool should_be_check_timeout(); |
237 | | |
238 | 2 | bool force_wait_timeout() { return _force_wait_timeout; } |
239 | | |
240 | | private: |
241 | | friend struct RuntimeFilterTimerQueue; |
242 | | std::shared_ptr<Dependency> _parent = nullptr; |
243 | | std::vector<std::shared_ptr<Dependency>> _local_runtime_filter_dependencies; |
244 | | std::mutex _lock; |
245 | | int64_t _registration_time; |
246 | | const int32_t _wait_time_ms; |
247 | | // true only for group_commit_scan_operator |
248 | | bool _force_wait_timeout; |
249 | | }; |
250 | | |
251 | | struct RuntimeFilterTimerQueue { |
252 | | constexpr static int64_t interval = 10; |
253 | 1 | void run() { _thread.detach(); } |
254 | | void start(); |
255 | | |
256 | 0 | void stop() { |
257 | 0 | _stop = true; |
258 | 0 | cv.notify_all(); |
259 | 0 | wait_for_shutdown(); |
260 | 0 | } |
261 | | |
262 | 0 | void wait_for_shutdown() const { |
263 | 0 | while (!_shutdown) { |
264 | 0 | std::this_thread::sleep_for(std::chrono::milliseconds(interval)); |
265 | 0 | } |
266 | 0 | } |
267 | | |
268 | 0 | ~RuntimeFilterTimerQueue() = default; |
269 | 1 | RuntimeFilterTimerQueue() { _thread = std::thread(&RuntimeFilterTimerQueue::start, this); } |
270 | 1 | void push_filter_timer(std::vector<std::shared_ptr<RuntimeFilterTimer>>&& filter) { |
271 | 1 | std::unique_lock<std::mutex> lc(_que_lock); |
272 | 1 | _que.insert(_que.end(), filter.begin(), filter.end()); |
273 | 1 | cv.notify_all(); |
274 | 1 | } |
275 | | |
276 | | std::thread _thread; |
277 | | std::condition_variable cv; |
278 | | std::mutex cv_m; |
279 | | std::mutex _que_lock; |
280 | | std::atomic_bool _stop = false; |
281 | | std::atomic_bool _shutdown = false; |
282 | | std::list<std::shared_ptr<RuntimeFilterTimer>> _que; |
283 | | }; |
284 | | |
285 | | struct AggSharedState : public BasicSharedState { |
286 | | ENABLE_FACTORY_CREATOR(AggSharedState) |
287 | | public: |
288 | 51 | AggSharedState() = default; |
289 | 51 | ~AggSharedState() override { |
290 | | // Explicitly close context before destruction. close() is virtual and must be |
291 | | // called while the derived object (e.g. InlineCountAggContext) is still alive, |
292 | | // not from the base class destructor where vtable has already reverted. |
293 | | // close() is idempotent: GroupByAggContext::close sets mapped=nullptr after destroy; |
294 | | // UngroupByAggContext::close has _agg_state_created guard. |
295 | 51 | if (agg_ctx) { |
296 | 50 | agg_ctx->close(); |
297 | 50 | agg_ctx.reset(); |
298 | 50 | } |
299 | 51 | } |
300 | | |
301 | | std::unique_ptr<AggContext> agg_ctx; |
302 | | |
303 | | // Kept in AggSharedState (used by Source operators for output key conversion). |
304 | | std::vector<size_t> make_nullable_keys; |
305 | | |
306 | | // Spill support (set by Sink operator during open). |
307 | | bool enable_spill = false; |
308 | | }; |
309 | | |
310 | | struct PartitionedAggSharedState : public BasicSharedState, |
311 | | public std::enable_shared_from_this<PartitionedAggSharedState> { |
312 | | ENABLE_FACTORY_CREATOR(PartitionedAggSharedState) |
313 | | |
314 | 39 | PartitionedAggSharedState() = default; |
315 | 39 | ~PartitionedAggSharedState() override = default; |
316 | | |
317 | | void close(); |
318 | | |
319 | | AggSharedState* _in_mem_shared_state = nullptr; |
320 | | std::shared_ptr<BasicSharedState> _in_mem_shared_state_sptr; |
321 | | |
322 | | // partition count is no longer stored in shared state; operators maintain their own |
323 | | std::atomic<bool> _is_spilled = false; |
324 | | std::deque<SpillFileSPtr> _spill_partitions; |
325 | | }; |
326 | | |
327 | | struct SortSharedState : public BasicSharedState { |
328 | | ENABLE_FACTORY_CREATOR(SortSharedState) |
329 | | public: |
330 | | std::shared_ptr<Sorter> sorter; |
331 | | }; |
332 | | |
333 | | struct SpillSortSharedState : public BasicSharedState, |
334 | | public std::enable_shared_from_this<SpillSortSharedState> { |
335 | | ENABLE_FACTORY_CREATOR(SpillSortSharedState) |
336 | | |
337 | 19 | SpillSortSharedState() = default; |
338 | 19 | ~SpillSortSharedState() override = default; |
339 | | |
340 | 11 | void update_spill_block_batch_row_count(RuntimeState* state, const Block* block) { |
341 | 11 | auto rows = block->rows(); |
342 | 11 | if (rows > 0 && 0 == avg_row_bytes) { |
343 | 7 | avg_row_bytes = std::max((std::size_t)1, block->bytes() / rows); |
344 | 7 | spill_block_batch_row_count = |
345 | 7 | (state->spill_buffer_size_bytes() + avg_row_bytes - 1) / avg_row_bytes; |
346 | 7 | LOG(INFO) << "spill sort block batch row count: " << spill_block_batch_row_count; |
347 | 7 | } |
348 | 11 | } |
349 | | |
350 | | void close(); |
351 | | |
352 | | SortSharedState* in_mem_shared_state = nullptr; |
353 | | bool enable_spill = false; |
354 | | bool is_spilled = false; |
355 | | int64_t limit = -1; |
356 | | int64_t offset = 0; |
357 | | std::atomic_bool is_closed = false; |
358 | | std::shared_ptr<BasicSharedState> in_mem_shared_state_sptr; |
359 | | |
360 | | std::deque<SpillFileSPtr> sorted_spill_groups; |
361 | | size_t avg_row_bytes = 0; |
362 | | size_t spill_block_batch_row_count; |
363 | | }; |
364 | | |
365 | | struct UnionSharedState : public BasicSharedState { |
366 | | ENABLE_FACTORY_CREATOR(UnionSharedState) |
367 | | |
368 | | public: |
369 | 1 | UnionSharedState(int child_count = 1) : data_queue(child_count), _child_count(child_count) {}; |
370 | 0 | int child_count() const { return _child_count; } |
371 | | DataQueue data_queue; |
372 | | const int _child_count; |
373 | | }; |
374 | | |
375 | | struct DataQueueSharedState : public BasicSharedState { |
376 | | ENABLE_FACTORY_CREATOR(DataQueueSharedState) |
377 | | public: |
378 | | DataQueue data_queue; |
379 | | }; |
380 | | |
381 | | class MultiCastDataStreamer; |
382 | | |
383 | | struct MultiCastSharedState : public BasicSharedState, |
384 | | public std::enable_shared_from_this<MultiCastSharedState> { |
385 | | MultiCastSharedState(ObjectPool* pool, int cast_sender_count, int node_id); |
386 | | |
387 | | std::unique_ptr<MultiCastDataStreamer> multi_cast_data_streamer; |
388 | | }; |
389 | | |
390 | | struct AnalyticSharedState : public BasicSharedState { |
391 | | ENABLE_FACTORY_CREATOR(AnalyticSharedState) |
392 | | |
393 | | public: |
394 | 9 | AnalyticSharedState() = default; |
395 | | std::queue<Block> blocks_buffer; |
396 | | std::mutex buffer_mutex; |
397 | | bool sink_eos = false; |
398 | | std::mutex sink_eos_lock; |
399 | | Arena agg_arena_pool; |
400 | | }; |
401 | | |
402 | | struct JoinSharedState : public BasicSharedState { |
403 | | // For some join case, we can apply a short circuit strategy |
404 | | // 1. _has_null_in_build_side = true |
405 | | // 2. build side rows is empty, Join op is: inner join/right outer join/left semi/right semi/right anti |
406 | | bool _has_null_in_build_side = false; |
407 | | bool short_circuit_for_probe = false; |
408 | | // for some join, when build side rows is empty, we could return directly by add some additional null data in probe table. |
409 | | bool empty_right_table_need_probe_dispose = false; |
410 | | JoinOpVariants join_op_variants; |
411 | | }; |
412 | | |
413 | | struct HashJoinSharedState : public JoinSharedState { |
414 | | ENABLE_FACTORY_CREATOR(HashJoinSharedState) |
415 | 72.1k | HashJoinSharedState() { |
416 | 72.1k | hash_table_variant_vector.push_back(std::make_shared<JoinDataVariants>()); |
417 | 72.1k | } |
418 | 1 | HashJoinSharedState(int num_instances) { |
419 | 1 | source_deps.resize(num_instances, nullptr); |
420 | 1 | hash_table_variant_vector.resize(num_instances, nullptr); |
421 | 9 | for (int i = 0; i < num_instances; i++) { |
422 | 8 | hash_table_variant_vector[i] = std::make_shared<JoinDataVariants>(); |
423 | 8 | } |
424 | 1 | } |
425 | | std::shared_ptr<Arena> arena = std::make_shared<Arena>(); |
426 | | |
427 | | const std::vector<TupleDescriptor*> build_side_child_desc; |
428 | | size_t build_exprs_size = 0; |
429 | | std::shared_ptr<Block> build_block; |
430 | | std::shared_ptr<std::vector<uint32_t>> build_indexes_null; |
431 | | |
432 | | // Used by shared hash table |
433 | | // For probe operator, hash table in _hash_table_variants is read-only if visited flags is not |
434 | | // used. (visited flags will be used only in right / full outer join). |
435 | | // |
436 | | // For broadcast join, although hash table is read-only, some states in `_hash_table_variants` |
437 | | // are still could be written. For example, serialized keys will be written in a continuous |
438 | | // memory in `_hash_table_variants`. So before execution, we should use a local _hash_table_variants |
439 | | // which has a shared hash table in it. |
440 | | std::vector<std::shared_ptr<JoinDataVariants>> hash_table_variant_vector; |
441 | | |
442 | | // whether left semi join could directly return |
443 | | // if runtime filters contains local in filter, we can make sure all input rows are matched |
444 | | // local filter will always be applied, and in filter could guarantee precise filtering |
445 | | // ATTN: we should disable always_true logic for in filter when we set this flag |
446 | | bool left_semi_direct_return = false; |
447 | | |
448 | | // ASOF JOIN specific fields |
449 | | // Whether the inequality is >= or > (true) vs <= or < (false) |
450 | | bool asof_inequality_is_greater = true; |
451 | | // Whether the inequality is strict (> or <) vs non-strict (>= or <=) |
452 | | bool asof_inequality_is_strict = false; |
453 | | |
454 | | // ASOF JOIN pre-sorted index with inline values for O(log K) branchless lookup |
455 | | // Typed AsofIndexGroups stored in a variant (uint32_t for DateV2, uint64_t for DateTimeV2/TimestampTZ) |
456 | | AsofIndexVariant asof_index_groups; |
457 | | // build_row_index -> bucket_id for O(1) reverse lookup |
458 | | std::vector<uint32_t> asof_build_row_to_bucket; |
459 | | }; |
460 | | |
461 | | struct PartitionedHashJoinSharedState |
462 | | : public HashJoinSharedState, |
463 | | public std::enable_shared_from_this<PartitionedHashJoinSharedState> { |
464 | | ENABLE_FACTORY_CREATOR(PartitionedHashJoinSharedState) |
465 | | |
466 | | std::unique_ptr<RuntimeState> _inner_runtime_state; |
467 | | std::shared_ptr<HashJoinSharedState> _inner_shared_state; |
468 | | std::vector<std::unique_ptr<MutableBlock>> _partitioned_build_blocks; |
469 | | std::vector<SpillFileSPtr> _spilled_build_groups; |
470 | | std::atomic<bool> _is_spilled = false; |
471 | | }; |
472 | | |
473 | | struct NestedLoopJoinSharedState : public JoinSharedState { |
474 | | ENABLE_FACTORY_CREATOR(NestedLoopJoinSharedState) |
475 | | // if true, probe child has no more rows to process |
476 | | bool probe_side_eos = false; |
477 | | // Visited flags for each row in build side. |
478 | | MutableColumns build_side_visited_flags; |
479 | | // List of build blocks, constructed in prepare() |
480 | | Blocks build_blocks; |
481 | | }; |
482 | | |
483 | | struct PartitionSortNodeSharedState : public BasicSharedState { |
484 | | ENABLE_FACTORY_CREATOR(PartitionSortNodeSharedState) |
485 | | public: |
486 | | std::queue<Block> blocks_buffer; |
487 | | std::mutex buffer_mutex; |
488 | | std::vector<std::unique_ptr<PartitionSorter>> partition_sorts; |
489 | | bool sink_eos = false; |
490 | | std::mutex sink_eos_lock; |
491 | | std::mutex prepared_finish_lock; |
492 | | }; |
493 | | |
494 | | struct SetSharedState : public BasicSharedState { |
495 | | ENABLE_FACTORY_CREATOR(SetSharedState) |
496 | | public: |
497 | | /// default init |
498 | | Block build_block; // build to source |
499 | | //record element size in hashtable |
500 | | int64_t valid_element_in_hash_tbl = 0; |
501 | | //first: idx mapped to column types |
502 | | //second: column_id, could point to origin column or cast column |
503 | | std::unordered_map<int, int> build_col_idx; |
504 | | |
505 | | //// shared static states (shared, decided in prepare/open...) |
506 | | |
507 | | /// init in setup_local_state |
508 | | std::unique_ptr<SetDataVariants> hash_table_variants = |
509 | | std::make_unique<SetDataVariants>(); // the real data HERE. |
510 | | std::vector<bool> build_not_ignore_null; |
511 | | |
512 | | // The SET operator's child might have different nullable attributes. |
513 | | // If a calculation involves both nullable and non-nullable columns, the final output should be a nullable column |
514 | | Status update_build_not_ignore_null(const VExprContextSPtrs& ctxs); |
515 | | |
516 | | size_t get_hash_table_size() const; |
517 | | /// init in both upstream side. |
518 | | //The i-th result expr list refers to the i-th child. |
519 | | std::vector<VExprContextSPtrs> child_exprs_lists; |
520 | | |
521 | | /// init in build side |
522 | | size_t child_quantity; |
523 | | VExprContextSPtrs build_child_exprs; |
524 | | std::vector<Dependency*> probe_finished_children_dependency; |
525 | | |
526 | | /// init in probe side |
527 | | std::vector<VExprContextSPtrs> probe_child_exprs_lists; |
528 | | |
529 | | std::atomic<bool> ready_for_read = false; |
530 | | |
531 | | Arena arena; |
532 | | |
533 | | /// called in setup_local_state |
534 | | Status hash_table_init(); |
535 | | }; |
536 | | |
// How a local exchange redistributes blocks between pipeline instances.
enum class ExchangeType : uint8_t {
    NOOP = 0,
    // Shuffle data by Crc32CHashPartitioner.
    HASH_SHUFFLE = 1,
    // Round-robin passthrough of data blocks.
    PASSTHROUGH = 2,
    // Shuffle data by Crc32HashPartitioner<ShuffleChannelIds>
    // (i.e. the same scheme the storage engine uses).
    BUCKET_HASH_SHUFFLE = 3,
    // Passthrough data blocks to all channels.
    BROADCAST = 4,
    // Passthrough data to channels evenly in an adaptive way.
    ADAPTIVE_PASSTHROUGH = 5,
    // Send all data to the first channel.
    PASS_TO_ONE = 6,
};
552 | | |
553 | 63 | inline std::string get_exchange_type_name(ExchangeType idx) { |
554 | 63 | switch (idx) { |
555 | 14 | case ExchangeType::NOOP: |
556 | 14 | return "NOOP"; |
557 | 49 | case ExchangeType::HASH_SHUFFLE: |
558 | 49 | return "HASH_SHUFFLE"; |
559 | 0 | case ExchangeType::PASSTHROUGH: |
560 | 0 | return "PASSTHROUGH"; |
561 | 0 | case ExchangeType::BUCKET_HASH_SHUFFLE: |
562 | 0 | return "BUCKET_HASH_SHUFFLE"; |
563 | 0 | case ExchangeType::BROADCAST: |
564 | 0 | return "BROADCAST"; |
565 | 0 | case ExchangeType::ADAPTIVE_PASSTHROUGH: |
566 | 0 | return "ADAPTIVE_PASSTHROUGH"; |
567 | 0 | case ExchangeType::PASS_TO_ONE: |
568 | 0 | return "PASS_TO_ONE"; |
569 | 63 | } |
570 | 0 | throw Exception(Status::FatalError("__builtin_unreachable")); |
571 | 63 | } |
572 | | |
573 | | struct DataDistribution { |
574 | 144k | DataDistribution(ExchangeType type) : distribution_type(type) {} |
575 | | DataDistribution(ExchangeType type, const std::vector<TExpr>& partition_exprs_) |
576 | 52 | : distribution_type(type), partition_exprs(partition_exprs_) {} |
577 | 0 | DataDistribution(const DataDistribution& other) = default; |
578 | 5 | bool need_local_exchange() const { return distribution_type != ExchangeType::NOOP; } |
579 | 5 | DataDistribution& operator=(const DataDistribution& other) = default; |
580 | | ExchangeType distribution_type; |
581 | | std::vector<TExpr> partition_exprs; |
582 | | }; |
583 | | |
584 | | class ExchangerBase; |
585 | | |
586 | | struct LocalExchangeSharedState : public BasicSharedState { |
587 | | public: |
588 | | ENABLE_FACTORY_CREATOR(LocalExchangeSharedState); |
589 | | LocalExchangeSharedState(int num_instances); |
590 | | ~LocalExchangeSharedState() override; |
591 | | std::unique_ptr<ExchangerBase> exchanger {}; |
592 | | std::vector<RuntimeProfile::Counter*> mem_counters; |
593 | | std::atomic<int64_t> mem_usage = 0; |
594 | | std::atomic<size_t> _buffer_mem_limit = config::local_exchange_buffer_mem_limit; |
595 | | // We need to make sure to add mem_usage first and then enqueue, otherwise sub mem_usage may cause negative mem_usage during concurrent dequeue. |
596 | | std::mutex le_lock; |
597 | | void sub_running_sink_operators(); |
598 | | void sub_running_source_operators(); |
599 | 10 | void _set_always_ready() { |
600 | 40 | for (auto& dep : source_deps) { |
601 | 40 | DCHECK(dep); |
602 | 40 | dep->set_always_ready(); |
603 | 40 | } |
604 | 10 | for (auto& dep : sink_deps) { |
605 | 10 | DCHECK(dep); |
606 | 10 | dep->set_always_ready(); |
607 | 10 | } |
608 | 10 | } |
609 | | |
610 | 0 | Dependency* get_sink_dep_by_channel_id(int channel_id) { return nullptr; } |
611 | | |
612 | 129 | void set_ready_to_read(int channel_id) { |
613 | 129 | auto& dep = source_deps[channel_id]; |
614 | 129 | DCHECK(dep) << channel_id; |
615 | 129 | dep->set_ready(); |
616 | 129 | } |
617 | | |
618 | 161 | void add_mem_usage(int channel_id, size_t delta) { mem_counters[channel_id]->update(delta); } |
619 | | |
620 | 125 | void sub_mem_usage(int channel_id, size_t delta) { |
621 | 125 | mem_counters[channel_id]->update(-(int64_t)delta); |
622 | 125 | } |
623 | | |
624 | 114 | void add_total_mem_usage(size_t delta) { |
625 | 114 | if (cast_set<int64_t>(mem_usage.fetch_add(delta) + delta) > _buffer_mem_limit) { |
626 | 15 | sink_deps.front()->block(); |
627 | 15 | } |
628 | 114 | } |
629 | | |
630 | 114 | void sub_total_mem_usage(size_t delta) { |
631 | 114 | auto prev_usage = mem_usage.fetch_sub(delta); |
632 | 114 | DCHECK_GE(prev_usage - delta, 0) << "prev_usage: " << prev_usage << " delta: " << delta; |
633 | 114 | if (cast_set<int64_t>(prev_usage - delta) <= _buffer_mem_limit) { |
634 | 102 | sink_deps.front()->set_ready(); |
635 | 102 | } |
636 | 114 | } |
637 | | |
638 | 0 | void set_low_memory_mode(RuntimeState* state) { |
639 | 0 | _buffer_mem_limit = std::min<int64_t>(config::local_exchange_buffer_mem_limit, |
640 | 0 | state->low_memory_mode_buffer_limit()); |
641 | 0 | } |
642 | | }; |
643 | | |
644 | | #include "common/compile_check_end.h" |
645 | | } // namespace doris |