Coverage Report

Created: 2024-11-22 21:49

/root/doris/be/src/pipeline/dependency.h
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <concurrentqueue.h>
21
#include <sqltypes.h>
22
23
#include <atomic>
24
#include <functional>
25
#include <memory>
26
#include <mutex>
27
#include <thread>
28
#include <utility>
29
30
#include "common/logging.h"
31
#include "gutil/integral_types.h"
32
#include "pipeline/common/agg_utils.h"
33
#include "pipeline/common/join_utils.h"
34
#include "pipeline/common/set_utils.h"
35
#include "pipeline/exec/data_queue.h"
36
#include "pipeline/exec/join/process_hash_table_probe.h"
37
#include "vec/common/sort/partition_sorter.h"
38
#include "vec/common/sort/sorter.h"
39
#include "vec/core/block.h"
40
#include "vec/core/types.h"
41
#include "vec/spill/spill_stream.h"
42
43
namespace doris::vectorized {
44
class AggFnEvaluator;
45
class VSlotRef;
46
} // namespace doris::vectorized
47
48
namespace doris::pipeline {
49
#include "common/compile_check_begin.h"
50
class Dependency;
51
class PipelineTask;
52
struct BasicSharedState;
53
using DependencySPtr = std::shared_ptr<Dependency>;
54
class LocalExchangeSourceLocalState;
55
56
// Thresholds for reporting dependencies that stay blocked for a long time.
// NOTE(review): the 1000 * 1000 * 1000 factor suggests nanoseconds (60 s and 30 s) -- confirm
// against the code that compares these with Dependency::watcher_elapse_time().
//
// The type is spelled as int64_t (instead of `auto` deduced from `long` literals) so the
// products cannot overflow on platforms where `long` is only 32 bits wide.
static constexpr int64_t SLOW_DEPENDENCY_THRESHOLD = int64_t {60} * 1000 * 1000 * 1000;
static constexpr int64_t TIME_UNIT_DEPENDENCY_LOG = int64_t {30} * 1000 * 1000 * 1000;
static_assert(TIME_UNIT_DEPENDENCY_LOG < SLOW_DEPENDENCY_THRESHOLD);
59
60
struct BasicSharedState {
61
    ENABLE_FACTORY_CREATOR(BasicSharedState)
62
63
    template <class TARGET>
64
0
    TARGET* cast() {
65
0
        DCHECK(dynamic_cast<TARGET*>(this))
66
0
                << " Mismatch type! Current type is " << typeid(*this).name()
67
0
                << " and expect type is" << typeid(TARGET).name();
68
0
        return reinterpret_cast<TARGET*>(this);
69
0
    }
Unexecuted instantiation: _ZN5doris8pipeline16BasicSharedState4castINS0_19HashJoinSharedStateEEEPT_v
Unexecuted instantiation: _ZN5doris8pipeline16BasicSharedState4castINS0_30PartitionedHashJoinSharedStateEEEPT_v
Unexecuted instantiation: _ZN5doris8pipeline16BasicSharedState4castINS0_15SortSharedStateEEEPT_v
Unexecuted instantiation: _ZN5doris8pipeline16BasicSharedState4castINS0_20SpillSortSharedStateEEEPT_v
Unexecuted instantiation: _ZN5doris8pipeline16BasicSharedState4castINS0_25NestedLoopJoinSharedStateEEEPT_v
Unexecuted instantiation: _ZN5doris8pipeline16BasicSharedState4castINS0_19AnalyticSharedStateEEEPT_v
Unexecuted instantiation: _ZN5doris8pipeline16BasicSharedState4castINS0_14AggSharedStateEEEPT_v
Unexecuted instantiation: _ZN5doris8pipeline16BasicSharedState4castINS0_25PartitionedAggSharedStateEEEPT_v
Unexecuted instantiation: _ZN5doris8pipeline16BasicSharedState4castINS0_16UnionSharedStateEEEPT_v
Unexecuted instantiation: _ZN5doris8pipeline16BasicSharedState4castINS0_28PartitionSortNodeSharedStateEEEPT_v
Unexecuted instantiation: _ZN5doris8pipeline16BasicSharedState4castINS0_20MultiCastSharedStateEEEPT_v
Unexecuted instantiation: _ZN5doris8pipeline16BasicSharedState4castINS0_14SetSharedStateEEEPT_v
Unexecuted instantiation: _ZN5doris8pipeline16BasicSharedState4castINS0_24LocalExchangeSharedStateEEEPT_v
Unexecuted instantiation: _ZN5doris8pipeline16BasicSharedState4castIS1_EEPT_v
Unexecuted instantiation: _ZN5doris8pipeline16BasicSharedState4castINS0_16CacheSharedStateEEEPT_v
70
    template <class TARGET>
71
    const TARGET* cast() const {
72
        DCHECK(dynamic_cast<const TARGET*>(this))
73
                << " Mismatch type! Current type is " << typeid(*this).name()
74
                << " and expect type is" << typeid(TARGET).name();
75
        return reinterpret_cast<const TARGET*>(this);
76
    }
77
    std::vector<DependencySPtr> source_deps;
78
    std::vector<DependencySPtr> sink_deps;
79
    int id = 0;
80
    std::set<int> related_op_ids;
81
82
0
    virtual ~BasicSharedState() = default;
83
84
    Dependency* create_source_dependency(int operator_id, int node_id, const std::string& name);
85
86
    Dependency* create_sink_dependency(int dest_id, int node_id, const std::string& name);
87
};
88
89
class Dependency : public std::enable_shared_from_this<Dependency> {
90
public:
91
    ENABLE_FACTORY_CREATOR(Dependency);
92
    Dependency(int id, int node_id, std::string name, bool ready = false)
93
3
            : _id(id), _node_id(node_id), _name(std::move(name)), _ready(ready) {}
94
3
    virtual ~Dependency() = default;
95
96
0
    [[nodiscard]] int id() const { return _id; }
97
3
    [[nodiscard]] virtual std::string name() const { return _name; }
98
0
    BasicSharedState* shared_state() { return _shared_state; }
99
0
    void set_shared_state(BasicSharedState* shared_state) { _shared_state = shared_state; }
100
    virtual std::string debug_string(int indentation_level = 0);
101
0
    bool ready() const { return _ready; }
102
103
    // Start the watcher. We use it to count how long this dependency block the current pipeline task.
104
0
    void start_watcher() { _watcher.start(); }
105
6
    [[nodiscard]] int64_t watcher_elapse_time() { return _watcher.elapsed_time(); }
106
107
    // Which dependency current pipeline task is blocked by. `nullptr` if this dependency is ready.
108
    [[nodiscard]] virtual Dependency* is_blocked_by(PipelineTask* task = nullptr);
109
    // Notify downstream pipeline tasks this dependency is ready.
110
    void set_ready();
111
0
    void set_ready_to_read() {
112
0
        DCHECK_EQ(_shared_state->source_deps.size(), 1) << debug_string();
113
0
        _shared_state->source_deps.front()->set_ready();
114
0
    }
115
0
    void set_block_to_read() {
116
0
        DCHECK_EQ(_shared_state->source_deps.size(), 1) << debug_string();
117
0
        _shared_state->source_deps.front()->block();
118
0
    }
119
0
    void set_ready_to_write() {
120
0
        DCHECK_EQ(_shared_state->sink_deps.size(), 1) << debug_string();
121
0
        _shared_state->sink_deps.front()->set_ready();
122
0
    }
123
0
    void set_block_to_write() {
124
0
        DCHECK_EQ(_shared_state->sink_deps.size(), 1) << debug_string();
125
0
        _shared_state->sink_deps.front()->block();
126
0
    }
127
128
    // Notify downstream pipeline tasks this dependency is blocked.
129
0
    void block() {
130
0
        if (_always_ready) {
131
0
            return;
132
0
        }
133
0
        std::unique_lock<std::mutex> lc(_always_ready_lock);
134
0
        if (_always_ready) {
135
0
            return;
136
0
        }
137
0
        _ready = false;
138
0
    }
139
140
0
    void set_always_ready() {
141
0
        if (_always_ready) {
142
0
            return;
143
0
        }
144
0
        std::unique_lock<std::mutex> lc(_always_ready_lock);
145
0
        if (_always_ready) {
146
0
            return;
147
0
        }
148
0
        _always_ready = true;
149
0
        set_ready();
150
0
    }
151
152
protected:
153
    void _add_block_task(PipelineTask* task);
154
155
    const int _id;
156
    const int _node_id;
157
    const std::string _name;
158
    std::atomic<bool> _ready;
159
160
    BasicSharedState* _shared_state = nullptr;
161
    MonotonicStopWatch _watcher;
162
163
    std::mutex _task_lock;
164
    std::vector<PipelineTask*> _blocked_task;
165
166
    // If `_always_ready` is true, `block()` will never block tasks.
167
    std::atomic<bool> _always_ready = false;
168
    std::mutex _always_ready_lock;
169
};
170
171
struct FakeSharedState final : public BasicSharedState {
172
    ENABLE_FACTORY_CREATOR(FakeSharedState)
173
};
174
175
class CountedFinishDependency final : public Dependency {
176
public:
177
    using SharedState = FakeSharedState;
178
    CountedFinishDependency(int id, int node_id, std::string name)
179
0
            : Dependency(id, node_id, name, true) {}
180
181
0
    void add() {
182
0
        std::unique_lock<std::mutex> l(_mtx);
183
0
        if (!_counter) {
184
0
            block();
185
0
        }
186
0
        _counter++;
187
0
    }
188
189
0
    void sub() {
190
0
        std::unique_lock<std::mutex> l(_mtx);
191
0
        _counter--;
192
0
        if (!_counter) {
193
0
            set_ready();
194
0
        }
195
0
    }
196
197
    std::string debug_string(int indentation_level = 0) override;
198
199
private:
200
    std::mutex _mtx;
201
    uint32_t _counter = 0;
202
};
203
204
class RuntimeFilterDependency;
205
struct RuntimeFilterTimerQueue;
206
class RuntimeFilterTimer {
207
public:
208
    RuntimeFilterTimer(int64_t registration_time, int32_t wait_time_ms,
209
                       std::shared_ptr<RuntimeFilterDependency> parent)
210
            : _parent(std::move(parent)),
211
              _registration_time(registration_time),
212
0
              _wait_time_ms(wait_time_ms) {}
213
214
    // Called by runtime filter producer.
215
    void call_ready();
216
217
    // Called by RuntimeFilterTimerQueue which is responsible for checking if this rf is timeout.
218
    void call_timeout();
219
220
0
    int64_t registration_time() const { return _registration_time; }
221
0
    int32_t wait_time_ms() const { return _wait_time_ms; }
222
223
    void set_local_runtime_filter_dependencies(
224
0
            const std::vector<std::shared_ptr<RuntimeFilterDependency>>& deps) {
225
0
        _local_runtime_filter_dependencies = deps;
226
0
    }
227
228
    bool should_be_check_timeout();
229
230
private:
231
    friend struct RuntimeFilterTimerQueue;
232
    std::shared_ptr<RuntimeFilterDependency> _parent = nullptr;
233
    std::vector<std::shared_ptr<RuntimeFilterDependency>> _local_runtime_filter_dependencies;
234
    std::mutex _lock;
235
    int64_t _registration_time;
236
    const int32_t _wait_time_ms;
237
};
238
239
struct RuntimeFilterTimerQueue {
240
    constexpr static int64_t interval = 10;
241
0
    void run() { _thread.detach(); }
242
    void start();
243
244
0
    void stop() {
245
0
        _stop = true;
246
0
        cv.notify_all();
247
0
        wait_for_shutdown();
248
0
    }
249
250
0
    void wait_for_shutdown() const {
251
0
        while (!_shutdown) {
252
0
            std::this_thread::sleep_for(std::chrono::milliseconds(interval));
253
0
        }
254
0
    }
255
256
0
    ~RuntimeFilterTimerQueue() = default;
257
0
    RuntimeFilterTimerQueue() { _thread = std::thread(&RuntimeFilterTimerQueue::start, this); }
258
0
    void push_filter_timer(std::vector<std::shared_ptr<pipeline::RuntimeFilterTimer>>&& filter) {
259
0
        std::unique_lock<std::mutex> lc(_que_lock);
260
0
        _que.insert(_que.end(), filter.begin(), filter.end());
261
0
        cv.notify_all();
262
0
    }
263
264
    std::thread _thread;
265
    std::condition_variable cv;
266
    std::mutex cv_m;
267
    std::mutex _que_lock;
268
    std::atomic_bool _stop = false;
269
    std::atomic_bool _shutdown = false;
270
    std::list<std::shared_ptr<pipeline::RuntimeFilterTimer>> _que;
271
};
272
273
class RuntimeFilterDependency final : public Dependency {
274
public:
275
    RuntimeFilterDependency(int id, int node_id, std::string name, IRuntimeFilter* runtime_filter)
276
0
            : Dependency(id, node_id, name), _runtime_filter(runtime_filter) {}
277
    std::string debug_string(int indentation_level = 0) override;
278
279
private:
280
    const IRuntimeFilter* _runtime_filter = nullptr;
281
};
282
283
struct AggSharedState : public BasicSharedState {
284
    ENABLE_FACTORY_CREATOR(AggSharedState)
285
public:
286
0
    AggSharedState() {
287
0
        agg_data = std::make_unique<AggregatedDataVariants>();
288
0
        agg_arena_pool = std::make_unique<vectorized::Arena>();
289
0
    }
290
0
    ~AggSharedState() override {
291
0
        if (!probe_expr_ctxs.empty()) {
292
0
            _close_with_serialized_key();
293
0
        } else {
294
0
            _close_without_key();
295
0
        }
296
0
    }
297
298
    Status reset_hash_table();
299
300
    bool do_limit_filter(vectorized::Block* block, size_t num_rows,
301
                         const std::vector<int>* key_locs = nullptr);
302
    void build_limit_heap(size_t hash_table_size);
303
304
    // We should call this function only at 1st phase.
305
    // 1st phase: is_merge=true, only have one SlotRef.
306
    // 2nd phase: is_merge=false, maybe have multiple exprs.
307
    static int get_slot_column_id(const vectorized::AggFnEvaluator* evaluator);
308
309
    AggregatedDataVariantsUPtr agg_data = nullptr;
310
    std::unique_ptr<AggregateDataContainer> aggregate_data_container;
311
    ArenaUPtr agg_arena_pool;
312
    std::vector<vectorized::AggFnEvaluator*> aggregate_evaluators;
313
    // group by k1,k2
314
    vectorized::VExprContextSPtrs probe_expr_ctxs;
315
    size_t input_num_rows = 0;
316
    std::vector<vectorized::AggregateDataPtr> values;
317
    /// The total size of the row from the aggregate functions.
318
    size_t total_size_of_aggregate_states = 0;
319
    size_t align_aggregate_states = 1;
320
    /// The offset to the n-th aggregate function in a row of aggregate functions.
321
    vectorized::Sizes offsets_of_aggregate_states;
322
    std::vector<size_t> make_nullable_keys;
323
324
    bool agg_data_created_without_key = false;
325
    bool enable_spill = false;
326
    bool reach_limit = false;
327
328
    int64_t limit = -1;
329
    bool do_sort_limit = false;
330
    vectorized::MutableColumns limit_columns;
331
    int limit_columns_min = -1;
332
    vectorized::PaddedPODArray<uint8_t> need_computes;
333
    std::vector<uint8_t> cmp_res;
334
    std::vector<int> order_directions;
335
    std::vector<int> null_directions;
336
337
    struct HeapLimitCursor {
338
        HeapLimitCursor(int row_id, vectorized::MutableColumns& limit_columns,
339
                        std::vector<int>& order_directions, std::vector<int>& null_directions)
340
                : _row_id(row_id),
341
                  _limit_columns(limit_columns),
342
                  _order_directions(order_directions),
343
0
                  _null_directions(null_directions) {}
344
345
        HeapLimitCursor(const HeapLimitCursor& other) = default;
346
347
        HeapLimitCursor(HeapLimitCursor&& other) noexcept
348
                : _row_id(other._row_id),
349
                  _limit_columns(other._limit_columns),
350
                  _order_directions(other._order_directions),
351
0
                  _null_directions(other._null_directions) {}
352
353
0
        HeapLimitCursor& operator=(const HeapLimitCursor& other) noexcept {
354
0
            _row_id = other._row_id;
355
0
            return *this;
356
0
        }
357
358
0
        HeapLimitCursor& operator=(HeapLimitCursor&& other) noexcept {
359
0
            _row_id = other._row_id;
360
0
            return *this;
361
0
        }
362
363
0
        bool operator<(const HeapLimitCursor& rhs) const {
364
0
            for (int i = 0; i < _limit_columns.size(); ++i) {
365
0
                const auto& _limit_column = _limit_columns[i];
366
0
                auto res = _limit_column->compare_at(_row_id, rhs._row_id, *_limit_column,
367
0
                                                     _null_directions[i]) *
368
0
                           _order_directions[i];
369
0
                if (res < 0) {
370
0
                    return true;
371
0
                } else if (res > 0) {
372
0
                    return false;
373
0
                }
374
0
            }
375
0
            return false;
376
0
        }
377
378
        int _row_id;
379
        vectorized::MutableColumns& _limit_columns;
380
        std::vector<int>& _order_directions;
381
        std::vector<int>& _null_directions;
382
    };
383
384
    std::priority_queue<HeapLimitCursor> limit_heap;
385
386
private:
387
    vectorized::MutableColumns _get_keys_hash_table();
388
389
0
    void _close_with_serialized_key() {
390
0
        std::visit(vectorized::Overload {[&](std::monostate& arg) -> void {
391
                                             // Do nothing
392
0
                                         },
393
0
                                         [&](auto& agg_method) -> void {
394
0
                                             auto& data = *agg_method.hash_table;
395
0
                                             data.for_each_mapped([&](auto& mapped) {
396
0
                                                 if (mapped) {
397
0
                                                     static_cast<void>(_destroy_agg_status(mapped));
398
0
                                                     mapped = nullptr;
399
0
                                                 }
400
0
                                             });
Unexecuted instantiation: _ZZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized16MethodSerializedI9PHHashMapINS_9StringRefEPc11DefaultHashIS9_vEEEEEEvS3_ENKUlS3_E_clISA_EEDaS3_
Unexecuted instantiation: _ZZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized15MethodOneNumberIh9PHHashMapIhPc9HashCRC32IhEEEEEEvS3_ENKUlS3_E_clIS9_EEDaS3_
Unexecuted instantiation: _ZZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized15MethodOneNumberIt9PHHashMapItPc9HashCRC32ItEEEEEEvS3_ENKUlS3_E_clIS9_EEDaS3_
Unexecuted instantiation: _ZZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized15MethodOneNumberIj9PHHashMapIjPc9HashCRC32IjEEEEEEvS3_ENKUlS3_E_clIS9_EEDaS3_
Unexecuted instantiation: _ZZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized15MethodOneNumberIm9PHHashMapImPc9HashCRC32ImEEEEEEvS3_ENKUlS3_E_clIS9_EEDaS3_
Unexecuted instantiation: _ZZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized19MethodStringNoCacheINS_13StringHashMapIPc9AllocatorILb1ELb1ELb0E22DefaultMemoryAllocatorEEEEEEEvS3_ENKUlS3_E_clIS9_EEDaS3_
Unexecuted instantiation: _ZZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized15MethodOneNumberIN4wide7integerILm128EjEE9PHHashMapISA_Pc9HashCRC32ISA_EEEEEEvS3_ENKUlS3_E_clISC_EEDaS3_
Unexecuted instantiation: _ZZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized15MethodOneNumberIN4wide7integerILm256EjEE9PHHashMapISA_Pc9HashCRC32ISA_EEEEEEvS3_ENKUlS3_E_clISC_EEDaS3_
Unexecuted instantiation: _ZZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized15MethodOneNumberIj9PHHashMapIjPc14HashMixWrapperIj9HashCRC32IjEEEEEEEvS3_ENKUlS3_E_clIS9_EEDaS3_
Unexecuted instantiation: _ZZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized15MethodOneNumberIm9PHHashMapImPc14HashMixWrapperIm9HashCRC32ImEEEEEEEvS3_ENKUlS3_E_clIS9_EEDaS3_
Unexecuted instantiation: _ZZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized26MethodSingleNullableColumnINS6_15MethodOneNumberIhNS6_15DataWithNullKeyI9PHHashMapIhPc9HashCRC32IhEEEEEEEEEEvS3_ENKUlS3_E_clISB_EEDaS3_
Unexecuted instantiation: _ZZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized26MethodSingleNullableColumnINS6_15MethodOneNumberItNS6_15DataWithNullKeyI9PHHashMapItPc9HashCRC32ItEEEEEEEEEEvS3_ENKUlS3_E_clISB_EEDaS3_
Unexecuted instantiation: _ZZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized26MethodSingleNullableColumnINS6_15MethodOneNumberIjNS6_15DataWithNullKeyI9PHHashMapIjPc9HashCRC32IjEEEEEEEEEEvS3_ENKUlS3_E_clISB_EEDaS3_
Unexecuted instantiation: _ZZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized26MethodSingleNullableColumnINS6_15MethodOneNumberImNS6_15DataWithNullKeyI9PHHashMapImPc9HashCRC32ImEEEEEEEEEEvS3_ENKUlS3_E_clISB_EEDaS3_
Unexecuted instantiation: _ZZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized26MethodSingleNullableColumnINS6_15MethodOneNumberIjNS6_15DataWithNullKeyI9PHHashMapIjPc14HashMixWrapperIj9HashCRC32IjEEEEEEEEEEEvS3_ENKUlS3_E_clISB_EEDaS3_
Unexecuted instantiation: _ZZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized26MethodSingleNullableColumnINS6_15MethodOneNumberImNS6_15DataWithNullKeyI9PHHashMapImPc14HashMixWrapperIm9HashCRC32ImEEEEEEEEEEEvS3_ENKUlS3_E_clISB_EEDaS3_
Unexecuted instantiation: _ZZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized26MethodSingleNullableColumnINS6_15MethodOneNumberIN4wide7integerILm128EjEENS6_15DataWithNullKeyI9PHHashMapISB_Pc9HashCRC32ISB_EEEEEEEEEEvS3_ENKUlS3_E_clISE_EEDaS3_
Unexecuted instantiation: _ZZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized26MethodSingleNullableColumnINS6_15MethodOneNumberIN4wide7integerILm256EjEENS6_15DataWithNullKeyI9PHHashMapISB_Pc9HashCRC32ISB_EEEEEEEEEEvS3_ENKUlS3_E_clISE_EEDaS3_
Unexecuted instantiation: _ZZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized26MethodSingleNullableColumnINS6_19MethodStringNoCacheINS6_15DataWithNullKeyINS_13StringHashMapIPc9AllocatorILb1ELb1ELb0E22DefaultMemoryAllocatorEEEEEEEEEEEvS3_ENKUlS3_E_clISB_EEDaS3_
Unexecuted instantiation: _ZZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized15MethodKeysFixedI9PHHashMapImPc9HashCRC32ImEEEEEEvS3_ENKUlS3_E_clIS9_EEDaS3_
Unexecuted instantiation: _ZZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized15MethodKeysFixedI9PHHashMapIN4wide7integerILm128EjEEPc9HashCRC32ISB_EEEEEEvS3_ENKUlS3_E_clISC_EEDaS3_
Unexecuted instantiation: _ZZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized15MethodKeysFixedI9PHHashMapIN4wide7integerILm256EjEEPc9HashCRC32ISB_EEEEEEvS3_ENKUlS3_E_clISC_EEDaS3_
Unexecuted instantiation: _ZZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized15MethodKeysFixedI9PHHashMapINS6_7UInt136EPc9HashCRC32IS9_EEEEEEvS3_ENKUlS3_E_clISA_EEDaS3_
401
0
                                             if (data.has_null_key_data()) {
402
0
                                                 auto st = _destroy_agg_status(
403
0
                                                         data.template get_null_key_data<
404
0
                                                                 vectorized::AggregateDataPtr>());
405
0
                                                 if (!st) {
406
0
                                                     throw Exception(st.code(), st.to_string());
407
0
                                                 }
408
0
                                             }
409
0
                                         }},
Unexecuted instantiation: _ZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized16MethodSerializedI9PHHashMapINS_9StringRefEPc11DefaultHashIS9_vEEEEEEvS3_
Unexecuted instantiation: _ZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized15MethodOneNumberIh9PHHashMapIhPc9HashCRC32IhEEEEEEvS3_
Unexecuted instantiation: _ZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized15MethodOneNumberIt9PHHashMapItPc9HashCRC32ItEEEEEEvS3_
Unexecuted instantiation: _ZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized15MethodOneNumberIj9PHHashMapIjPc9HashCRC32IjEEEEEEvS3_
Unexecuted instantiation: _ZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized15MethodOneNumberIm9PHHashMapImPc9HashCRC32ImEEEEEEvS3_
Unexecuted instantiation: _ZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized19MethodStringNoCacheINS_13StringHashMapIPc9AllocatorILb1ELb1ELb0E22DefaultMemoryAllocatorEEEEEEEvS3_
Unexecuted instantiation: _ZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized15MethodOneNumberIN4wide7integerILm128EjEE9PHHashMapISA_Pc9HashCRC32ISA_EEEEEEvS3_
Unexecuted instantiation: _ZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized15MethodOneNumberIN4wide7integerILm256EjEE9PHHashMapISA_Pc9HashCRC32ISA_EEEEEEvS3_
Unexecuted instantiation: _ZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized15MethodOneNumberIj9PHHashMapIjPc14HashMixWrapperIj9HashCRC32IjEEEEEEEvS3_
Unexecuted instantiation: _ZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized15MethodOneNumberIm9PHHashMapImPc14HashMixWrapperIm9HashCRC32ImEEEEEEEvS3_
Unexecuted instantiation: _ZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized26MethodSingleNullableColumnINS6_15MethodOneNumberIhNS6_15DataWithNullKeyI9PHHashMapIhPc9HashCRC32IhEEEEEEEEEEvS3_
Unexecuted instantiation: _ZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized26MethodSingleNullableColumnINS6_15MethodOneNumberItNS6_15DataWithNullKeyI9PHHashMapItPc9HashCRC32ItEEEEEEEEEEvS3_
Unexecuted instantiation: _ZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized26MethodSingleNullableColumnINS6_15MethodOneNumberIjNS6_15DataWithNullKeyI9PHHashMapIjPc9HashCRC32IjEEEEEEEEEEvS3_
Unexecuted instantiation: _ZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized26MethodSingleNullableColumnINS6_15MethodOneNumberImNS6_15DataWithNullKeyI9PHHashMapImPc9HashCRC32ImEEEEEEEEEEvS3_
Unexecuted instantiation: _ZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized26MethodSingleNullableColumnINS6_15MethodOneNumberIjNS6_15DataWithNullKeyI9PHHashMapIjPc14HashMixWrapperIj9HashCRC32IjEEEEEEEEEEEvS3_
Unexecuted instantiation: _ZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized26MethodSingleNullableColumnINS6_15MethodOneNumberImNS6_15DataWithNullKeyI9PHHashMapImPc14HashMixWrapperIm9HashCRC32ImEEEEEEEEEEEvS3_
Unexecuted instantiation: _ZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized26MethodSingleNullableColumnINS6_15MethodOneNumberIN4wide7integerILm128EjEENS6_15DataWithNullKeyI9PHHashMapISB_Pc9HashCRC32ISB_EEEEEEEEEEvS3_
Unexecuted instantiation: _ZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized26MethodSingleNullableColumnINS6_15MethodOneNumberIN4wide7integerILm256EjEENS6_15DataWithNullKeyI9PHHashMapISB_Pc9HashCRC32ISB_EEEEEEEEEEvS3_
Unexecuted instantiation: _ZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized26MethodSingleNullableColumnINS6_19MethodStringNoCacheINS6_15DataWithNullKeyINS_13StringHashMapIPc9AllocatorILb1ELb1ELb0E22DefaultMemoryAllocatorEEEEEEEEEEEvS3_
Unexecuted instantiation: _ZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized15MethodKeysFixedI9PHHashMapImPc9HashCRC32ImEEEEEEvS3_
Unexecuted instantiation: _ZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized15MethodKeysFixedI9PHHashMapIN4wide7integerILm128EjEEPc9HashCRC32ISB_EEEEEEvS3_
Unexecuted instantiation: _ZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized15MethodKeysFixedI9PHHashMapIN4wide7integerILm256EjEEPc9HashCRC32ISB_EEEEEEvS3_
Unexecuted instantiation: _ZZN5doris8pipeline14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_10vectorized15MethodKeysFixedI9PHHashMapINS6_7UInt136EPc9HashCRC32IS9_EEEEEEvS3_
410
0
                   agg_data->method_variant);
411
0
    }
412
413
0
    void _close_without_key() {
414
        //because prepare maybe failed, and couldn't create agg data.
415
        //but finally call close to destory agg data, if agg data has bitmapValue
416
        //will be core dump, it's not initialized
417
0
        if (agg_data_created_without_key) {
418
0
            static_cast<void>(_destroy_agg_status(agg_data->without_key));
419
0
            agg_data_created_without_key = false;
420
0
        }
421
0
    }
422
    Status _destroy_agg_status(vectorized::AggregateDataPtr data);
423
};
424
425
struct AggSpillPartition;
426
struct PartitionedAggSharedState : public BasicSharedState,
427
                                   public std::enable_shared_from_this<PartitionedAggSharedState> {
428
    ENABLE_FACTORY_CREATOR(PartitionedAggSharedState)
429
430
0
    PartitionedAggSharedState() = default;
431
0
    ~PartitionedAggSharedState() override = default;
432
433
    void init_spill_params(size_t spill_partition_count_bits);
434
435
    void close();
436
437
    AggSharedState* in_mem_shared_state = nullptr;
438
    std::shared_ptr<BasicSharedState> in_mem_shared_state_sptr;
439
440
    size_t partition_count_bits;
441
    size_t partition_count;
442
    size_t max_partition_index;
443
    Status sink_status;
444
    bool is_spilled = false;
445
    std::atomic_bool is_closed = false;
446
    std::deque<std::shared_ptr<AggSpillPartition>> spill_partitions;
447
448
0
    size_t get_partition_index(size_t hash_value) const {
449
0
        return (hash_value >> (32 - partition_count_bits)) & max_partition_index;
450
0
    }
451
};
452
453
struct AggSpillPartition {
454
    static constexpr int64_t AGG_SPILL_FILE_SIZE = 1024 * 1024 * 1024; // 1G
455
456
0
    AggSpillPartition() = default;
457
458
    void close();
459
460
    Status get_spill_stream(RuntimeState* state, int node_id, RuntimeProfile* profile,
461
                            vectorized::SpillStreamSPtr& spilling_stream);
462
463
0
    Status flush_if_full() {
464
0
        DCHECK(spilling_stream_);
465
0
        Status status;
466
        // avoid small spill files
467
0
        if (spilling_stream_->get_written_bytes() >= AGG_SPILL_FILE_SIZE) {
468
0
            status = spilling_stream_->spill_eof();
469
0
            spilling_stream_.reset();
470
0
        }
471
0
        return status;
472
0
    }
473
474
0
    Status finish_current_spilling(bool eos = false) {
475
0
        if (spilling_stream_) {
476
0
            if (eos || spilling_stream_->get_written_bytes() >= AGG_SPILL_FILE_SIZE) {
477
0
                auto status = spilling_stream_->spill_eof();
478
0
                spilling_stream_.reset();
479
0
                return status;
480
0
            }
481
0
        }
482
0
        return Status::OK();
483
0
    }
484
485
    std::deque<vectorized::SpillStreamSPtr> spill_streams_;
486
    vectorized::SpillStreamSPtr spilling_stream_;
487
};
488
using AggSpillPartitionSPtr = std::shared_ptr<AggSpillPartition>;
489
struct SortSharedState : public BasicSharedState {
490
    ENABLE_FACTORY_CREATOR(SortSharedState)
491
public:
492
    std::unique_ptr<vectorized::Sorter> sorter;
493
};
494
495
struct SpillSortSharedState : public BasicSharedState,
496
                              public std::enable_shared_from_this<SpillSortSharedState> {
497
    ENABLE_FACTORY_CREATOR(SpillSortSharedState)
498
499
0
    SpillSortSharedState() = default;
500
0
    ~SpillSortSharedState() override = default;
501
502
    // This number specifies the maximum size of sub blocks
503
    static constexpr size_t SORT_BLOCK_SPILL_BATCH_BYTES = 8 * 1024 * 1024;
504
0
    void update_spill_block_batch_row_count(const vectorized::Block* block) {
505
0
        auto rows = block->rows();
506
0
        if (rows > 0 && 0 == avg_row_bytes) {
507
0
            avg_row_bytes = std::max((std::size_t)1, block->bytes() / rows);
508
0
            spill_block_batch_row_count =
509
0
                    (SORT_BLOCK_SPILL_BATCH_BYTES + avg_row_bytes - 1) / avg_row_bytes;
510
0
            LOG(INFO) << "spill sort block batch row count: " << spill_block_batch_row_count;
511
0
        }
512
0
    }
513
    void close();
514
515
    SortSharedState* in_mem_shared_state = nullptr;
516
    bool enable_spill = false;
517
    bool is_spilled = false;
518
    std::atomic_bool is_closed = false;
519
    Status sink_status;
520
    std::shared_ptr<BasicSharedState> in_mem_shared_state_sptr;
521
522
    std::deque<vectorized::SpillStreamSPtr> sorted_streams;
523
    size_t avg_row_bytes = 0;
524
    size_t spill_block_batch_row_count;
525
};
526
527
struct UnionSharedState : public BasicSharedState {
528
    ENABLE_FACTORY_CREATOR(UnionSharedState)
529
530
public:
531
0
    UnionSharedState(int child_count = 1) : data_queue(child_count), _child_count(child_count) {};
532
0
    int child_count() const { return _child_count; }
533
    DataQueue data_queue;
534
    const int _child_count;
535
};
536
537
struct CacheSharedState : public BasicSharedState {
538
    ENABLE_FACTORY_CREATOR(CacheSharedState)
539
public:
540
    DataQueue data_queue;
541
};
542
543
class MultiCastDataStreamer;
544
545
struct MultiCastSharedState : public BasicSharedState {
546
public:
547
    MultiCastSharedState(const RowDescriptor& row_desc, ObjectPool* pool, int cast_sender_count);
548
    std::unique_ptr<pipeline::MultiCastDataStreamer> multi_cast_data_streamer;
549
};
550
551
/// A position expressed both as (block, row) and as a single running offset.
struct BlockRowPos {
    int64_t block_num {}; // which block the position is in
    int64_t row_num {};   // which row the position is at
    int64_t pos {};       // pos = all blocks size + row_num

    /// Renders the three fields as tab-separated "name: value" pairs.
    std::string debug_string() const {
        std::string text;
        text.append("\t block_num: ").append(std::to_string(block_num));
        text.append("\t row_num: ").append(std::to_string(row_num));
        text.append("\t pos: ").append(std::to_string(pos));
        return text;
    }
};
565
566
struct AnalyticSharedState : public BasicSharedState {
567
    ENABLE_FACTORY_CREATOR(AnalyticSharedState)
568
569
public:
570
0
    AnalyticSharedState() = default;
571
572
    int64_t current_row_position = 0;
573
    BlockRowPos partition_by_end;
574
    int64_t input_total_rows = 0;
575
    BlockRowPos all_block_end;
576
    std::vector<vectorized::Block> input_blocks;
577
    bool input_eos = false;
578
    BlockRowPos found_partition_end;
579
    std::vector<int64_t> origin_cols;
580
    std::vector<int64_t> input_block_first_row_positions;
581
    std::vector<std::vector<vectorized::MutableColumnPtr>> agg_input_columns;
582
583
    // TODO: maybe global?
584
    std::vector<int64_t> partition_by_column_idxs;
585
    std::vector<int64_t> ordey_by_column_idxs;
586
};
587
588
struct JoinSharedState : public BasicSharedState {
589
    // For some join case, we can apply a short circuit strategy
590
    // 1. _has_null_in_build_side = true
591
    // 2. build side rows is empty, Join op is: inner join/right outer join/left semi/right semi/right anti
592
    bool _has_null_in_build_side = false;
593
    bool short_circuit_for_probe = false;
594
    // for some join, when build side rows is empty, we could return directly by add some additional null data in probe table.
595
    bool empty_right_table_need_probe_dispose = false;
596
    JoinOpVariants join_op_variants;
597
};
598
599
struct HashJoinSharedState : public JoinSharedState {
600
    ENABLE_FACTORY_CREATOR(HashJoinSharedState)
601
    // mark the join column whether support null eq
602
    std::vector<bool> is_null_safe_eq_join;
603
604
    // mark the build hash table whether it needs to store null value
605
    std::vector<bool> serialize_null_into_key;
606
    std::shared_ptr<vectorized::Arena> arena = std::make_shared<vectorized::Arena>();
607
608
    // maybe share hash table with other fragment instances
609
    std::shared_ptr<JoinDataVariants> hash_table_variants = std::make_shared<JoinDataVariants>();
610
    const std::vector<TupleDescriptor*> build_side_child_desc;
611
    size_t build_exprs_size = 0;
612
    std::shared_ptr<vectorized::Block> build_block;
613
    std::shared_ptr<std::vector<uint32_t>> build_indexes_null;
614
    bool probe_ignore_null = false;
615
};
616
617
struct PartitionedHashJoinSharedState
618
        : public HashJoinSharedState,
619
          public std::enable_shared_from_this<PartitionedHashJoinSharedState> {
620
    ENABLE_FACTORY_CREATOR(PartitionedHashJoinSharedState)
621
622
    std::unique_ptr<RuntimeState> inner_runtime_state;
623
    std::shared_ptr<HashJoinSharedState> inner_shared_state;
624
    std::vector<std::unique_ptr<vectorized::MutableBlock>> partitioned_build_blocks;
625
    std::vector<vectorized::SpillStreamSPtr> spilled_streams;
626
    bool need_to_spill = false;
627
};
628
629
struct NestedLoopJoinSharedState : public JoinSharedState {
630
    ENABLE_FACTORY_CREATOR(NestedLoopJoinSharedState)
631
    // if true, left child has no more rows to process
632
    bool left_side_eos = false;
633
    // Visited flags for each row in build side.
634
    vectorized::MutableColumns build_side_visited_flags;
635
    // List of build blocks, constructed in prepare()
636
    vectorized::Blocks build_blocks;
637
};
638
639
struct PartitionSortNodeSharedState : public BasicSharedState {
640
    ENABLE_FACTORY_CREATOR(PartitionSortNodeSharedState)
641
public:
642
    std::queue<vectorized::Block> blocks_buffer;
643
    std::mutex buffer_mutex;
644
    std::vector<std::unique_ptr<vectorized::PartitionSorter>> partition_sorts;
645
    bool sink_eos = false;
646
    std::mutex sink_eos_lock;
647
};
648
649
struct SetSharedState : public BasicSharedState {
650
    ENABLE_FACTORY_CREATOR(SetSharedState)
651
public:
652
    /// default init
653
    vectorized::Block build_block; // build to source
654
    //record element size in hashtable
655
    int64_t valid_element_in_hash_tbl = 0;
656
    //first: idx mapped to column types
657
    //second: column_id, could point to origin column or cast column
658
    std::unordered_map<int, int> build_col_idx;
659
660
    //// shared static states (shared, decided in prepare/open...)
661
662
    /// init in setup_local_state
663
    std::unique_ptr<SetDataVariants> hash_table_variants = nullptr; // the real data HERE.
664
    std::vector<bool> build_not_ignore_null;
665
666
    // The SET operator's child might have different nullable attributes.
667
    // If a calculation involves both nullable and non-nullable columns, the final output should be a nullable column
668
    Status update_build_not_ignore_null(const vectorized::VExprContextSPtrs& ctxs);
669
670
    /// init in both upstream side.
671
    //The i-th result expr list refers to the i-th child.
672
    std::vector<vectorized::VExprContextSPtrs> child_exprs_lists;
673
674
    /// init in build side
675
    size_t child_quantity;
676
    vectorized::VExprContextSPtrs build_child_exprs;
677
    std::vector<Dependency*> probe_finished_children_dependency;
678
679
    /// init in probe side
680
    std::vector<vectorized::VExprContextSPtrs> probe_child_exprs_lists;
681
682
    std::atomic<bool> ready_for_read = false;
683
684
    /// called in setup_local_state
685
    Status hash_table_init();
686
};
687
688
/// How a local exchange distributes blocks across its channels.
/// The numeric values are spelled out explicitly; keep them stable.
enum class ExchangeType : uint8_t {
    NOOP = 0,
    /// Shuffle data by Crc32HashPartitioner<LocalExchangeChannelIds>.
    HASH_SHUFFLE = 1,
    /// Round-robin passthrough of data blocks.
    PASSTHROUGH = 2,
    /// Shuffle data by Crc32HashPartitioner<ShuffleChannelIds> (e.g. same as storage engine).
    BUCKET_HASH_SHUFFLE = 3,
    /// Passthrough data blocks to all channels.
    BROADCAST = 4,
    /// Passthrough data to channels evenly in an adaptive way.
    ADAPTIVE_PASSTHROUGH = 5,
    /// Send all data to the first channel.
    PASS_TO_ONE = 6,
    /// Merge all data to one channel.
    LOCAL_MERGE_SORT = 7,
};
705
706
0
/// Returns the enumerator name of `idx` as a string.
///
/// The switch deliberately has no `default` label so the compiler can warn
/// when a new ExchangeType is added without a name here. A value outside the
/// enum reaches the LOG(FATAL) at the end.
inline std::string get_exchange_type_name(ExchangeType idx) {
    const char* label = nullptr;
    switch (idx) {
    case ExchangeType::NOOP:
        label = "NOOP";
        break;
    case ExchangeType::HASH_SHUFFLE:
        label = "HASH_SHUFFLE";
        break;
    case ExchangeType::PASSTHROUGH:
        label = "PASSTHROUGH";
        break;
    case ExchangeType::BUCKET_HASH_SHUFFLE:
        label = "BUCKET_HASH_SHUFFLE";
        break;
    case ExchangeType::BROADCAST:
        label = "BROADCAST";
        break;
    case ExchangeType::ADAPTIVE_PASSTHROUGH:
        label = "ADAPTIVE_PASSTHROUGH";
        break;
    case ExchangeType::PASS_TO_ONE:
        label = "PASS_TO_ONE";
        break;
    case ExchangeType::LOCAL_MERGE_SORT:
        label = "LOCAL_MERGE_SORT";
        break;
    }
    if (label != nullptr) {
        return label;
    }
    LOG(FATAL) << "__builtin_unreachable";
    __builtin_unreachable();
}
728
729
struct DataDistribution {
730
0
    DataDistribution(ExchangeType type) : distribution_type(type) {}
731
    DataDistribution(ExchangeType type, const std::vector<TExpr>& partition_exprs_)
732
0
            : distribution_type(type), partition_exprs(partition_exprs_) {}
733
0
    DataDistribution(const DataDistribution& other) = default;
734
0
    bool need_local_exchange() const { return distribution_type != ExchangeType::NOOP; }
735
0
    DataDistribution& operator=(const DataDistribution& other) = default;
736
    ExchangeType distribution_type;
737
    std::vector<TExpr> partition_exprs;
738
};
739
740
class ExchangerBase;
741
742
struct LocalExchangeSharedState : public BasicSharedState {
743
public:
744
    ENABLE_FACTORY_CREATOR(LocalExchangeSharedState);
745
    LocalExchangeSharedState(int num_instances);
746
    ~LocalExchangeSharedState() override;
747
    std::unique_ptr<ExchangerBase> exchanger {};
748
    std::vector<RuntimeProfile::Counter*> mem_counters;
749
    std::atomic<int64_t> mem_usage = 0;
750
    // We need to make sure to add mem_usage first and then enqueue, otherwise sub mem_usage may cause negative mem_usage during concurrent dequeue.
751
    std::mutex le_lock;
752
0
    virtual void create_dependencies(int local_exchange_id) {
753
0
        for (auto& source_dep : source_deps) {
754
0
            source_dep = std::make_shared<Dependency>(local_exchange_id, local_exchange_id,
755
0
                                                      "LOCAL_EXCHANGE_OPERATOR_DEPENDENCY");
756
0
            source_dep->set_shared_state(this);
757
0
        }
758
0
    }
759
    void sub_running_sink_operators();
760
    void sub_running_source_operators(LocalExchangeSourceLocalState& local_state);
761
0
    void _set_always_ready() {
762
0
        for (auto& dep : source_deps) {
763
0
            DCHECK(dep);
764
0
            dep->set_always_ready();
765
0
        }
766
0
        for (auto& dep : sink_deps) {
767
0
            DCHECK(dep);
768
0
            dep->set_always_ready();
769
0
        }
770
0
    }
771
772
0
    virtual std::vector<DependencySPtr> get_dep_by_channel_id(int channel_id) {
773
0
        return {source_deps[channel_id]};
774
0
    }
775
0
    virtual Dependency* get_sink_dep_by_channel_id(int channel_id) { return nullptr; }
776
777
0
    void set_ready_to_read(int channel_id) {
778
0
        auto& dep = source_deps[channel_id];
779
0
        DCHECK(dep) << channel_id;
780
0
        dep->set_ready();
781
0
    }
782
783
0
    void add_mem_usage(int channel_id, size_t delta, bool update_total_mem_usage = true) {
784
0
        mem_counters[channel_id]->update(delta);
785
0
        if (update_total_mem_usage) {
786
0
            add_total_mem_usage(delta, channel_id);
787
0
        }
788
0
    }
789
790
0
    void sub_mem_usage(int channel_id, size_t delta) {
791
0
        mem_counters[channel_id]->update(-(int64_t)delta);
792
0
    }
793
794
0
    virtual void add_total_mem_usage(size_t delta, int channel_id) {
795
0
        if (mem_usage.fetch_add(delta) + delta > config::local_exchange_buffer_mem_limit) {
796
0
            sink_deps.front()->block();
797
0
        }
798
0
    }
799
800
0
    virtual void sub_total_mem_usage(size_t delta, int channel_id) {
801
0
        auto prev_usage = mem_usage.fetch_sub(delta);
802
0
        DCHECK_GE(prev_usage - delta, 0) << "prev_usage: " << prev_usage << " delta: " << delta
803
0
                                         << " channel_id: " << channel_id;
804
0
        if (prev_usage - delta <= config::local_exchange_buffer_mem_limit) {
805
0
            sink_deps.front()->set_ready();
806
0
        }
807
0
    }
808
};
809
810
struct LocalMergeExchangeSharedState : public LocalExchangeSharedState {
811
    ENABLE_FACTORY_CREATOR(LocalMergeExchangeSharedState);
812
    LocalMergeExchangeSharedState(int num_instances)
813
            : LocalExchangeSharedState(num_instances),
814
              _queues_mem_usage(num_instances),
815
0
              _each_queue_limit(config::local_exchange_buffer_mem_limit / num_instances) {
816
0
        for (size_t i = 0; i < num_instances; i++) {
817
0
            _queues_mem_usage[i] = 0;
818
0
        }
819
0
    }
820
821
0
    void create_dependencies(int local_exchange_id) override {
822
0
        sink_deps.resize(source_deps.size());
823
0
        for (size_t i = 0; i < source_deps.size(); i++) {
824
0
            source_deps[i] =
825
0
                    std::make_shared<Dependency>(local_exchange_id, local_exchange_id,
826
0
                                                 "LOCAL_MERGE_EXCHANGE_OPERATOR_DEPENDENCY");
827
0
            source_deps[i]->set_shared_state(this);
828
0
            sink_deps[i] = std::make_shared<Dependency>(
829
0
                    local_exchange_id, local_exchange_id,
830
0
                    "LOCAL_MERGE_EXCHANGE_OPERATOR_SINK_DEPENDENCY", true);
831
0
            sink_deps[i]->set_shared_state(this);
832
0
        }
833
0
    }
834
835
0
    void sub_total_mem_usage(size_t delta, int channel_id) override {
836
0
        auto prev_usage = _queues_mem_usage[channel_id].fetch_sub(delta);
837
0
        DCHECK_GE(prev_usage - delta, 0) << "prev_usage: " << prev_usage << " delta: " << delta
838
0
                                         << " channel_id: " << channel_id;
839
0
        if (prev_usage - delta <= _each_queue_limit) {
840
0
            sink_deps[channel_id]->set_ready();
841
0
        }
842
0
        if (_queues_mem_usage[channel_id] == 0) {
843
0
            source_deps[channel_id]->block();
844
0
        }
845
0
    }
846
0
    void add_total_mem_usage(size_t delta, int channel_id) override {
847
0
        if (_queues_mem_usage[channel_id].fetch_add(delta) + delta > _each_queue_limit) {
848
0
            sink_deps[channel_id]->block();
849
0
        }
850
0
        source_deps[channel_id]->set_ready();
851
0
    }
852
853
0
    Dependency* get_sink_dep_by_channel_id(int channel_id) override {
854
0
        return sink_deps[channel_id].get();
855
0
    }
856
857
0
    std::vector<DependencySPtr> get_dep_by_channel_id(int channel_id) override {
858
0
        return source_deps;
859
0
    }
860
861
private:
862
    std::vector<std::atomic_int64_t> _queues_mem_usage;
863
    const int64_t _each_queue_limit;
864
};
865
#include "common/compile_check_end.h"
866
} // namespace doris::pipeline