Coverage Report

Created: 2026-03-12 00:34

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/be/src/exec/pipeline/dependency.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#ifdef __APPLE__
21
#include <netinet/in.h>
22
#include <sys/_types/_u_int.h>
23
#endif
24
25
#include <concurrentqueue.h>
26
#include <gen_cpp/internal_service.pb.h>
27
#include <sqltypes.h>
28
29
#include <atomic>
30
#include <functional>
31
#include <memory>
32
#include <mutex>
33
#include <thread>
34
#include <utility>
35
36
#include "common/config.h"
37
#include "common/logging.h"
38
#include "core/block/block.h"
39
#include "core/types.h"
40
#include "exec/common/agg_utils.h"
41
#include "exec/common/join_utils.h"
42
#include "exec/common/set_utils.h"
43
#include "exec/operator/data_queue.h"
44
#include "exec/operator/join/process_hash_table_probe.h"
45
#include "exec/sort/partition_sorter.h"
46
#include "exec/sort/sorter.h"
47
#include "exec/spill/spill_stream.h"
48
#include "util/brpc_closure.h"
49
#include "util/stack_util.h"
50
51
namespace doris {
52
class AggFnEvaluator;
53
class VSlotRef;
54
} // namespace doris
55
56
namespace doris {
57
#include "common/compile_check_begin.h"
58
class Dependency;
59
class PipelineTask;
60
struct BasicSharedState;
61
using DependencySPtr = std::shared_ptr<Dependency>;
62
class LocalExchangeSourceLocalState;
63
64
static constexpr auto SLOW_DEPENDENCY_THRESHOLD = 60 * 1000L * 1000L * 1000L;
65
static constexpr auto TIME_UNIT_DEPENDENCY_LOG = 30 * 1000L * 1000L * 1000L;
66
static_assert(TIME_UNIT_DEPENDENCY_LOG < SLOW_DEPENDENCY_THRESHOLD);
67
68
struct BasicSharedState {
69
    ENABLE_FACTORY_CREATOR(BasicSharedState)
70
71
    template <class TARGET>
72
96.4k
    TARGET* cast() {
73
96.4k
        DCHECK(dynamic_cast<TARGET*>(this))
74
0
                << " Mismatch type! Current type is " << typeid(*this).name()
75
0
                << " and expect type is" << typeid(TARGET).name();
76
96.4k
        return reinterpret_cast<TARGET*>(this);
77
96.4k
    }
_ZN5doris16BasicSharedState4castINS_19HashJoinSharedStateEEEPT_v
Line
Count
Source
72
96.0k
    TARGET* cast() {
73
96.0k
        DCHECK(dynamic_cast<TARGET*>(this))
74
0
                << " Mismatch type! Current type is " << typeid(*this).name()
75
0
                << " and expect type is" << typeid(TARGET).name();
76
96.0k
        return reinterpret_cast<TARGET*>(this);
77
96.0k
    }
_ZN5doris16BasicSharedState4castINS_30PartitionedHashJoinSharedStateEEEPT_v
Line
Count
Source
72
3
    TARGET* cast() {
73
3
        DCHECK(dynamic_cast<TARGET*>(this))
74
0
                << " Mismatch type! Current type is " << typeid(*this).name()
75
0
                << " and expect type is" << typeid(TARGET).name();
76
3
        return reinterpret_cast<TARGET*>(this);
77
3
    }
_ZN5doris16BasicSharedState4castINS_15SortSharedStateEEEPT_v
Line
Count
Source
72
34
    TARGET* cast() {
73
34
        DCHECK(dynamic_cast<TARGET*>(this))
74
0
                << " Mismatch type! Current type is " << typeid(*this).name()
75
0
                << " and expect type is" << typeid(TARGET).name();
76
34
        return reinterpret_cast<TARGET*>(this);
77
34
    }
_ZN5doris16BasicSharedState4castINS_20SpillSortSharedStateEEEPT_v
Line
Count
Source
72
13
    TARGET* cast() {
73
13
        DCHECK(dynamic_cast<TARGET*>(this))
74
0
                << " Mismatch type! Current type is " << typeid(*this).name()
75
0
                << " and expect type is" << typeid(TARGET).name();
76
13
        return reinterpret_cast<TARGET*>(this);
77
13
    }
Unexecuted instantiation: _ZN5doris16BasicSharedState4castINS_25NestedLoopJoinSharedStateEEEPT_v
_ZN5doris16BasicSharedState4castINS_19AnalyticSharedStateEEEPT_v
Line
Count
Source
72
18
    TARGET* cast() {
73
18
        DCHECK(dynamic_cast<TARGET*>(this))
74
0
                << " Mismatch type! Current type is " << typeid(*this).name()
75
0
                << " and expect type is" << typeid(TARGET).name();
76
18
        return reinterpret_cast<TARGET*>(this);
77
18
    }
_ZN5doris16BasicSharedState4castINS_14AggSharedStateEEEPT_v
Line
Count
Source
72
70
    TARGET* cast() {
73
70
        DCHECK(dynamic_cast<TARGET*>(this))
74
0
                << " Mismatch type! Current type is " << typeid(*this).name()
75
0
                << " and expect type is" << typeid(TARGET).name();
76
70
        return reinterpret_cast<TARGET*>(this);
77
70
    }
_ZN5doris16BasicSharedState4castINS_25PartitionedAggSharedStateEEEPT_v
Line
Count
Source
72
16
    TARGET* cast() {
73
16
        DCHECK(dynamic_cast<TARGET*>(this))
74
0
                << " Mismatch type! Current type is " << typeid(*this).name()
75
0
                << " and expect type is" << typeid(TARGET).name();
76
16
        return reinterpret_cast<TARGET*>(this);
77
16
    }
_ZN5doris16BasicSharedState4castINS_16UnionSharedStateEEEPT_v
Line
Count
Source
72
4
    TARGET* cast() {
73
4
        DCHECK(dynamic_cast<TARGET*>(this))
74
0
                << " Mismatch type! Current type is " << typeid(*this).name()
75
0
                << " and expect type is" << typeid(TARGET).name();
76
4
        return reinterpret_cast<TARGET*>(this);
77
4
    }
_ZN5doris16BasicSharedState4castINS_28PartitionSortNodeSharedStateEEEPT_v
Line
Count
Source
72
204
    TARGET* cast() {
73
204
        DCHECK(dynamic_cast<TARGET*>(this))
74
0
                << " Mismatch type! Current type is " << typeid(*this).name()
75
0
                << " and expect type is" << typeid(TARGET).name();
76
204
        return reinterpret_cast<TARGET*>(this);
77
204
    }
Unexecuted instantiation: _ZN5doris16BasicSharedState4castINS_20MultiCastSharedStateEEEPT_v
_ZN5doris16BasicSharedState4castINS_14SetSharedStateEEEPT_v
Line
Count
Source
72
33
    TARGET* cast() {
73
33
        DCHECK(dynamic_cast<TARGET*>(this))
74
0
                << " Mismatch type! Current type is " << typeid(*this).name()
75
0
                << " and expect type is" << typeid(TARGET).name();
76
33
        return reinterpret_cast<TARGET*>(this);
77
33
    }
Unexecuted instantiation: _ZN5doris16BasicSharedState4castINS_24LocalExchangeSharedStateEEEPT_v
_ZN5doris16BasicSharedState4castIS0_EEPT_v
Line
Count
Source
72
11
    TARGET* cast() {
73
11
        DCHECK(dynamic_cast<TARGET*>(this))
74
0
                << " Mismatch type! Current type is " << typeid(*this).name()
75
0
                << " and expect type is" << typeid(TARGET).name();
76
11
        return reinterpret_cast<TARGET*>(this);
77
11
    }
_ZN5doris16BasicSharedState4castINS_20DataQueueSharedStateEEEPT_v
Line
Count
Source
72
6
    TARGET* cast() {
73
6
        DCHECK(dynamic_cast<TARGET*>(this))
74
0
                << " Mismatch type! Current type is " << typeid(*this).name()
75
0
                << " and expect type is" << typeid(TARGET).name();
76
6
        return reinterpret_cast<TARGET*>(this);
77
6
    }
Unexecuted instantiation: _ZN5doris16BasicSharedState4castINS_17RecCTESharedStateEEEPT_v
78
    template <class TARGET>
79
    const TARGET* cast() const {
80
        DCHECK(dynamic_cast<const TARGET*>(this))
81
                << " Mismatch type! Current type is " << typeid(*this).name()
82
                << " and expect type is" << typeid(TARGET).name();
83
        return reinterpret_cast<const TARGET*>(this);
84
    }
85
    std::vector<DependencySPtr> source_deps;
86
    std::vector<DependencySPtr> sink_deps;
87
    int id = 0;
88
    std::set<int> related_op_ids;
89
90
72.3k
    virtual ~BasicSharedState() = default;
91
92
    void create_source_dependencies(int num_sources, int operator_id, int node_id,
93
                                    const std::string& name);
94
    Dependency* create_source_dependency(int operator_id, int node_id, const std::string& name);
95
96
    Dependency* create_sink_dependency(int dest_id, int node_id, const std::string& name);
97
24
    std::vector<DependencySPtr> get_dep_by_channel_id(int channel_id) {
98
24
        DCHECK_LT(channel_id, source_deps.size());
99
24
        return {source_deps[channel_id]};
100
24
    }
101
};
102
103
class Dependency : public std::enable_shared_from_this<Dependency> {
104
public:
105
    ENABLE_FACTORY_CREATOR(Dependency);
106
    Dependency(int id, int node_id, std::string name, bool ready = false)
107
484k
            : _id(id), _node_id(node_id), _name(std::move(name)), _ready(ready) {}
108
484k
    virtual ~Dependency() = default;
109
110
0
    [[nodiscard]] int id() const { return _id; }
111
96.5k
    [[nodiscard]] virtual std::string name() const { return _name; }
112
4
    BasicSharedState* shared_state() { return _shared_state; }
113
144k
    void set_shared_state(BasicSharedState* shared_state) { _shared_state = shared_state; }
114
    virtual std::string debug_string(int indentation_level = 0);
115
833M
    bool ready() const { return _ready; }
116
117
    // Start the watcher. We use it to count how long this dependency blocks the current pipeline task.
118
24
    void start_watcher() { _watcher.start(); }
119
96.1k
    [[nodiscard]] int64_t watcher_elapse_time() { return _watcher.elapsed_time(); }
120
121
    // Which dependency the current pipeline task is blocked by. `nullptr` if this dependency is ready.
122
    [[nodiscard]] Dependency* is_blocked_by(std::shared_ptr<PipelineTask> task = nullptr);
123
    // Notify downstream pipeline tasks this dependency is ready.
124
    void set_ready();
125
48.6k
    void set_ready_to_read(int channel_id = 0) {
126
48.6k
        DCHECK_LT(channel_id, _shared_state->source_deps.size()) << debug_string();
127
48.6k
        _shared_state->source_deps[channel_id]->set_ready();
128
48.6k
    }
129
0
    void set_ready_to_write() {
130
0
        DCHECK_EQ(_shared_state->sink_deps.size(), 1) << debug_string();
131
0
        _shared_state->sink_deps.front()->set_ready();
132
0
    }
133
134
    // Notify downstream pipeline tasks this dependency is blocked.
135
1.91k
    void block() {
136
1.91k
        if (_always_ready) {
137
13
            return;
138
13
        }
139
1.89k
        std::unique_lock<std::mutex> lc(_always_ready_lock);
140
1.89k
        if (_always_ready) {
141
0
            return;
142
0
        }
143
1.89k
        _ready = false;
144
1.89k
    }
145
146
133
    void set_always_ready() {
147
133
        if (_always_ready) {
148
35
            return;
149
35
        }
150
98
        std::unique_lock<std::mutex> lc(_always_ready_lock);
151
98
        if (_always_ready) {
152
0
            return;
153
0
        }
154
98
        _always_ready = true;
155
98
        set_ready();
156
98
    }
157
158
protected:
159
    void _add_block_task(std::shared_ptr<PipelineTask> task);
160
161
    const int _id;
162
    const int _node_id;
163
    const std::string _name;
164
    std::atomic<bool> _ready;
165
166
    BasicSharedState* _shared_state = nullptr;
167
    MonotonicStopWatch _watcher;
168
169
    std::mutex _task_lock;
170
    std::vector<std::weak_ptr<PipelineTask>> _blocked_task;
171
172
    // If `_always_ready` is true, `block()` will never block tasks.
173
    std::atomic<bool> _always_ready = false;
174
    std::mutex _always_ready_lock;
175
};
176
177
struct FakeSharedState final : public BasicSharedState {
178
    ENABLE_FACTORY_CREATOR(FakeSharedState)
179
};
180
181
class CountedFinishDependency final : public Dependency {
182
public:
183
    using SharedState = FakeSharedState;
184
    CountedFinishDependency(int id, int node_id, std::string name)
185
96.0k
            : Dependency(id, node_id, std::move(name), true) {}
186
187
8
    void add(uint32_t count = 1) {
188
8
        std::unique_lock<std::mutex> l(_mtx);
189
8
        if (!_counter) {
190
7
            block();
191
7
        }
192
8
        _counter += count;
193
8
    }
194
195
7
    void sub() {
196
7
        std::unique_lock<std::mutex> l(_mtx);
197
7
        _counter--;
198
7
        if (!_counter) {
199
6
            set_ready();
200
6
        }
201
7
    }
202
203
    std::string debug_string(int indentation_level = 0) override;
204
205
private:
206
    std::mutex _mtx;
207
    uint32_t _counter = 0;
208
};
209
210
struct RuntimeFilterTimerQueue;
211
class RuntimeFilterTimer {
212
public:
213
    RuntimeFilterTimer(int64_t registration_time, int32_t wait_time_ms,
214
                       std::shared_ptr<Dependency> parent, bool force_wait_timeout = false)
215
2
            : _parent(std::move(parent)),
216
2
              _registration_time(registration_time),
217
2
              _wait_time_ms(wait_time_ms),
218
2
              _force_wait_timeout(force_wait_timeout) {}
219
220
    // Called by runtime filter producer.
221
    void call_ready();
222
223
    // Called by RuntimeFilterTimerQueue, which is responsible for checking whether this rf has timed out.
224
    void call_timeout();
225
226
2
    int64_t registration_time() const { return _registration_time; }
227
2
    int32_t wait_time_ms() const { return _wait_time_ms; }
228
229
    void set_local_runtime_filter_dependencies(
230
0
            const std::vector<std::shared_ptr<Dependency>>& deps) {
231
0
        _local_runtime_filter_dependencies = deps;
232
0
    }
233
234
    bool should_be_check_timeout();
235
236
2
    bool force_wait_timeout() { return _force_wait_timeout; }
237
238
private:
239
    friend struct RuntimeFilterTimerQueue;
240
    std::shared_ptr<Dependency> _parent = nullptr;
241
    std::vector<std::shared_ptr<Dependency>> _local_runtime_filter_dependencies;
242
    std::mutex _lock;
243
    int64_t _registration_time;
244
    const int32_t _wait_time_ms;
245
    // true only for group_commit_scan_operator
246
    bool _force_wait_timeout;
247
};
248
249
struct RuntimeFilterTimerQueue {
250
    constexpr static int64_t interval = 10;
251
1
    void run() { _thread.detach(); }
252
    void start();
253
254
0
    void stop() {
255
0
        _stop = true;
256
0
        cv.notify_all();
257
0
        wait_for_shutdown();
258
0
    }
259
260
0
    void wait_for_shutdown() const {
261
0
        while (!_shutdown) {
262
0
            std::this_thread::sleep_for(std::chrono::milliseconds(interval));
263
0
        }
264
0
    }
265
266
0
    ~RuntimeFilterTimerQueue() = default;
267
1
    RuntimeFilterTimerQueue() { _thread = std::thread(&RuntimeFilterTimerQueue::start, this); }
268
1
    void push_filter_timer(std::vector<std::shared_ptr<RuntimeFilterTimer>>&& filter) {
269
1
        std::unique_lock<std::mutex> lc(_que_lock);
270
1
        _que.insert(_que.end(), filter.begin(), filter.end());
271
1
        cv.notify_all();
272
1
    }
273
274
    std::thread _thread;
275
    std::condition_variable cv;
276
    std::mutex cv_m;
277
    std::mutex _que_lock;
278
    std::atomic_bool _stop = false;
279
    std::atomic_bool _shutdown = false;
280
    std::list<std::shared_ptr<RuntimeFilterTimer>> _que;
281
};
282
283
struct AggSharedState : public BasicSharedState {
284
    ENABLE_FACTORY_CREATOR(AggSharedState)
285
public:
286
40
    AggSharedState() { agg_data = std::make_unique<AggregatedDataVariants>(); }
287
40
    ~AggSharedState() override {
288
40
        if (!probe_expr_ctxs.empty()) {
289
30
            _close_with_serialized_key();
290
30
        } else {
291
10
            _close_without_key();
292
10
        }
293
40
    }
294
295
    Status reset_hash_table();
296
297
    bool do_limit_filter(Block* block, size_t num_rows, const std::vector<int>* key_locs = nullptr);
298
    void build_limit_heap(size_t hash_table_size);
299
300
    // We should call this function only at 1st phase.
301
    // 1st phase: is_merge=true, only have one SlotRef.
302
    // 2nd phase: is_merge=false, maybe have multiple exprs.
303
    static int get_slot_column_id(const AggFnEvaluator* evaluator);
304
305
    AggregatedDataVariantsUPtr agg_data = nullptr;
306
    std::unique_ptr<AggregateDataContainer> aggregate_data_container;
307
    std::vector<AggFnEvaluator*> aggregate_evaluators;
308
    // group by k1,k2
309
    VExprContextSPtrs probe_expr_ctxs;
310
    size_t input_num_rows = 0;
311
    std::vector<AggregateDataPtr> values;
312
    /// The total size of the row from the aggregate functions.
313
    size_t total_size_of_aggregate_states = 0;
314
    size_t align_aggregate_states = 1;
315
    /// The offset to the n-th aggregate function in a row of aggregate functions.
316
    Sizes offsets_of_aggregate_states;
317
    std::vector<size_t> make_nullable_keys;
318
319
    bool agg_data_created_without_key = false;
320
    bool enable_spill = false;
321
    bool reach_limit = false;
322
323
    int64_t limit = -1;
324
    bool do_sort_limit = false;
325
    MutableColumns limit_columns;
326
    int limit_columns_min = -1;
327
    PaddedPODArray<uint8_t> need_computes;
328
    std::vector<uint8_t> cmp_res;
329
    std::vector<int> order_directions;
330
    std::vector<int> null_directions;
331
332
    struct HeapLimitCursor {
333
        HeapLimitCursor(int row_id, MutableColumns& limit_columns,
334
                        std::vector<int>& order_directions, std::vector<int>& null_directions)
335
32
                : _row_id(row_id),
336
32
                  _limit_columns(limit_columns),
337
32
                  _order_directions(order_directions),
338
32
                  _null_directions(null_directions) {}
339
340
        HeapLimitCursor(const HeapLimitCursor& other) = default;
341
342
        HeapLimitCursor(HeapLimitCursor&& other) noexcept
343
152
                : _row_id(other._row_id),
344
152
                  _limit_columns(other._limit_columns),
345
152
                  _order_directions(other._order_directions),
346
152
                  _null_directions(other._null_directions) {}
347
348
0
        HeapLimitCursor& operator=(const HeapLimitCursor& other) noexcept {
349
0
            _row_id = other._row_id;
350
0
            return *this;
351
0
        }
352
353
129
        HeapLimitCursor& operator=(HeapLimitCursor&& other) noexcept {
354
129
            _row_id = other._row_id;
355
129
            return *this;
356
129
        }
357
358
79
        bool operator<(const HeapLimitCursor& rhs) const {
359
85
            for (int i = 0; i < _limit_columns.size(); ++i) {
360
79
                const auto& _limit_column = _limit_columns[i];
361
79
                auto res = _limit_column->compare_at(_row_id, rhs._row_id, *_limit_column,
362
79
                                                     _null_directions[i]) *
363
79
                           _order_directions[i];
364
79
                if (res < 0) {
365
46
                    return true;
366
46
                } else if (res > 0) {
367
27
                    return false;
368
27
                }
369
79
            }
370
6
            return false;
371
79
        }
372
373
        int _row_id;
374
        MutableColumns& _limit_columns;
375
        std::vector<int>& _order_directions;
376
        std::vector<int>& _null_directions;
377
    };
378
379
    std::priority_queue<HeapLimitCursor> limit_heap;
380
381
    // Refresh the top limit heap with a new row
382
    void refresh_top_limit(size_t row_id, const ColumnRawPtrs& key_columns);
383
384
    Arena agg_arena_pool;
385
    Arena agg_profile_arena;
386
387
private:
388
    MutableColumns _get_keys_hash_table();
389
390
30
    void _close_with_serialized_key() {
391
30
        std::visit(Overload {[&](std::monostate& arg) -> void {
392
                                 // Do nothing
393
0
                             },
394
30
                             [&](auto& agg_method) -> void {
395
30
                                 auto& data = *agg_method.hash_table;
396
91
                                 data.for_each_mapped([&](auto& mapped) {
397
91
                                     if (mapped) {
398
91
                                         _destroy_agg_status(mapped);
399
91
                                         mapped = nullptr;
400
91
                                     }
401
91
                                 });
Unexecuted instantiation: _ZZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_15MethodKeysFixedI9PHHashMapIN4wide7integerILm256EjEEPc9HashCRC32IS9_EEEEEEvS2_ENKUlS2_E_clISA_EEDaS2_
Unexecuted instantiation: _ZZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_15MethodKeysFixedI9PHHashMapINS_7UInt136EPc9HashCRC32IS7_EEEEEEvS2_ENKUlS2_E_clIS8_EEDaS2_
Unexecuted instantiation: _ZZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_15MethodKeysFixedI9PHHashMapIN4wide7integerILm128EjEEPc9HashCRC32IS9_EEEEEEvS2_ENKUlS2_E_clISA_EEDaS2_
Unexecuted instantiation: _ZZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_15MethodKeysFixedI9PHHashMapINS_7UInt104EPc9HashCRC32IS7_EEEEEEvS2_ENKUlS2_E_clIS8_EEDaS2_
Unexecuted instantiation: _ZZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_15MethodKeysFixedI9PHHashMapINS_6UInt96EPc9HashCRC32IS7_EEEEEEvS2_ENKUlS2_E_clIS8_EEDaS2_
Unexecuted instantiation: _ZZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_15MethodKeysFixedI9PHHashMapINS_6UInt72EPc9HashCRC32IS7_EEEEEEvS2_ENKUlS2_E_clIS8_EEDaS2_
Unexecuted instantiation: _ZZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_15MethodKeysFixedI9PHHashMapImPc9HashCRC32ImEEEEEEvS2_ENKUlS2_E_clIS7_EEDaS2_
Unexecuted instantiation: _ZZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_26MethodSingleNullableColumnINS_19MethodStringNoCacheINS_15DataWithNullKeyINS_13StringHashMapIPcNS_9AllocatorILb1ELb1ELb0ENS_22DefaultMemoryAllocatorELb1EEEEEEEEEEEEEvS2_ENKUlS2_E_clIS9_EEDaS2_
Unexecuted instantiation: _ZZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_26MethodSingleNullableColumnINS_15MethodOneNumberIN4wide7integerILm256EjEENS_15DataWithNullKeyI9PHHashMapIS9_Pc9HashCRC32IS9_EEEEEEEEEEvS2_ENKUlS2_E_clISC_EEDaS2_
Unexecuted instantiation: _ZZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_26MethodSingleNullableColumnINS_15MethodOneNumberIN4wide7integerILm128EjEENS_15DataWithNullKeyI9PHHashMapIS9_Pc9HashCRC32IS9_EEEEEEEEEEvS2_ENKUlS2_E_clISC_EEDaS2_
_ZZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_26MethodSingleNullableColumnINS_15MethodOneNumberImNS_15DataWithNullKeyI9PHHashMapImPc14HashMixWrapperIm9HashCRC32ImEEEEEEEEEEEvS2_ENKUlS2_E_clIS9_EEDaS2_
Line
Count
Source
396
20
                                 data.for_each_mapped([&](auto& mapped) {
397
20
                                     if (mapped) {
398
20
                                         _destroy_agg_status(mapped);
399
20
                                         mapped = nullptr;
400
20
                                     }
401
20
                                 });
Unexecuted instantiation: _ZZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_26MethodSingleNullableColumnINS_15MethodOneNumberIjNS_15DataWithNullKeyI9PHHashMapIjPc14HashMixWrapperIj9HashCRC32IjEEEEEEEEEEEvS2_ENKUlS2_E_clIS9_EEDaS2_
Unexecuted instantiation: _ZZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_26MethodSingleNullableColumnINS_15MethodOneNumberImNS_15DataWithNullKeyI9PHHashMapImPc9HashCRC32ImEEEEEEEEEEvS2_ENKUlS2_E_clIS9_EEDaS2_
Unexecuted instantiation: _ZZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_26MethodSingleNullableColumnINS_15MethodOneNumberIjNS_15DataWithNullKeyI9PHHashMapIjPc9HashCRC32IjEEEEEEEEEEvS2_ENKUlS2_E_clIS9_EEDaS2_
Unexecuted instantiation: _ZZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_26MethodSingleNullableColumnINS_15MethodOneNumberItNS_15DataWithNullKeyI9PHHashMapItPc9HashCRC32ItEEEEEEEEEEvS2_ENKUlS2_E_clIS9_EEDaS2_
Unexecuted instantiation: _ZZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_26MethodSingleNullableColumnINS_15MethodOneNumberIhNS_15DataWithNullKeyI9PHHashMapIhPc9HashCRC32IhEEEEEEEEEEvS2_ENKUlS2_E_clIS9_EEDaS2_
_ZZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_15MethodOneNumberIm9PHHashMapImPc14HashMixWrapperIm9HashCRC32ImEEEEEEEvS2_ENKUlS2_E_clIS7_EEDaS2_
Line
Count
Source
396
55
                                 data.for_each_mapped([&](auto& mapped) {
397
55
                                     if (mapped) {
398
55
                                         _destroy_agg_status(mapped);
399
55
                                         mapped = nullptr;
400
55
                                     }
401
55
                                 });
_ZZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_15MethodOneNumberIj9PHHashMapIjPc14HashMixWrapperIj9HashCRC32IjEEEEEEEvS2_ENKUlS2_E_clIS7_EEDaS2_
Line
Count
Source
396
16
                                 data.for_each_mapped([&](auto& mapped) {
397
16
                                     if (mapped) {
398
16
                                         _destroy_agg_status(mapped);
399
16
                                         mapped = nullptr;
400
16
                                     }
401
16
                                 });
Unexecuted instantiation: _ZZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_15MethodOneNumberIN4wide7integerILm256EjEE9PHHashMapIS8_Pc9HashCRC32IS8_EEEEEEvS2_ENKUlS2_E_clISA_EEDaS2_
Unexecuted instantiation: _ZZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_15MethodOneNumberIN4wide7integerILm128EjEE9PHHashMapIS8_Pc9HashCRC32IS8_EEEEEEvS2_ENKUlS2_E_clISA_EEDaS2_
Unexecuted instantiation: _ZZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_19MethodStringNoCacheINS_13StringHashMapIPcNS_9AllocatorILb1ELb1ELb0ENS_22DefaultMemoryAllocatorELb1EEEEEEEEEvS2_ENKUlS2_E_clIS7_EEDaS2_
Unexecuted instantiation: _ZZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_15MethodOneNumberIm9PHHashMapImPc9HashCRC32ImEEEEEEvS2_ENKUlS2_E_clIS7_EEDaS2_
Unexecuted instantiation: _ZZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_15MethodOneNumberIj9PHHashMapIjPc9HashCRC32IjEEEEEEvS2_ENKUlS2_E_clIS7_EEDaS2_
Unexecuted instantiation: _ZZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_15MethodOneNumberIt9PHHashMapItPc9HashCRC32ItEEEEEEvS2_ENKUlS2_E_clIS7_EEDaS2_
Unexecuted instantiation: _ZZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_15MethodOneNumberIh9PHHashMapIhPc9HashCRC32IhEEEEEEvS2_ENKUlS2_E_clIS7_EEDaS2_
Unexecuted instantiation: _ZZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_16MethodSerializedI9PHHashMapINS_9StringRefEPc11DefaultHashIS7_vEEEEEEvS2_ENKUlS2_E_clIS8_EEDaS2_
402
30
                                 if (data.has_null_key_data()) {
403
5
                                     _destroy_agg_status(
404
5
                                             data.template get_null_key_data<AggregateDataPtr>());
405
5
                                 }
406
30
                             }},
Unexecuted instantiation: _ZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_15MethodKeysFixedI9PHHashMapIN4wide7integerILm256EjEEPc9HashCRC32IS9_EEEEEEvS2_
Unexecuted instantiation: _ZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_15MethodKeysFixedI9PHHashMapINS_7UInt136EPc9HashCRC32IS7_EEEEEEvS2_
Unexecuted instantiation: _ZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_15MethodKeysFixedI9PHHashMapIN4wide7integerILm128EjEEPc9HashCRC32IS9_EEEEEEvS2_
Unexecuted instantiation: _ZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_15MethodKeysFixedI9PHHashMapINS_7UInt104EPc9HashCRC32IS7_EEEEEEvS2_
Unexecuted instantiation: _ZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_15MethodKeysFixedI9PHHashMapINS_6UInt96EPc9HashCRC32IS7_EEEEEEvS2_
Unexecuted instantiation: _ZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_15MethodKeysFixedI9PHHashMapINS_6UInt72EPc9HashCRC32IS7_EEEEEEvS2_
Unexecuted instantiation: _ZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_15MethodKeysFixedI9PHHashMapImPc9HashCRC32ImEEEEEEvS2_
Unexecuted instantiation: _ZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_26MethodSingleNullableColumnINS_19MethodStringNoCacheINS_15DataWithNullKeyINS_13StringHashMapIPcNS_9AllocatorILb1ELb1ELb0ENS_22DefaultMemoryAllocatorELb1EEEEEEEEEEEEEvS2_
Unexecuted instantiation: _ZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_26MethodSingleNullableColumnINS_15MethodOneNumberIN4wide7integerILm256EjEENS_15DataWithNullKeyI9PHHashMapIS9_Pc9HashCRC32IS9_EEEEEEEEEEvS2_
Unexecuted instantiation: _ZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_26MethodSingleNullableColumnINS_15MethodOneNumberIN4wide7integerILm128EjEENS_15DataWithNullKeyI9PHHashMapIS9_Pc9HashCRC32IS9_EEEEEEEEEEvS2_
_ZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_26MethodSingleNullableColumnINS_15MethodOneNumberImNS_15DataWithNullKeyI9PHHashMapImPc14HashMixWrapperIm9HashCRC32ImEEEEEEEEEEEvS2_
Line
Count
Source
394
5
                             [&](auto& agg_method) -> void {
395
5
                                 auto& data = *agg_method.hash_table;
396
5
                                 data.for_each_mapped([&](auto& mapped) {
397
5
                                     if (mapped) {
398
5
                                         _destroy_agg_status(mapped);
399
5
                                         mapped = nullptr;
400
5
                                     }
401
5
                                 });
402
5
                                 if (data.has_null_key_data()) {
403
5
                                     _destroy_agg_status(
404
5
                                             data.template get_null_key_data<AggregateDataPtr>());
405
5
                                 }
406
5
                             }},
Unexecuted instantiation: _ZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_26MethodSingleNullableColumnINS_15MethodOneNumberIjNS_15DataWithNullKeyI9PHHashMapIjPc14HashMixWrapperIj9HashCRC32IjEEEEEEEEEEEvS2_
Unexecuted instantiation: _ZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_26MethodSingleNullableColumnINS_15MethodOneNumberImNS_15DataWithNullKeyI9PHHashMapImPc9HashCRC32ImEEEEEEEEEEvS2_
Unexecuted instantiation: _ZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_26MethodSingleNullableColumnINS_15MethodOneNumberIjNS_15DataWithNullKeyI9PHHashMapIjPc9HashCRC32IjEEEEEEEEEEvS2_
Unexecuted instantiation: _ZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_26MethodSingleNullableColumnINS_15MethodOneNumberItNS_15DataWithNullKeyI9PHHashMapItPc9HashCRC32ItEEEEEEEEEEvS2_
Unexecuted instantiation: _ZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_26MethodSingleNullableColumnINS_15MethodOneNumberIhNS_15DataWithNullKeyI9PHHashMapIhPc9HashCRC32IhEEEEEEEEEEvS2_
_ZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_15MethodOneNumberIm9PHHashMapImPc14HashMixWrapperIm9HashCRC32ImEEEEEEEvS2_
Line
Count
Source
394
14
                             [&](auto& agg_method) -> void {
395
14
                                 auto& data = *agg_method.hash_table;
396
14
                                 data.for_each_mapped([&](auto& mapped) {
397
14
                                     if (mapped) {
398
14
                                         _destroy_agg_status(mapped);
399
14
                                         mapped = nullptr;
400
14
                                     }
401
14
                                 });
402
14
                                 if (data.has_null_key_data()) {
403
0
                                     _destroy_agg_status(
404
0
                                             data.template get_null_key_data<AggregateDataPtr>());
405
0
                                 }
406
14
                             }},
_ZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_15MethodOneNumberIj9PHHashMapIjPc14HashMixWrapperIj9HashCRC32IjEEEEEEEvS2_
Line
Count
Source
394
11
                             [&](auto& agg_method) -> void {
395
11
                                 auto& data = *agg_method.hash_table;
396
11
                                 data.for_each_mapped([&](auto& mapped) {
397
11
                                     if (mapped) {
398
11
                                         _destroy_agg_status(mapped);
399
11
                                         mapped = nullptr;
400
11
                                     }
401
11
                                 });
402
11
                                 if (data.has_null_key_data()) {
403
0
                                     _destroy_agg_status(
404
0
                                             data.template get_null_key_data<AggregateDataPtr>());
405
0
                                 }
406
11
                             }},
Unexecuted instantiation: _ZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_15MethodOneNumberIN4wide7integerILm256EjEE9PHHashMapIS8_Pc9HashCRC32IS8_EEEEEEvS2_
Unexecuted instantiation: _ZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_15MethodOneNumberIN4wide7integerILm128EjEE9PHHashMapIS8_Pc9HashCRC32IS8_EEEEEEvS2_
Unexecuted instantiation: _ZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_19MethodStringNoCacheINS_13StringHashMapIPcNS_9AllocatorILb1ELb1ELb0ENS_22DefaultMemoryAllocatorELb1EEEEEEEEEvS2_
Unexecuted instantiation: _ZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_15MethodOneNumberIm9PHHashMapImPc9HashCRC32ImEEEEEEvS2_
Unexecuted instantiation: _ZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_15MethodOneNumberIj9PHHashMapIjPc9HashCRC32IjEEEEEEvS2_
Unexecuted instantiation: _ZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_15MethodOneNumberIt9PHHashMapItPc9HashCRC32ItEEEEEEvS2_
Unexecuted instantiation: _ZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_15MethodOneNumberIh9PHHashMapIhPc9HashCRC32IhEEEEEEvS2_
Unexecuted instantiation: _ZZN5doris14AggSharedState26_close_with_serialized_keyEvENKUlRT_E_clINS_16MethodSerializedI9PHHashMapINS_9StringRefEPc11DefaultHashIS7_vEEEEEEvS2_
407
30
                   agg_data->method_variant);
408
30
    }
409
410
10
    void _close_without_key() {
411
        // because prepare may have failed, and couldn't create agg data.
412
        // but finally call close to destroy agg data, if agg data has bitmapValue
413
        // it will core dump, because it's not initialized
414
10
        if (agg_data_created_without_key) {
415
8
            _destroy_agg_status(agg_data->without_key);
416
8
            agg_data_created_without_key = false;
417
8
        }
418
10
    }
419
    void _destroy_agg_status(AggregateDataPtr data);
420
};
421
422
struct BasicSpillSharedState {
423
55
    virtual ~BasicSpillSharedState() = default;
424
425
    // These two counters are shared to spill source operators as the initial value
426
    // of 'SpillWriteFileCurrentBytes' and 'SpillWriteFileCurrentCount'.
427
    // Total bytes of spill data written to disk file(after serialized)
428
    RuntimeProfile::Counter* _spill_write_file_total_size = nullptr;
429
    RuntimeProfile::Counter* _spill_file_total_count = nullptr;
430
431
31
    void setup_shared_profile(RuntimeProfile* sink_profile) {
432
31
        _spill_file_total_count =
433
31
                ADD_COUNTER_WITH_LEVEL(sink_profile, "SpillWriteFileTotalCount", TUnit::UNIT, 1);
434
31
        _spill_write_file_total_size =
435
31
                ADD_COUNTER_WITH_LEVEL(sink_profile, "SpillWriteFileBytes", TUnit::BYTES, 1);
436
31
    }
437
438
    virtual void update_spill_stream_profiles(RuntimeProfile* source_profile) = 0;
439
};
440
441
struct AggSpillPartition;
442
struct PartitionedAggSharedState : public BasicSharedState,
443
                                   public BasicSpillSharedState,
444
                                   public std::enable_shared_from_this<PartitionedAggSharedState> {
445
    ENABLE_FACTORY_CREATOR(PartitionedAggSharedState)
446
447
12
    PartitionedAggSharedState() = default;
448
12
    ~PartitionedAggSharedState() override = default;
449
450
    void update_spill_stream_profiles(RuntimeProfile* source_profile) override;
451
452
    void init_spill_params(size_t spill_partition_count);
453
454
    void close();
455
456
    AggSharedState* in_mem_shared_state = nullptr;
457
    std::shared_ptr<BasicSharedState> in_mem_shared_state_sptr;
458
459
    size_t partition_count;
460
    size_t max_partition_index;
461
    bool is_spilled = false;
462
    std::atomic_bool is_closed = false;
463
    std::deque<std::shared_ptr<AggSpillPartition>> spill_partitions;
464
465
1.04M
    size_t get_partition_index(size_t hash_value) const { return hash_value % partition_count; }
466
};
467
468
struct AggSpillPartition {
469
    static constexpr int64_t AGG_SPILL_FILE_SIZE = 1024 * 1024 * 1024; // 1G
470
471
352
    AggSpillPartition() = default;
472
473
    void close();
474
475
    Status get_spill_stream(RuntimeState* state, int node_id, RuntimeProfile* profile,
476
                            SpillStreamSPtr& spilling_stream);
477
478
64
    Status flush_if_full() {
479
64
        DCHECK(spilling_stream_);
480
64
        Status status;
481
        // avoid small spill files
482
64
        if (spilling_stream_->get_written_bytes() >= AGG_SPILL_FILE_SIZE) {
483
0
            status = spilling_stream_->spill_eof();
484
0
            spilling_stream_.reset();
485
0
        }
486
64
        return status;
487
64
    }
488
489
352
    Status finish_current_spilling(bool eos = false) {
490
352
        if (spilling_stream_) {
491
100
            if (eos || spilling_stream_->get_written_bytes() >= AGG_SPILL_FILE_SIZE) {
492
48
                auto status = spilling_stream_->spill_eof();
493
48
                spilling_stream_.reset();
494
48
                return status;
495
48
            }
496
100
        }
497
304
        return Status::OK();
498
352
    }
499
500
    std::deque<SpillStreamSPtr> spill_streams_;
501
    SpillStreamSPtr spilling_stream_;
502
};
503
using AggSpillPartitionSPtr = std::shared_ptr<AggSpillPartition>;
504
struct SortSharedState : public BasicSharedState {
505
    ENABLE_FACTORY_CREATOR(SortSharedState)
506
public:
507
    std::shared_ptr<Sorter> sorter;
508
};
509
510
struct SpillSortSharedState : public BasicSharedState,
511
                              public BasicSpillSharedState,
512
                              public std::enable_shared_from_this<SpillSortSharedState> {
513
    ENABLE_FACTORY_CREATOR(SpillSortSharedState)
514
515
10
    SpillSortSharedState() = default;
516
10
    ~SpillSortSharedState() override = default;
517
518
5
    void update_spill_block_batch_row_count(RuntimeState* state, const Block* block) {
519
5
        auto rows = block->rows();
520
5
        if (rows > 0 && 0 == avg_row_bytes) {
521
4
            avg_row_bytes = std::max((std::size_t)1, block->bytes() / rows);
522
4
            spill_block_batch_row_count =
523
4
                    (state->spill_sort_batch_bytes() + avg_row_bytes - 1) / avg_row_bytes;
524
4
            LOG(INFO) << "spill sort block batch row count: " << spill_block_batch_row_count;
525
4
        }
526
5
    }
527
528
    void update_spill_stream_profiles(RuntimeProfile* source_profile) override;
529
530
    void close();
531
532
    SortSharedState* in_mem_shared_state = nullptr;
533
    bool enable_spill = false;
534
    bool is_spilled = false;
535
    int64_t limit = -1;
536
    int64_t offset = 0;
537
    std::atomic_bool is_closed = false;
538
    std::shared_ptr<BasicSharedState> in_mem_shared_state_sptr;
539
540
    std::deque<SpillStreamSPtr> sorted_streams;
541
    size_t avg_row_bytes = 0;
542
    size_t spill_block_batch_row_count;
543
};
544
545
struct UnionSharedState : public BasicSharedState {
546
    ENABLE_FACTORY_CREATOR(UnionSharedState)
547
548
public:
549
1
    UnionSharedState(int child_count = 1) : data_queue(child_count), _child_count(child_count) {};
550
0
    int child_count() const { return _child_count; }
551
    DataQueue data_queue;
552
    const int _child_count;
553
};
554
555
struct DataQueueSharedState : public BasicSharedState {
556
    ENABLE_FACTORY_CREATOR(DataQueueSharedState)
557
public:
558
    DataQueue data_queue;
559
};
560
561
class MultiCastDataStreamer;
562
563
struct MultiCastSharedState : public BasicSharedState,
564
                              public BasicSpillSharedState,
565
                              public std::enable_shared_from_this<MultiCastSharedState> {
566
    MultiCastSharedState(ObjectPool* pool, int cast_sender_count, int node_id);
567
    std::unique_ptr<MultiCastDataStreamer> multi_cast_data_streamer;
568
569
    void update_spill_stream_profiles(RuntimeProfile* source_profile) override;
570
};
571
572
struct AnalyticSharedState : public BasicSharedState {
573
    ENABLE_FACTORY_CREATOR(AnalyticSharedState)
574
575
public:
576
9
    AnalyticSharedState() = default;
577
    std::queue<Block> blocks_buffer;
578
    std::mutex buffer_mutex;
579
    bool sink_eos = false;
580
    std::mutex sink_eos_lock;
581
    Arena agg_arena_pool;
582
};
583
584
struct JoinSharedState : public BasicSharedState {
585
    // For some join case, we can apply a short circuit strategy
586
    // 1. _has_null_in_build_side = true
587
    // 2. build side rows is empty, Join op is: inner join/right outer join/left semi/right semi/right anti
588
    bool _has_null_in_build_side = false;
589
    bool short_circuit_for_probe = false;
590
    // for some join, when build side rows is empty, we could return directly by add some additional null data in probe table.
591
    bool empty_right_table_need_probe_dispose = false;
592
    JoinOpVariants join_op_variants;
593
};
594
595
struct HashJoinSharedState : public JoinSharedState {
596
    ENABLE_FACTORY_CREATOR(HashJoinSharedState)
597
72.1k
    HashJoinSharedState() {
598
72.1k
        hash_table_variant_vector.push_back(std::make_shared<JoinDataVariants>());
599
72.1k
    }
600
1
    HashJoinSharedState(int num_instances) {
601
1
        source_deps.resize(num_instances, nullptr);
602
1
        hash_table_variant_vector.resize(num_instances, nullptr);
603
9
        for (int i = 0; i < num_instances; i++) {
604
8
            hash_table_variant_vector[i] = std::make_shared<JoinDataVariants>();
605
8
        }
606
1
    }
607
    std::shared_ptr<Arena> arena = std::make_shared<Arena>();
608
609
    const std::vector<TupleDescriptor*> build_side_child_desc;
610
    size_t build_exprs_size = 0;
611
    std::shared_ptr<Block> build_block;
612
    std::shared_ptr<std::vector<uint32_t>> build_indexes_null;
613
614
    // Used by shared hash table
615
    // For probe operator, hash table in _hash_table_variants is read-only if visited flags is not
616
    // used. (visited flags will be used only in right / full outer join).
617
    //
618
    // For broadcast join, although hash table is read-only, some states in `_hash_table_variants`
619
    // are still could be written. For example, serialized keys will be written in a continuous
620
    // memory in `_hash_table_variants`. So before execution, we should use a local _hash_table_variants
621
    // which has a shared hash table in it.
622
    std::vector<std::shared_ptr<JoinDataVariants>> hash_table_variant_vector;
623
624
    // whether left semi join could directly return
625
    // if runtime filters contains local in filter, we can make sure all input rows are matched
626
    // local filter will always be applied, and in filter could guarantee precise filtering
627
    // ATTN: we should disable always_true logic for in filter when we set this flag
628
    bool left_semi_direct_return = false;
629
};
630
631
struct PartitionedHashJoinSharedState
632
        : public HashJoinSharedState,
633
          public BasicSpillSharedState,
634
          public std::enable_shared_from_this<PartitionedHashJoinSharedState> {
635
    ENABLE_FACTORY_CREATOR(PartitionedHashJoinSharedState)
636
637
0
    void update_spill_stream_profiles(RuntimeProfile* source_profile) override {
638
0
        for (auto& stream : spilled_streams) {
639
0
            if (stream) {
640
0
                stream->update_shared_profiles(source_profile);
641
0
            }
642
0
        }
643
0
    }
644
645
    std::unique_ptr<RuntimeState> inner_runtime_state;
646
    std::shared_ptr<HashJoinSharedState> inner_shared_state;
647
    std::vector<std::unique_ptr<MutableBlock>> partitioned_build_blocks;
648
    std::vector<SpillStreamSPtr> spilled_streams;
649
    bool is_spilled = false;
650
};
651
652
struct NestedLoopJoinSharedState : public JoinSharedState {
653
    ENABLE_FACTORY_CREATOR(NestedLoopJoinSharedState)
654
    // if true, probe child has no more rows to process
655
    bool probe_side_eos = false;
656
    // Visited flags for each row in build side.
657
    MutableColumns build_side_visited_flags;
658
    // List of build blocks, constructed in prepare()
659
    Blocks build_blocks;
660
};
661
662
struct PartitionSortNodeSharedState : public BasicSharedState {
663
    ENABLE_FACTORY_CREATOR(PartitionSortNodeSharedState)
664
public:
665
    std::queue<Block> blocks_buffer;
666
    std::mutex buffer_mutex;
667
    std::vector<std::unique_ptr<PartitionSorter>> partition_sorts;
668
    bool sink_eos = false;
669
    std::mutex sink_eos_lock;
670
    std::mutex prepared_finish_lock;
671
};
672
673
struct SetSharedState : public BasicSharedState {
674
    ENABLE_FACTORY_CREATOR(SetSharedState)
675
public:
676
    /// default init
677
    Block build_block; // build to source
678
    //record element size in hashtable
679
    int64_t valid_element_in_hash_tbl = 0;
680
    //first: idx mapped to column types
681
    //second: column_id, could point to origin column or cast column
682
    std::unordered_map<int, int> build_col_idx;
683
684
    //// shared static states (shared, decided in prepare/open...)
685
686
    /// init in setup_local_state
687
    std::unique_ptr<SetDataVariants> hash_table_variants =
688
            std::make_unique<SetDataVariants>(); // the real data HERE.
689
    std::vector<bool> build_not_ignore_null;
690
691
    // The SET operator's child might have different nullable attributes.
692
    // If a calculation involves both nullable and non-nullable columns, the final output should be a nullable column
693
    Status update_build_not_ignore_null(const VExprContextSPtrs& ctxs);
694
695
    size_t get_hash_table_size() const;
696
    /// init in both upstream side.
697
    //The i-th result expr list refers to the i-th child.
698
    std::vector<VExprContextSPtrs> child_exprs_lists;
699
700
    /// init in build side
701
    size_t child_quantity;
702
    VExprContextSPtrs build_child_exprs;
703
    std::vector<Dependency*> probe_finished_children_dependency;
704
705
    /// init in probe side
706
    std::vector<VExprContextSPtrs> probe_child_exprs_lists;
707
708
    std::atomic<bool> ready_for_read = false;
709
710
    Arena arena;
711
712
    /// called in setup_local_state
713
    Status hash_table_init();
714
};
715
716
// How a local exchange distributes data across channels.
enum class ExchangeType : uint8_t {
    NOOP = 0,
    /// Shuffle data by Crc32CHashPartitioner.
    HASH_SHUFFLE = 1,
    /// Pass data blocks through in round-robin order.
    PASSTHROUGH = 2,
    /// Shuffle data by Crc32HashPartitioner<ShuffleChannelIds> (e.g. same as storage engine).
    BUCKET_HASH_SHUFFLE = 3,
    /// Pass data blocks through to all channels.
    BROADCAST = 4,
    /// Pass data through to channels evenly in an adaptive way.
    ADAPTIVE_PASSTHROUGH = 5,
    /// Send all data to the first channel.
    PASS_TO_ONE = 6,
};
731
732
63
// Returns the enumerator name of `idx` as a string.
// Throws an Exception carrying a fatal-error Status when `idx` is not one of
// the declared enumerators.
inline std::string get_exchange_type_name(ExchangeType idx) {
    const char* type_name = nullptr;
    switch (idx) {
    case ExchangeType::NOOP:
        type_name = "NOOP";
        break;
    case ExchangeType::HASH_SHUFFLE:
        type_name = "HASH_SHUFFLE";
        break;
    case ExchangeType::PASSTHROUGH:
        type_name = "PASSTHROUGH";
        break;
    case ExchangeType::BUCKET_HASH_SHUFFLE:
        type_name = "BUCKET_HASH_SHUFFLE";
        break;
    case ExchangeType::BROADCAST:
        type_name = "BROADCAST";
        break;
    case ExchangeType::ADAPTIVE_PASSTHROUGH:
        type_name = "ADAPTIVE_PASSTHROUGH";
        break;
    case ExchangeType::PASS_TO_ONE:
        type_name = "PASS_TO_ONE";
        break;
    }
    // No case matched: the value is outside the declared enumerators.
    if (type_name == nullptr) {
        throw Exception(Status::FatalError("__builtin_unreachable"));
    }
    return type_name;
}
751
752
struct DataDistribution {
753
144k
    DataDistribution(ExchangeType type) : distribution_type(type) {}
754
    DataDistribution(ExchangeType type, const std::vector<TExpr>& partition_exprs_)
755
52
            : distribution_type(type), partition_exprs(partition_exprs_) {}
756
0
    DataDistribution(const DataDistribution& other) = default;
757
5
    bool need_local_exchange() const { return distribution_type != ExchangeType::NOOP; }
758
5
    DataDistribution& operator=(const DataDistribution& other) = default;
759
    ExchangeType distribution_type;
760
    std::vector<TExpr> partition_exprs;
761
};
762
763
class ExchangerBase;
764
765
struct LocalExchangeSharedState : public BasicSharedState {
766
public:
767
    ENABLE_FACTORY_CREATOR(LocalExchangeSharedState);
768
    LocalExchangeSharedState(int num_instances);
769
    ~LocalExchangeSharedState() override;
770
    std::unique_ptr<ExchangerBase> exchanger {};
771
    std::vector<RuntimeProfile::Counter*> mem_counters;
772
    std::atomic<int64_t> mem_usage = 0;
773
    std::atomic<size_t> _buffer_mem_limit = config::local_exchange_buffer_mem_limit;
774
    // We need to make sure to add mem_usage first and then enqueue, otherwise sub mem_usage may cause negative mem_usage during concurrent dequeue.
775
    std::mutex le_lock;
776
    void sub_running_sink_operators();
777
    void sub_running_source_operators();
778
10
    void _set_always_ready() {
779
40
        for (auto& dep : source_deps) {
780
40
            DCHECK(dep);
781
40
            dep->set_always_ready();
782
40
        }
783
10
        for (auto& dep : sink_deps) {
784
10
            DCHECK(dep);
785
10
            dep->set_always_ready();
786
10
        }
787
10
    }
788
789
0
    Dependency* get_sink_dep_by_channel_id(int channel_id) { return nullptr; }
790
791
129
    void set_ready_to_read(int channel_id) {
792
129
        auto& dep = source_deps[channel_id];
793
129
        DCHECK(dep) << channel_id;
794
129
        dep->set_ready();
795
129
    }
796
797
161
    void add_mem_usage(int channel_id, size_t delta) { mem_counters[channel_id]->update(delta); }
798
799
125
    void sub_mem_usage(int channel_id, size_t delta) {
800
125
        mem_counters[channel_id]->update(-(int64_t)delta);
801
125
    }
802
803
114
    void add_total_mem_usage(size_t delta) {
804
114
        if (cast_set<int64_t>(mem_usage.fetch_add(delta) + delta) > _buffer_mem_limit) {
805
15
            sink_deps.front()->block();
806
15
        }
807
114
    }
808
809
114
    void sub_total_mem_usage(size_t delta) {
810
114
        auto prev_usage = mem_usage.fetch_sub(delta);
811
114
        DCHECK_GE(prev_usage - delta, 0) << "prev_usage: " << prev_usage << " delta: " << delta;
812
114
        if (cast_set<int64_t>(prev_usage - delta) <= _buffer_mem_limit) {
813
102
            sink_deps.front()->set_ready();
814
102
        }
815
114
    }
816
817
0
    void set_low_memory_mode(RuntimeState* state) {
818
0
        _buffer_mem_limit = std::min<int64_t>(config::local_exchange_buffer_mem_limit,
819
0
                                              state->low_memory_mode_buffer_limit());
820
0
    }
821
};
822
823
#include "common/compile_check_end.h"
824
} // namespace doris