Coverage Report

Created: 2026-03-19 15:34

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exec/pipeline/pipeline_task.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "exec/pipeline/pipeline_task.h"
19
20
#include <fmt/core.h>
21
#include <fmt/format.h>
22
#include <gen_cpp/Metrics_types.h>
23
#include <glog/logging.h>
24
25
#include <algorithm>
26
#include <memory>
27
#include <ostream>
28
#include <vector>
29
30
#include "common/logging.h"
31
#include "common/status.h"
32
#include "core/block/block.h"
33
#include "exec/operator/exchange_source_operator.h"
34
#include "exec/operator/operator.h"
35
#include "exec/operator/rec_cte_source_operator.h"
36
#include "exec/operator/scan_operator.h"
37
#include "exec/pipeline/dependency.h"
38
#include "exec/pipeline/pipeline.h"
39
#include "exec/pipeline/pipeline_fragment_context.h"
40
#include "exec/pipeline/revokable_task.h"
41
#include "exec/pipeline/task_queue.h"
42
#include "exec/pipeline/task_scheduler.h"
43
#include "exec/spill/spill_file.h"
44
#include "runtime/descriptors.h"
45
#include "runtime/exec_env.h"
46
#include "runtime/query_context.h"
47
#include "runtime/runtime_profile.h"
48
#include "runtime/runtime_profile_counter_names.h"
49
#include "runtime/thread_context.h"
50
#include "runtime/workload_group/workload_group_manager.h"
51
#include "util/defer_op.h"
52
#include "util/mem_info.h"
53
#include "util/uid_util.h"
54
55
namespace doris {
56
class RuntimeState;
57
} // namespace doris
58
59
namespace doris {
60
#include "common/compile_check_begin.h"
61
62
// Builds a task over `pipeline`: copies the pipeline's operator list (first operator is kept as
// `_source`, last one as `_root`), grabs the pipeline's sink, takes ownership of
// `shared_state_map` and registers the query-level execution dependency.
PipelineTask::PipelineTask(PipelinePtr& pipeline, uint32_t task_id, RuntimeState* state,
                           std::shared_ptr<PipelineFragmentContext> fragment_context,
                           RuntimeProfile* parent_profile,
                           std::map<int, std::pair<std::shared_ptr<BasicSharedState>,
                                                   std::vector<std::shared_ptr<Dependency>>>>
                                   shared_state_map,
                           int task_idx)
        :
#ifdef BE_TEST
          // In test builds a null fragment context is tolerated and yields a default query id.
          _query_id(fragment_context ? fragment_context->get_query_id() : TUniqueId()),
#else
          _query_id(fragment_context->get_query_id()),
#endif
          _index(task_id),
          _pipeline(pipeline),
          _opened(false),
          _state(state),
          _fragment_context(fragment_context),
          _parent_profile(parent_profile),
          _operators(pipeline->operators()),
          _source(_operators.front().get()),
          _root(_operators.back().get()),
          _sink(pipeline->sink_shared_pointer()),
          _shared_state_map(std::move(shared_state_map)),
          _task_idx(task_idx),
          _memory_sufficient_dependency(state->get_query_ctx()->get_memory_sufficient_dependency()),
          _pipeline_name(_pipeline->name()) {
#ifndef BE_TEST
    _query_mem_tracker = fragment_context->get_query_ctx()->query_mem_tracker();
#endif
    _execution_dependencies.push_back(state->get_query_ctx()->get_execution_dependency());

    // The sink is only asked to create its own shared state when the injected map has no entry
    // for the sink's first destination id.
    if (_shared_state_map.contains(_sink->dests_id().front())) {
        return;
    }
    if (auto created_state = _sink->create_shared_state(); created_state) {
        _sink_shared_state = std::move(created_state);
    }
}
100
1.92M
101
1.99M
// A PipelineTask is also held by the task queue (https://github.com/apache/doris/pull/49753), so
// it may be the last owner to be destructed. Because the task holds operators, shared states and
// other objects, they are released explicitly here; outside of BE_TEST builds this happens under
// the query memory tracker whenever one is set.
PipelineTask::~PipelineTask() {
    const auto release_held_objects = [this]() {
        _shared_state_map.clear();
        _sink_shared_state.reset();
        _op_shared_states.clear();
        _sink.reset();
        _operators.clear();
        _block.reset();
        _pipeline.reset();
    };
#ifndef BE_TEST
    if (_query_mem_tracker) {
        // The scoped switch must stay alive for the whole release.
        SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(_query_mem_tracker);
        release_held_objects();
    } else {
        release_held_objects();
    }
#else
    release_held_objects();
#endif
}
124
125
1.91M
// Sets up the task before it can be scheduled: creates the task profile, sets up the sink local
// state, then the local state of every operator from the last operator back to the first,
// collects the source operator's execution dependencies, and finally moves the task to RUNNABLE.
// Returns the first error reported by any setup step, the query's exec status if the query has
// already been cancelled, or an internal error if the fragment context is gone.
Status PipelineTask::prepare(const std::vector<TScanRangeParams>& scan_range, const int sender_id,
                             const TDataSink& tsink) {
    DCHECK(_sink);
    _init_profile();
    SCOPED_TIMER(_task_profile->total_time_counter());
    SCOPED_CPU_TIMER(_task_cpu_timer);
    SCOPED_TIMER(_prepare_timer);
    DBUG_EXECUTE_IF("fault_inject::PipelineXTask::prepare", {
        Status status = Status::Error<INTERNAL_ERROR>("fault_inject pipeline_task prepare failed");
        return status;
    });

    // Sink local state comes first; its profile is the parent of the last operator's profile.
    {
        LocalSinkStateInfo sink_info {_task_idx,         _task_profile.get(),
                                      sender_id,         get_sink_shared_state().get(),
                                      _shared_state_map, tsink};
        RETURN_IF_ERROR(_sink->setup_local_state(_state, sink_info));
    }

    _scan_ranges = scan_range;

    // Walk the operators backwards so every operator's profile hangs below the profile of the
    // operator (or sink) that was set up just before it.
    auto* upper_profile = _state->get_sink_local_state()->operator_profile();
    for (auto op_it = _operators.rbegin(); op_it != _operators.rend(); ++op_it) {
        auto& op = *op_it;
        LocalStateInfo op_info {upper_profile, _scan_ranges, get_op_shared_state(op->operator_id()),
                                _shared_state_map, _task_idx};
        RETURN_IF_ERROR(op->setup_local_state(_state, op_info));
        upper_profile = _state->get_local_state(op->operator_id())->operator_profile();
    }

    // Append the source operator's execution dependencies under the dependency lock.
    {
        const auto& source_deps =
                _state->get_local_state(_source->operator_id())->execution_dependencies();
        std::unique_lock<std::mutex> lc(_dependency_lock);
        _execution_dependencies.insert(_execution_dependencies.end(), source_deps.begin(),
                                       source_deps.end());
    }

    // The fragment handle is confined to this scope so it is dropped before the state transition.
    {
        auto fragment = _fragment_context.lock();
        if (!fragment) {
            return Status::InternalError("Fragment already finished! Query: {}",
                                         print_id(_query_id));
        }
        if (fragment->get_query_ctx()->is_cancelled()) {
            terminate();
            return fragment->get_query_ctx()->exec_status();
        }
    }

    _block = doris::Block::create_unique();
    return _state_transition(State::RUNNABLE);
}
172
1.91M
173
1.91M
// Collects the dependencies exposed by the operator and sink local states: read dependencies
// per operator (indexed like `_operators`), write dependencies of the sink, and every non-null
// finish dependency. The results are built in locals and swapped into the members under
// `_dependency_lock`.
Status PipelineTask::_extract_dependencies() {
    std::vector<std::vector<Dependency*>> per_op_read_deps(_operators.size());
    std::vector<Dependency*> sink_write_deps;
    std::vector<Dependency*> pending_finish_deps;

    for (size_t op_pos = 0; op_pos < _operators.size(); ++op_pos) {
        auto* op_local_state = _state->get_local_state(_operators[op_pos]->operator_id());
        DCHECK(op_local_state);
        per_op_read_deps[op_pos] = op_local_state->dependencies();
        if (auto* finish_dep = op_local_state->finishdependency(); finish_dep) {
            pending_finish_deps.push_back(finish_dep);
        }
    }

    DBUG_EXECUTE_IF("fault_inject::PipelineXTask::_extract_dependencies", {
        Status status = Status::Error<INTERNAL_ERROR>(
                "fault_inject pipeline_task _extract_dependencies failed");
        return status;
    });

    // The sink contributes the write dependencies and, optionally, one more finish dependency.
    {
        auto* sink_local_state = _state->get_sink_local_state();
        sink_write_deps = sink_local_state->dependencies();
        if (auto* finish_dep = sink_local_state->finishdependency(); finish_dep) {
            pending_finish_deps.push_back(finish_dep);
        }
    }

    // Publish everything at once while holding the dependency lock.
    {
        std::unique_lock<std::mutex> lc(_dependency_lock);
        _read_dependencies.swap(per_op_read_deps);
        _write_dependencies.swap(sink_write_deps);
        _finish_dependencies.swap(pending_finish_deps);
    }
    return Status::OK();
}
210
1.15M
211
1.15M
bool PipelineTask::inject_shared_state(std::shared_ptr<BasicSharedState> shared_state) {
212
684k
    if (!shared_state) {
213
684k
        return false;
214
    }
215
    // Shared state is created by upstream task's sink operator and shared by source operator of
216
595k
    // this task.
217
595k
    for (auto& op : _operators) {
218
464k
        if (shared_state->related_op_ids.contains(op->operator_id())) {
219
464k
            _op_shared_states.insert({op->operator_id(), shared_state});
220
464k
            return true;
221
595k
        }
222
    }
223
    // Shared state is created by the first sink operator and shared by sink operator of this task.
224
11.8k
    // For example, Set operations.
225
11.8k
    if (shared_state->related_op_ids.contains(_sink->dests_id().front())) {
226
0
        DCHECK_EQ(_sink_shared_state, nullptr)
227
11.8k
                << " Sink: " << _sink->get_name() << " dest id: " << _sink->dests_id().front();
228
11.8k
        _sink_shared_state = shared_state;
229
11.8k
        return true;
230
18.4E
    }
231
10.0k
    return false;
232
}
233
1.91M
234
1.91M
void PipelineTask::_init_profile() {
235
1.91M
    _task_profile = std::make_unique<RuntimeProfile>(fmt::format("PipelineTask(index={})", _index));
236
1.91M
    _parent_profile->add_child(_task_profile.get(), true, nullptr);
237
    _task_cpu_timer = ADD_TIMER(_task_profile, profile::TASK_CPU_TIME);
238
1.91M
239
1.91M
    static const char* exec_time = profile::EXECUTE_TIME;
240
1.91M
    _exec_timer = ADD_TIMER(_task_profile, exec_time);
241
1.91M
    _prepare_timer = ADD_CHILD_TIMER(_task_profile, profile::PREPARE_TIME, exec_time);
242
1.91M
    _open_timer = ADD_CHILD_TIMER(_task_profile, profile::OPEN_TIME, exec_time);
243
1.91M
    _get_block_timer = ADD_CHILD_TIMER(_task_profile, profile::GET_BLOCK_TIME, exec_time);
244
1.91M
    _get_block_counter = ADD_COUNTER(_task_profile, profile::GET_BLOCK_COUNTER, TUnit::UNIT);
245
1.91M
    _sink_timer = ADD_CHILD_TIMER(_task_profile, profile::SINK_TIME, exec_time);
246
    _close_timer = ADD_CHILD_TIMER(_task_profile, profile::CLOSE_TIME, exec_time);
247
1.91M
248
    _wait_worker_timer = ADD_TIMER_WITH_LEVEL(_task_profile, profile::WAIT_WORKER_TIME, 1);
249
1.91M
250
1.91M
    _schedule_counts = ADD_COUNTER(_task_profile, profile::NUM_SCHEDULE_TIMES, TUnit::UNIT);
251
1.91M
    _yield_counts = ADD_COUNTER(_task_profile, profile::NUM_YIELD_TIMES, TUnit::UNIT);
252
1.91M
    _core_change_times = ADD_COUNTER(_task_profile, profile::CORE_CHANGE_TIMES, TUnit::UNIT);
253
1.91M
    _memory_reserve_times = ADD_COUNTER(_task_profile, profile::MEMORY_RESERVE_TIMES, TUnit::UNIT);
254
1.91M
    _memory_reserve_failed_times =
255
1.91M
            ADD_COUNTER(_task_profile, profile::MEMORY_RESERVE_FAILED_TIMES, TUnit::UNIT);
256
}
257
1.91M
258
1.91M
void PipelineTask::_fresh_profile_counter() {
259
1.91M
    COUNTER_SET(_schedule_counts, (int64_t)_schedule_time);
260
1.91M
    COUNTER_SET(_wait_worker_timer, (int64_t)_wait_worker_watcher.elapsed_time());
261
}
262
1.91M
263
1.91M
// Opens the local state of every operator and then of the sink, extracts the task's
// dependencies, and marks the task as opened. Any failing step aborts with its error and leaves
// `_opened` untouched.
Status PipelineTask::_open() {
    SCOPED_TIMER(_task_profile->total_time_counter());
    SCOPED_CPU_TIMER(_task_cpu_timer);
    SCOPED_TIMER(_open_timer);

    _dry_run = _sink->should_dry_run(_state);

    for (auto& op : _operators) {
        auto* op_local_state = _state->get_local_state(op->operator_id());
        RETURN_IF_ERROR(op_local_state->open(_state));
    }
    RETURN_IF_ERROR(_state->get_sink_local_state()->open(_state));

    RETURN_IF_ERROR(_extract_dependencies());
    DBUG_EXECUTE_IF("fault_inject::PipelineXTask::open", {
        Status status = Status::Error<INTERNAL_ERROR>("fault_inject pipeline_task open failed");
        return status;
    });

    _opened = true;
    return Status::OK();
}
280
10.7M
281
10.7M
// Calls `prepare` on the local state of every operator and then of the sink, stopping at the
// first error.
Status PipelineTask::_prepare() {
    SCOPED_TIMER(_task_profile->total_time_counter());
    SCOPED_CPU_TIMER(_task_cpu_timer);

    for (auto& op : _operators) {
        auto* op_local_state = _state->get_local_state(op->operator_id());
        RETURN_IF_ERROR(op_local_state->prepare(_state));
    }
    // The sink is prepared last; its status is the result of the whole step.
    return _state->get_sink_local_state()->prepare(_state);
}
290
6.25M
291
bool PipelineTask::_wait_to_start() {
292
    // Before task starting, we should make sure
293
    // 1. Execution dependency is ready (which is controlled by FE 2-phase commit)
294
    // 2. Runtime filter dependencies are ready
295
6.25M
    // 3. All tablets are loaded into local storage
296
6.25M
    return std::any_of(
297
7.70M
            _execution_dependencies.begin(), _execution_dependencies.end(),
298
6.25M
            [&](Dependency* dep) -> bool { return dep->is_blocked_by(shared_from_this()); });
299
}
300
2.27M
301
bool PipelineTask::_is_pending_finish() {
302
2.27M
    // Spilling may be in progress if eos is true.
303
1.18M
    return std::ranges::any_of(_finish_dependencies, [&](Dependency* dep) -> bool {
304
1.18M
        return dep->is_blocked_by(shared_from_this());
305
2.27M
    });
306
}
307
6.64M
308
bool PipelineTask::is_blockable() const {
309
    // Before task starting, we should make sure
310
    // 1. Execution dependency is ready (which is controlled by FE 2-phase commit)
311
    // 2. Runtime filter dependencies are ready
312
    // 3. All tablets are loaded into local storage
313
6.64M
314
26.8k
    if (_state->enable_fuzzy_blockable_task()) {
315
15.9k
        if ((_schedule_time + _task_idx) % 2 == 0) {
316
15.9k
            return true;
317
26.8k
        }
318
    }
319
6.62M
320
8.87M
    return std::ranges::any_of(_operators,
321
6.63M
                               [&](OperatorPtr op) -> bool { return op->is_blockable(_state); }) ||
322
6.64M
           _sink->is_blockable(_state);
323
}
324
8.59M
325
bool PipelineTask::_is_blocked() {
326
8.59M
    // `_dry_run = true` means we do not need data from source operator.
327
15.8M
    if (!_dry_run) {
328
        for (int i = cast_set<int>(_read_dependencies.size() - 1); i >= 0; i--) {
329
11.8M
            // `_read_dependencies` is organized according to operators. For each operator, running condition is met iff all dependencies are ready.
330
11.8M
            for (auto* dep : _read_dependencies[i]) {
331
2.56M
                if (dep->is_blocked_by(shared_from_this())) {
332
2.56M
                    return true;
333
11.8M
                }
334
            }
335
7.39M
            // If all dependencies are ready for this operator, we can execute this task if no datum is needed from upstream operators.
336
86.8k
            if (!_operators[i]->need_more_input_data(_state)) {
337
86.8k
                break;
338
7.39M
            }
339
8.57M
        }
340
6.03M
    }
341
9.49M
    return _memory_sufficient_dependency->is_blocked_by(shared_from_this()) ||
342
9.49M
           std::ranges::any_of(_write_dependencies, [&](Dependency* dep) -> bool {
343
9.49M
               return dep->is_blocked_by(shared_from_this());
344
8.59M
           });
345
}
346
1.20M
347
void PipelineTask::terminate() {
348
1.20M
    // We use a lock to assure all dependencies are not deconstructed here.
349
1.20M
    std::unique_lock<std::mutex> lc(_dependency_lock);
350
1.20M
    auto fragment = _fragment_context.lock();
351
36.6k
    if (!is_finalized() && fragment) {
352
36.6k
        try {
353
36.6k
            DCHECK(_wake_up_early || fragment->is_canceled());
354
94.4k
            std::ranges::for_each(_write_dependencies,
355
36.6k
                                  [&](Dependency* dep) { dep->set_always_ready(); });
356
36.6k
            std::ranges::for_each(_finish_dependencies,
357
42.2k
                                  [&](Dependency* dep) { dep->set_always_ready(); });
358
46.8k
            std::ranges::for_each(_read_dependencies, [&](std::vector<Dependency*>& deps) {
359
42.2k
                std::ranges::for_each(deps, [&](Dependency* dep) { dep->set_always_ready(); });
360
            });
361
36.6k
            // All `_execution_deps` will never be set blocking from ready. So we just set ready here.
362
51.9k
            std::ranges::for_each(_execution_dependencies,
363
36.6k
                                  [&](Dependency* dep) { dep->set_ready(); });
364
36.6k
            _memory_sufficient_dependency->set_ready();
365
0
        } catch (const doris::Exception& e) {
366
0
            LOG(WARNING) << "Terminate failed: " << e.code() << ", " << e.to_string();
367
36.6k
        }
368
1.20M
    }
369
}
370
371
// When current memory pressure is low, memory usage may increase significantly in the next
372
// operator run, while there is no revocable memory available for spilling.
373
// Trigger memory revoking when pressure is high and revocable memory is significant.
374
// Memory pressure is evaluated using two signals:
375
// 1. Query memory usage exceeds a threshold ratio of the query memory limit.
376
7.25M
// 2. Workload group memory usage reaches the workload group low-watermark threshold.
377
7.25M
bool PipelineTask::_should_trigger_revoking(const size_t reserve_size) const {
378
7.21M
    if (!_state->enable_spill()) {
379
7.21M
        return false;
380
    }
381
39.6k
382
39.6k
    auto query_mem_tracker = _state->get_query_ctx()->query_mem_tracker();
383
48.4k
    auto wg = _state->get_query_ctx()->workload_group();
384
1.86k
    if (!query_mem_tracker || !wg) {
385
1.86k
        return false;
386
    }
387
37.7k
388
37.7k
    const auto parallelism = std::max(1, _pipeline->num_tasks());
389
37.7k
    const auto query_water_mark = 90; // 90%
390
37.7k
    const auto group_mem_limit = wg->memory_limit();
391
37.7k
    auto query_limit = query_mem_tracker->limit();
392
1
    if (query_limit <= 0) {
393
37.7k
        query_limit = group_mem_limit;
394
952
    } else if (query_limit > group_mem_limit && group_mem_limit > 0) {
395
952
        query_limit = group_mem_limit;
396
    }
397
37.7k
398
1
    if (query_limit <= 0) {
399
1
        return false;
400
    }
401
46.6k
402
46.6k
    if ((reserve_size * parallelism) <= (query_limit / 5)) {
403
46.6k
        return false;
404
    }
405
18.4E
406
18.4E
    bool is_high_memory_pressure = false;
407
18.4E
    const auto used_mem = query_mem_tracker->consumption() + reserve_size * parallelism;
408
2
    if (used_mem >= int64_t((double(query_limit) * query_water_mark / 100))) {
409
2
        is_high_memory_pressure = true;
410
    }
411
18.4E
412
4
    if (!is_high_memory_pressure) {
413
4
        bool is_low_watermark;
414
4
        bool is_high_watermark;
415
4
        wg->check_mem_used(&is_low_watermark, &is_high_watermark);
416
4
        is_high_memory_pressure = is_low_watermark || is_high_watermark;
417
    }
418
18.4E
419
4
    if (is_high_memory_pressure) {
420
4
        const auto revocable_size = [&]() {
421
4
            size_t total = _sink->revocable_mem_size(_state);
422
4
            for (const auto& op : _operators) {
423
4
                total += op->revocable_mem_size(_state);
424
4
            }
425
4
            return total;
426
        }();
427
4
428
4
        const auto total_estimated_revocable = revocable_size * parallelism;
429
4
        return total_estimated_revocable >= int64_t(double(query_limit) * 0.2);
430
    }
431
18.4E
432
18.4E
    return false;
433
}
434
435
/**
436
 * `_eos` indicates whether the execution phase is done. `done` indicates whether we could close
437
 * this task.
438
 *
439
 * For example,
440
 * 1. if `_eos` is false which means we should continue to get next block so we cannot close (i.e.
441
 *    `done` is false)
442
 * 2. if `_eos` is true which means all blocks from source are exhausted but `_is_pending_finish()`
443
 *    is true which means we should wait for a pending dependency ready (maybe a running rpc), so we
444
 *    cannot close (i.e. `done` is false)
445
 * 3. if `_eos` is true which means all blocks from source are exhausted and `_is_pending_finish()`
446
 *    is false which means we can close immediately (i.e. `done` is true)
447
 * @param done
448
 * @return
449
6.63M
 */
450
6.63M
Status PipelineTask::execute(bool* done) {
451
    if (_exec_state != State::RUNNABLE || _blocked_dep != nullptr) [[unlikely]] {
452
#ifdef BE_TEST
453
        return Status::InternalError("Pipeline task is not runnable! Task info: {}",
454
                                     debug_string());
455
1
#else
456
1
        return Status::FatalError("Pipeline task is not runnable! Task info: {}", debug_string());
457
1
#endif
458
    }
459
6.63M
460
6.63M
    auto fragment_context = _fragment_context.lock();
461
0
    if (!fragment_context) {
462
0
        return Status::InternalError("Fragment already finished! Query: {}", print_id(_query_id));
463
6.63M
    }
464
6.63M
    int64_t time_spent = 0;
465
6.63M
    ThreadCpuStopWatch cpu_time_stop_watch;
466
6.63M
    cpu_time_stop_watch.start();
467
6.63M
    SCOPED_ATTACH_TASK(_state);
468
6.63M
    Defer running_defer {[&]() {
469
6.63M
        int64_t delta_cpu_time = cpu_time_stop_watch.elapsed_time();
470
6.63M
        _task_cpu_timer->update(delta_cpu_time);
471
6.63M
        fragment_context->get_query_ctx()->resource_ctx()->cpu_context()->update_cpu_cost_ms(
472
                delta_cpu_time);
473
474
6.63M
        // If task is woke up early, we should terminate all operators, and this task could be closed immediately.
475
6.18k
        if (_wake_up_early) {
476
6.18k
            terminate();
477
6.18k
            THROW_IF_ERROR(_root->terminate(_state));
478
6.18k
            THROW_IF_ERROR(_sink->terminate(_state));
479
6.18k
            _eos = true;
480
6.62M
            *done = true;
481
6.62M
        } else if (_eos && !_spilling &&
482
1.91M
                   (fragment_context->is_canceled() || !_is_pending_finish())) {
483
1.91M
            *done = true;
484
6.63M
        }
485
6.63M
    }};
486
    const auto query_id = _state->query_id();
487
    // If this task is already EOS and block is empty (which means we already output all blocks),
488
6.63M
    // just return here.
489
369k
    if (_eos && !_spilling) {
490
369k
        return Status::OK();
491
    }
492
    // If this task is blocked by a spilling request and waken up immediately, the spilling
493
6.26M
    // dependency will not block this task and we should just run here.
494
0
    if (!_block->empty()) {
495
0
        LOG(INFO) << "Query: " << print_id(query_id) << " has pending block, size: "
496
0
                  << PrettyPrinter::print_bytes(_block->allocated_bytes());
497
0
        DCHECK(_spilling);
498
    }
499
6.26M
500
6.26M
    SCOPED_TIMER(_task_profile->total_time_counter());
501
    SCOPED_TIMER(_exec_timer);
502
6.27M
503
6.27M
    if (!_wake_up_early) {
504
6.27M
        RETURN_IF_ERROR(_prepare());
505
6.26M
    }
506
6.26M
    DBUG_EXECUTE_IF("fault_inject::PipelineXTask::execute", {
507
6.26M
        Status status = Status::Error<INTERNAL_ERROR>("fault_inject pipeline_task execute failed");
508
6.26M
        return status;
509
    });
510
6.26M
    // `_wake_up_early` must be after `_wait_to_start()`
511
1.73M
    if (_wait_to_start() || _wake_up_early) {
512
1.73M
        return Status::OK();
513
4.53M
    }
514
    RETURN_IF_ERROR(_prepare());
515
516
4.53M
    // The status must be runnable
517
1.92M
    if (!_opened && !fragment_context->is_canceled()) {
518
1.92M
        DBUG_EXECUTE_IF("PipelineTask::execute.open_sleep", {
519
1.92M
            auto required_pipeline_id =
520
1.92M
                    DebugPoints::instance()->get_debug_param_or_default<int32_t>(
521
1.92M
                            "PipelineTask::execute.open_sleep", "pipeline_id", -1);
522
1.92M
            auto required_task_id = DebugPoints::instance()->get_debug_param_or_default<int32_t>(
523
1.92M
                    "PipelineTask::execute.open_sleep", "task_id", -1);
524
1.92M
            if (required_pipeline_id == pipeline_id() && required_task_id == task_id()) {
525
1.92M
                LOG(WARNING) << "PipelineTask::execute.open_sleep sleep 5s";
526
1.92M
                sleep(5);
527
1.92M
            }
528
        });
529
1.92M
530
1.92M
        SCOPED_RAW_TIMER(&time_spent);
531
1.92M
        RETURN_IF_ERROR(_open());
532
    }
533
8.60M
534
8.60M
    while (!fragment_context->is_canceled()) {
535
8.60M
        SCOPED_RAW_TIMER(&time_spent);
536
        Defer defer {[&]() {
537
8.59M
            // If this run is pended by a spilling request, the block will be output in next run.
538
8.59M
            if (!_spilling) {
539
8.59M
                _block->clear_column_data(_root->row_desc().num_materialized_slots());
540
8.59M
            }
541
        }};
542
8.60M
        // `_wake_up_early` must be after `_is_blocked()`
543
2.57M
        if (_is_blocked() || _wake_up_early) {
544
2.57M
            return Status::OK();
545
        }
546
547
        /// When a task is cancelled,
548
        /// its blocking state will be cleared and it will transition to a ready state (though it is not truly ready).
549
6.03M
        /// Here, checking whether it is cancelled to prevent tasks in a blocking state from being re-executed.
550
0
        if (fragment_context->is_canceled()) {
551
0
            break;
552
        }
553
6.03M
554
43.0k
        if (time_spent > _exec_time_slice) {
555
43.0k
            COUNTER_UPDATE(_yield_counts, 1);
556
43.0k
            break;
557
5.99M
        }
558
        auto* block = _block.get();
559
5.99M
560
5.99M
        DBUG_EXECUTE_IF("fault_inject::PipelineXTask::executing", {
561
5.99M
            Status status =
562
5.99M
                    Status::Error<INTERNAL_ERROR>("fault_inject pipeline_task executing failed");
563
5.99M
            return status;
564
        });
565
566
5.99M
        // `_sink->is_finished(_state)` means sink operator should be finished
567
9
        if (_sink->is_finished(_state)) {
568
9
            set_wake_up_early();
569
9
            return Status::OK();
570
        }
571
572
5.99M
        // `_dry_run` means sink operator need no more data
573
5.99M
        _eos = _dry_run || _eos;
574
5.99M
        _spilling = false;
575
        auto workload_group = _state->workload_group();
576
        // If last run is pended by a spilling request, `_block` is produced with some rows in last
577
5.99M
        // run, so we will resume execution using the block.
578
5.97M
        if (!_eos && _block->empty()) {
579
5.97M
            SCOPED_TIMER(_get_block_timer);
580
2.07k
            if (_state->low_memory_mode()) {
581
2.14k
                _sink->set_low_memory_mode(_state);
582
2.14k
                for (auto& op : _operators) {
583
2.14k
                    op->set_low_memory_mode(_state);
584
2.07k
                }
585
5.97M
            }
586
5.97M
            DEFER_RELEASE_RESERVED();
587
            _get_block_counter->update(1);
588
            // Sum reserve sizes across all operators in this pipeline.
589
5.97M
            // Each operator reports only its own requirement (non-recursive).
590
6.82M
            size_t reserve_size = 0;
591
6.82M
            for (auto& op : _operators) {
592
6.82M
                reserve_size += op->get_reserve_mem_size(_state);
593
6.82M
                op->reset_reserve_mem_size(_state);
594
5.97M
            }
595
5.97M
            if (workload_group &&
596
5.10M
                _state->get_query_ctx()
597
5.10M
                        ->resource_ctx()
598
5.10M
                        ->task_controller()
599
5.97M
                        ->is_enable_reserve_memory() &&
600
4.99M
                reserve_size > 0) {
601
0
                if (_should_trigger_revoking(reserve_size)) {
602
0
                    LOG(INFO) << fmt::format(
603
0
                            "Query: {} sink: {}, node id: {}, task id: {}, reserve size: {} when "
604
0
                            "high memory pressure, try to spill",
605
0
                            print_id(_query_id), _sink->get_name(), _sink->node_id(),
606
0
                            _state->task_id(), reserve_size);
607
0
                    ExecEnv::GetInstance()->workload_group_mgr()->add_paused_query(
608
0
                            _state->get_query_ctx()->resource_ctx()->shared_from_this(),
609
0
                            reserve_size,
610
0
                            Status::Error<ErrorCode::QUERY_MEMORY_EXCEEDED>(
611
0
                                    "high memory pressure, try to spill"));
612
0
                    _spilling = true;
613
0
                    continue;
614
4.99M
                }
615
958
                if (!_try_to_reserve_memory(reserve_size, _root)) {
616
958
                    continue;
617
4.99M
                }
618
            }
619
5.97M
620
5.97M
            bool eos = false;
621
5.97M
            RETURN_IF_ERROR(_root->get_block_after_projects(_state, block, &eos));
622
5.97M
            RETURN_IF_ERROR(block->check_type_and_column());
623
5.97M
            _eos = eos;
624
        }
625
5.99M
626
2.52M
        if (!_block->empty() || _eos) {
627
2.52M
            SCOPED_TIMER(_sink_timer);
628
2.52M
            Status status = Status::OK();
629
2.52M
            DEFER_RELEASE_RESERVED();
630
2.52M
            if (_state->get_query_ctx()
631
2.52M
                        ->resource_ctx()
632
2.52M
                        ->task_controller()
633
2.52M
                        ->is_enable_reserve_memory() &&
634
2.44M
                workload_group && !(_wake_up_early || _dry_run)) {
635
                const auto sink_reserve_size = _sink->get_reserve_mem_size(_state, _eos);
636
2.44M
637
0
                if (sink_reserve_size > 0 && _should_trigger_revoking(sink_reserve_size)) {
638
0
                    LOG(INFO) << fmt::format(
639
0
                            "Query: {} sink: {}, node id: {}, task id: {}, reserve size: {} when "
640
0
                            "high memory pressure, try to spill",
641
0
                            print_id(_query_id), _sink->get_name(), _sink->node_id(),
642
0
                            _state->task_id(), sink_reserve_size);
643
0
                    ExecEnv::GetInstance()->workload_group_mgr()->add_paused_query(
644
0
                            _state->get_query_ctx()->resource_ctx()->shared_from_this(),
645
0
                            sink_reserve_size,
646
0
                            Status::Error<ErrorCode::QUERY_MEMORY_EXCEEDED>(
647
0
                                    "high memory pressure, try to spill"));
648
0
                    _spilling = true;
649
0
                    continue;
650
                }
651
2.44M
652
2.44M
                if (sink_reserve_size > 0 &&
653
927
                    !_try_to_reserve_memory(sink_reserve_size, _sink.get())) {
654
927
                    continue;
655
2.44M
                }
656
            }
657
2.52M
658
2.52M
            DBUG_EXECUTE_IF("PipelineTask::execute.sink_eos_sleep", {
659
2.52M
                auto required_pipeline_id =
660
2.52M
                        DebugPoints::instance()->get_debug_param_or_default<int32_t>(
661
2.52M
                                "PipelineTask::execute.sink_eos_sleep", "pipeline_id", -1);
662
2.52M
                auto required_task_id =
663
2.52M
                        DebugPoints::instance()->get_debug_param_or_default<int32_t>(
664
2.52M
                                "PipelineTask::execute.sink_eos_sleep", "task_id", -1);
665
2.52M
                if (required_pipeline_id == pipeline_id() && required_task_id == task_id()) {
666
2.52M
                    LOG(WARNING) << "PipelineTask::execute.sink_eos_sleep sleep 10s";
667
2.52M
                    sleep(10);
668
2.52M
                }
669
            });
670
2.52M
671
2.52M
            DBUG_EXECUTE_IF("PipelineTask::execute.terminate", {
672
2.52M
                if (_eos) {
673
2.52M
                    auto required_pipeline_id =
674
2.52M
                            DebugPoints::instance()->get_debug_param_or_default<int32_t>(
675
2.52M
                                    "PipelineTask::execute.terminate", "pipeline_id", -1);
676
2.52M
                    auto required_task_id =
677
2.52M
                            DebugPoints::instance()->get_debug_param_or_default<int32_t>(
678
2.52M
                                    "PipelineTask::execute.terminate", "task_id", -1);
679
2.52M
                    auto required_fragment_id =
680
2.52M
                            DebugPoints::instance()->get_debug_param_or_default<int32_t>(
681
2.52M
                                    "PipelineTask::execute.terminate", "fragment_id", -1);
682
2.52M
                    if (required_pipeline_id == pipeline_id() && required_task_id == task_id() &&
683
2.52M
                        fragment_context->get_fragment_id() == required_fragment_id) {
684
2.52M
                        _wake_up_early = true;
685
2.52M
                        terminate();
686
2.52M
                    } else if (required_pipeline_id == pipeline_id() &&
687
2.52M
                               fragment_context->get_fragment_id() == required_fragment_id) {
688
2.52M
                        LOG(WARNING) << "PipelineTask::execute.terminate sleep 5s";
689
2.52M
                        sleep(5);
690
2.52M
                    }
691
2.52M
                }
692
2.52M
            });
693
2.52M
            RETURN_IF_ERROR(block->check_type_and_column());
694
            status = _sink->sink(_state, block, _eos);
695
2.52M
696
1.92M
            if (_eos) {
697
1.97k
                if (_sink->reset_to_rerun(_state, _root)) {
698
1.91M
                    _eos = false;
699
1.91M
                } else {
700
1.91M
                    RETURN_IF_ERROR(close(Status::OK(), false));
701
1.92M
                }
702
            }
703
2.52M
704
10
            if (status.is<ErrorCode::END_OF_FILE>()) {
705
10
                set_wake_up_early();
706
2.52M
                return Status::OK();
707
15
            } else if (!status) {
708
15
                return status;
709
            }
710
2.52M
711
1.91M
            if (_eos) { // just return, the scheduler will do finish work
712
1.91M
                return Status::OK();
713
2.52M
            }
714
5.99M
        }
715
    }
716
42.7k
717
42.7k
    RETURN_IF_ERROR(_state->get_query_ctx()->get_pipe_exec_scheduler()->submit(shared_from_this()));
718
42.7k
    return Status::OK();
719
}
720
29
721
29
// Revokes (spills) memory held by this task: first from every operator whose
// revocable memory reaches SpillFile::MIN_SPILL_WRITE_BATCH_MEM, then from the sink
// under the same threshold.
//
// Returns InternalError when the owning fragment context is already gone, the first
// error reported by any revoke_memory() call, or OK otherwise. The deferred callback
// below runs on every exit path taken after it has been constructed.
Status PipelineTask::do_revoke_memory(const std::shared_ptr<SpillContext>& spill_context) {
    auto fragment_context = _fragment_context.lock();
    if (fragment_context == nullptr) {
        return Status::InternalError("Fragment already finished! Query: {}", print_id(_query_id));
    }

    SCOPED_ATTACH_TASK(_state);
    ThreadCpuStopWatch cpu_watch;
    cpu_watch.start();
    Defer on_exit {[&]() {
        // Charge the CPU time spent revoking to both the task timer and the query.
        const int64_t cpu_time_spent = cpu_watch.elapsed_time();
        _task_cpu_timer->update(cpu_time_spent);
        fragment_context->get_query_ctx()->resource_ctx()->cpu_context()->update_cpu_cost_ms(
                cpu_time_spent);

        // A task that was woken up early terminates all of its operators and is marked
        // as eos so that it can be closed immediately.
        if (_wake_up_early) {
            terminate();
            THROW_IF_ERROR(_root->terminate(_state));
            THROW_IF_ERROR(_sink->terminate(_state));
            _eos = true;
        }

        // SpillContext counts pipeline tasks rather than operators, so it is notified
        // exactly once here, after the operators and the sink are done revoking.
        if (spill_context) {
            spill_context->on_task_finished();
        }
    }};

    // Operators first: anything below the minimum spill batch size is left alone.
    for (auto& cur_op : _operators) {
        if (cur_op->revocable_mem_size(_state) < SpillFile::MIN_SPILL_WRITE_BATCH_MEM) {
            continue;
        }
        RETURN_IF_ERROR(cur_op->revoke_memory(_state));
    }

    // Then the sink, subject to the same threshold.
    if (_sink->revocable_mem_size(_state) < SpillFile::MIN_SPILL_WRITE_BATCH_MEM) {
        return Status::OK();
    }
    RETURN_IF_ERROR(_sink->revoke_memory(_state));
    return Status::OK();
}
764
7.27M
765
7.27M
bool PipelineTask::_try_to_reserve_memory(const size_t reserve_size, OperatorBase* op) {
766
    auto st = thread_context()->thread_mem_tracker_mgr->try_reserve(reserve_size);
767
    // If reserve memory failed and the query is not enable spill, just disable reserve memory(this will enable
768
7.27M
    // memory hard limit check, and will cancel the query if allocate memory failed) and let it run.
769
5.38k
    if (!st.ok() && !_state->enable_spill()) {
770
5.38k
        LOG(INFO) << print_id(_query_id) << " reserve memory failed due to " << st
771
5.38k
                  << ", and it is not enable spill, disable reserve memory and let it run";
772
5.38k
        _state->get_query_ctx()->resource_ctx()->task_controller()->disable_reserve_memory();
773
5.38k
        return true;
774
7.27M
    }
775
    COUNTER_UPDATE(_memory_reserve_times, 1);
776
777
7.27M
    // Compute total revocable memory across all operators and the sink.
778
7.27M
    size_t total_revocable_mem_size = 0;
779
    size_t operator_max_revocable_mem_size = 0;
780
7.27M
781
    if (!st.ok() || _state->enable_force_spill()) {
782
26.2k
        // Compute total revocable memory across all operators and the sink.
783
26.2k
        total_revocable_mem_size = _sink->revocable_mem_size(_state);
784
29.1k
        operator_max_revocable_mem_size = total_revocable_mem_size;
785
29.1k
        for (auto& cur_op : _operators) {
786
29.1k
            total_revocable_mem_size += cur_op->revocable_mem_size(_state);
787
29.1k
            operator_max_revocable_mem_size =
788
29.1k
                    std::max(cur_op->revocable_mem_size(_state), operator_max_revocable_mem_size);
789
26.2k
        }
790
    }
791
792
    // During enable force spill, other operators like scan opeartor will also try to reserve memory and will failed
793
7.27M
    // here, if not add this check, it will always paused and resumed again.
794
24.3k
    if (st.ok() && _state->enable_force_spill()) {
795
24
        if (operator_max_revocable_mem_size >= _state->spill_min_revocable_mem()) {
796
24
            st = Status::Error<ErrorCode::QUERY_MEMORY_EXCEEDED>(
797
24
                    "force spill and there is an operator has memory "
798
24
                    "size {} exceeds min mem size {}",
799
24
                    PrettyPrinter::print_bytes(operator_max_revocable_mem_size),
800
24
                    PrettyPrinter::print_bytes(_state->spill_min_revocable_mem()));
801
24.3k
        }
802
    }
803
7.27M
804
1.88k
    if (!st.ok()) {
805
        COUNTER_UPDATE(_memory_reserve_failed_times, 1);
806
1.88k
        // build per-operator revocable memory info string for debugging
807
1.88k
        std::string ops_revocable_info;
808
1.88k
        {
809
1.88k
            fmt::memory_buffer buf;
810
1.88k
            for (auto& cur_op : _operators) {
811
1.88k
                fmt::format_to(buf, "{}({})-> ", cur_op->get_name(),
812
1.88k
                               PrettyPrinter::print_bytes(cur_op->revocable_mem_size(_state)));
813
1.88k
            }
814
1.88k
            if (_sink) {
815
1.88k
                fmt::format_to(buf, "{}({}) ", _sink->get_name(),
816
1.88k
                               PrettyPrinter::print_bytes(_sink->revocable_mem_size(_state)));
817
1.88k
            }
818
1.88k
            ops_revocable_info = fmt::to_string(buf);
819
        }
820
1.88k
821
1.88k
        auto debug_msg = fmt::format(
822
1.88k
                "Query: {} , try to reserve: {}, total revocable mem size: {}, failed reason: {}",
823
1.88k
                print_id(_query_id), PrettyPrinter::print_bytes(reserve_size),
824
1.88k
                PrettyPrinter::print_bytes(total_revocable_mem_size), st.to_string());
825
1.88k
        if (!ops_revocable_info.empty()) {
826
1.88k
            debug_msg += fmt::format(", ops_revocable=[{}]", ops_revocable_info);
827
        }
828
1.88k
        // PROCESS_MEMORY_EXCEEDED error msg already contains process_mem_log_str
829
1.88k
        if (!st.is<ErrorCode::PROCESS_MEMORY_EXCEEDED>()) {
830
1.88k
            debug_msg +=
831
1.88k
                    fmt::format(", debug info: {}", GlobalMemoryArbitrator::process_mem_log_str());
832
1.88k
        }
833
1.88k
        LOG(INFO) << debug_msg;
834
1.88k
        ExecEnv::GetInstance()->workload_group_mgr()->add_paused_query(
835
1.88k
                _state->get_query_ctx()->resource_ctx()->shared_from_this(), reserve_size, st);
836
1.88k
        _spilling = true;
837
1.88k
        return false;
838
7.26M
    }
839
7.27M
    return true;
840
}
841
1.50M
842
1.50M
void PipelineTask::stop_if_finished() {
843
1.50M
    auto fragment = _fragment_context.lock();
844
0
    if (!fragment) {
845
0
        return;
846
1.50M
    }
847
1.50M
    SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(fragment->get_query_ctx()->query_mem_tracker());
848
1.22M
    if (auto sink = _sink) {
849
2.03k
        if (sink->is_finished(_state)) {
850
2.03k
            set_wake_up_early();
851
2.03k
            terminate();
852
1.22M
        }
853
1.50M
    }
854
}
855
1.91M
856
1.91M
// Moves the task to State::FINALIZED and, under _dependency_lock, drops the shared
// states, the block, the operators, the sink and the pipeline it references.
// Returns OK without doing anything when the fragment context is already gone, and
// the transition error when moving to FINALIZED is not allowed.
Status PipelineTask::finalize() {
    auto frag_ctx = _fragment_context.lock();
    if (frag_ctx == nullptr) {
        return Status::OK();
    }
    SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(frag_ctx->get_query_ctx()->query_mem_tracker());
    RETURN_IF_ERROR(_state_transition(State::FINALIZED));

    // Everything below is released while holding the dependency lock.
    std::lock_guard<std::mutex> guard(_dependency_lock);
    _sink_shared_state.reset();
    _op_shared_states.clear();
    _shared_state_map.clear();
    _block.reset();
    _operators.clear();
    _sink.reset();
    _pipeline.reset();
    return Status::OK();
}
873
3.82M
874
3.82M
// Closes the operators of this task and, when `close_sink` is set, the sink as well
// (the sink is closed first and receives `exec_status`).
//
// The returned status is the first failure seen: the sink's close status if it failed,
// otherwise the first failing operator close. When `close_sink` is set the task also
// moves to State::FINISHED; a failure of that transition is returned instead.
Status PipelineTask::close(Status exec_status, bool close_sink) {
    int64_t elapsed_close_ns = 0;
    Status result;
    {
        SCOPED_RAW_TIMER(&elapsed_close_ns);
        if (close_sink) {
            result = _sink->close(_state, exec_status);
        }
        for (auto& cur_op : _operators) {
            auto op_status = cur_op->close(_state);
            // Keep only the first error; later ones are dropped.
            if (result.ok() && !op_status.ok()) {
                result = std::move(op_status);
            }
        }
    }

    // Profile counters are only touched for a task that has been opened.
    if (_opened) {
        COUNTER_UPDATE(_close_timer, elapsed_close_ns);
        COUNTER_UPDATE(_task_profile->total_time_counter(), elapsed_close_ns);
        if (close_sink) {
            _task_profile->add_info_string("WakeUpEarly", std::to_string(_wake_up_early.load()));
            _fresh_profile_counter();
        }
    }

    if (close_sink) {
        RETURN_IF_ERROR(_state_transition(State::FINISHED));
    }
    return result;
}
904
55.4k
905
55.4k
std::string PipelineTask::debug_string() {
906
    fmt::memory_buffer debug_string_buffer;
907
55.4k
908
55.4k
    fmt::format_to(debug_string_buffer, "QueryId: {}\n", print_id(_query_id));
909
55.4k
    fmt::format_to(debug_string_buffer, "InstanceId: {}\n",
910
                   print_id(_state->fragment_instance_id()));
911
55.4k
912
55.4k
    fmt::format_to(debug_string_buffer,
913
55.4k
                   "PipelineTask[id = {}, open = {}, eos = {}, state = {}, dry run = "
914
55.4k
                   "{}, _wake_up_early = {}, _wake_up_by = {}, time elapsed since last state "
915
55.4k
                   "changing = {}s, spilling = {}, is running = {}]",
916
55.4k
                   _index, _opened, _eos, _to_string(_exec_state), _dry_run, _wake_up_early.load(),
917
55.4k
                   _wake_by, _state_change_watcher.elapsed_time() / NANOS_PER_SEC, _spilling,
918
55.4k
                   is_running());
919
55.4k
    std::unique_lock<std::mutex> lc(_dependency_lock);
920
55.4k
    auto* cur_blocked_dep = _blocked_dep;
921
55.4k
    auto fragment = _fragment_context.lock();
922
245
    if (is_finalized() || !fragment) {
923
245
        fmt::format_to(debug_string_buffer, " pipeline name = {}", _pipeline_name);
924
245
        return fmt::to_string(debug_string_buffer);
925
55.2k
    }
926
55.2k
    auto elapsed = fragment->elapsed_time() / NANOS_PER_SEC;
927
55.2k
    fmt::format_to(debug_string_buffer, " elapse time = {}s, block dependency = [{}]\n", elapsed,
928
                   cur_blocked_dep && !is_finalized() ? cur_blocked_dep->debug_string() : "NULL");
929
55.2k
930
897
    if (_state && _state->local_runtime_filter_mgr()) {
931
897
        fmt::format_to(debug_string_buffer, "local_runtime_filter_mgr: [{}]\n",
932
897
                       _state->local_runtime_filter_mgr()->debug_string());
933
    }
934
55.2k
935
110k
    fmt::format_to(debug_string_buffer, "operators: ");
936
55.2k
    for (size_t i = 0; i < _operators.size(); i++) {
937
55.2k
        fmt::format_to(debug_string_buffer, "\n{}",
938
55.2k
                       _opened && !is_finalized()
939
55.2k
                               ? _operators[i]->debug_string(_state, cast_set<int>(i))
940
55.2k
                               : _operators[i]->debug_string(cast_set<int>(i)));
941
55.2k
    }
942
55.2k
    fmt::format_to(debug_string_buffer, "\n{}\n",
943
55.2k
                   _opened && !is_finalized()
944
55.2k
                           ? _sink->debug_string(_state, cast_set<int>(_operators.size()))
945
                           : _sink->debug_string(cast_set<int>(_operators.size())));
946
55.2k
947
    fmt::format_to(debug_string_buffer, "\nRead Dependency Information: \n");
948
55.2k
949
109k
    size_t i = 0;
950
109k
    for (; i < _read_dependencies.size(); i++) {
951
54.5k
        for (size_t j = 0; j < _read_dependencies[i].size(); j++) {
952
54.5k
            fmt::format_to(debug_string_buffer, "{}. {}\n", i,
953
54.5k
                           _read_dependencies[i][j]->debug_string(cast_set<int>(i) + 1));
954
54.5k
        }
955
    }
956
55.2k
957
55.2k
    fmt::format_to(debug_string_buffer, "{}. {}\n", i,
958
                   _memory_sufficient_dependency->debug_string(cast_set<int>(i++)));
959
55.2k
960
110k
    fmt::format_to(debug_string_buffer, "\nWrite Dependency Information: \n");
961
55.2k
    for (size_t j = 0; j < _write_dependencies.size(); j++, i++) {
962
55.2k
        fmt::format_to(debug_string_buffer, "{}. {}\n", i,
963
55.2k
                       _write_dependencies[j]->debug_string(cast_set<int>(j) + 1));
964
    }
965
55.2k
966
167k
    fmt::format_to(debug_string_buffer, "\nExecution Dependency Information: \n");
967
112k
    for (size_t j = 0; j < _execution_dependencies.size(); j++, i++) {
968
112k
        fmt::format_to(debug_string_buffer, "{}. {}\n", i,
969
112k
                       _execution_dependencies[j]->debug_string(cast_set<int>(i) + 1));
970
    }
971
55.2k
972
164k
    fmt::format_to(debug_string_buffer, "Finish Dependency Information: \n");
973
108k
    for (size_t j = 0; j < _finish_dependencies.size(); j++, i++) {
974
108k
        fmt::format_to(debug_string_buffer, "{}. {}\n", i,
975
108k
                       _finish_dependencies[j]->debug_string(cast_set<int>(i) + 1));
976
55.2k
    }
977
55.4k
    return fmt::to_string(debug_string_buffer);
978
}
979
964
980
964
size_t PipelineTask::get_revocable_size() const {
981
60
    if (!_opened || is_finalized() || _running || (_eos && !_spilling)) {
982
60
        return 0;
983
    }
984
985
    // Sum revocable memory from every operator in the pipeline + the sink.
986
904
    // Each operator reports only its own revocable memory (no child recursion).
987
1.03k
    size_t total = _sink->revocable_mem_size(_state);
988
1.03k
    for (const auto& op : _operators) {
989
1.03k
        total += op->revocable_mem_size(_state);
990
904
    }
991
964
    return total;
992
}
993
25
994
25
// Schedules a memory revocation for this task.
//
// When the task is finalized, or holds less revocable memory than
// SpillFile::MIN_SPILL_WRITE_BATCH_MEM, nothing is spilled and `spill_context` is told
// right away that this task is finished. Otherwise a RevokableTask wrapping this task
// is submitted to the query's pipeline scheduler. Returns the submit error, if any.
Status PipelineTask::revoke_memory(const std::shared_ptr<SpillContext>& spill_context) {
    DCHECK(spill_context);
    if (is_finalized()) {
        spill_context->on_task_finished();
        VLOG_DEBUG << "Query: " << print_id(_state->query_id()) << ", task: " << ((void*)this)
                   << " finalized";
        return Status::OK();
    }

    const auto revocable_size = get_revocable_size();
    if (revocable_size < SpillFile::MIN_SPILL_WRITE_BATCH_MEM) {
        spill_context->on_task_finished();
        VLOG_DEBUG << "Query: " << print_id(_state->query_id()) << ", task: " << ((void*)this)
                   << " has not enough data to revoke: " << revocable_size;
        return Status::OK();
    }

    // Submit a revocable task to run; its run method will call revoke memory. Currently
    // the underlying pipeline task is still blocked.
    auto revokable_task = std::make_shared<RevokableTask>(shared_from_this(), spill_context);
    RETURN_IF_ERROR(_state->get_query_ctx()->get_pipe_exec_scheduler()->submit(revokable_task));
    return Status::OK();
}
1016
4.67M
1017
// Called by a dependency to wake this task up: clears the blocked dependency, moves
// the task to RUNNABLE and, if the fragment context is still alive, submits the task
// to the query's pipeline scheduler.
// Returns the state transition error or the submit error, OK otherwise.
Status PipelineTask::wake_up(Dependency* dep, std::unique_lock<std::mutex>& /* dep_lock */) {
    DCHECK_EQ(_blocked_dep, dep) << "dep : " << dep->debug_string(0) << "task: " << debug_string();
    _blocked_dep = nullptr;
    // Take an owning pointer to this task before changing state; it is what gets submitted.
    auto self = std::dynamic_pointer_cast<PipelineTask>(shared_from_this());
    RETURN_IF_ERROR(_state_transition(PipelineTask::State::RUNNABLE));

    auto frag_ctx = _fragment_context.lock();
    if (!frag_ctx) {
        return Status::OK();
    }
    RETURN_IF_ERROR(_state->get_query_ctx()->get_pipe_exec_scheduler()->submit(self));
    return Status::OK();
}
1028
15.0M
1029
15.0M
// Moves the task to `new_state` if LEGAL_STATE_TRANSITION allows it from the current
// state; otherwise returns InternalError and leaves _exec_state untouched.
//
// The state-change stopwatch is restarted whenever the requested state differs from
// the current one, and the profile info strings are updated before the legality check,
// so both happen even for a rejected transition.
Status PipelineTask::_state_transition(State new_state) {
    const bool state_differs = _exec_state != new_state;
    if (state_differs) {
        _state_change_watcher.reset();
        _state_change_watcher.start();
    }
    _task_profile->add_info_string("TaskState", _to_string(new_state));
    _task_profile->add_info_string("BlockedByDependency", _blocked_dep ? _blocked_dep->name() : "");

    if (LEGAL_STATE_TRANSITION[static_cast<int>(new_state)].contains(_exec_state)) {
        _exec_state = new_state;
        return Status::OK();
    }
    return Status::InternalError(
            "Task state transition from {} to {} is not allowed! Task info: {}",
            _to_string(_exec_state), _to_string(new_state), debug_string());
}
1044
1045
#include "common/compile_check_end.h"
1046
} // namespace doris