Coverage Report

Created: 2026-03-13 21:50

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exec/pipeline/pipeline_task.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "exec/pipeline/pipeline_task.h"
19
20
#include <fmt/core.h>
21
#include <fmt/format.h>
22
#include <gen_cpp/Metrics_types.h>
23
#include <glog/logging.h>
24
25
#include <algorithm>
26
#include <memory>
27
#include <ostream>
28
#include <vector>
29
30
#include "common/logging.h"
31
#include "common/status.h"
32
#include "core/block/block.h"
33
#include "exec/operator/exchange_source_operator.h"
34
#include "exec/operator/operator.h"
35
#include "exec/operator/rec_cte_source_operator.h"
36
#include "exec/operator/scan_operator.h"
37
#include "exec/pipeline/dependency.h"
38
#include "exec/pipeline/pipeline.h"
39
#include "exec/pipeline/pipeline_fragment_context.h"
40
#include "exec/pipeline/revokable_task.h"
41
#include "exec/pipeline/task_queue.h"
42
#include "exec/pipeline/task_scheduler.h"
43
#include "exec/spill/spill_file.h"
44
#include "runtime/descriptors.h"
45
#include "runtime/exec_env.h"
46
#include "runtime/query_context.h"
47
#include "runtime/runtime_profile.h"
48
#include "runtime/thread_context.h"
49
#include "runtime/workload_group/workload_group_manager.h"
50
#include "util/defer_op.h"
51
#include "util/mem_info.h"
52
#include "util/uid_util.h"
53
54
namespace doris {
55
class RuntimeState;
56
} // namespace doris
57
58
namespace doris {
59
#include "common/compile_check_begin.h"
60
61
// Constructs a task bound to `pipeline`.
//
// The member initializers copy the pipeline's operator list, cache raw pointers to its first
// (`_source`) and last (`_root`) operators, keep the pipeline's sink, take over
// `shared_state_map`, and fetch the memory-sufficient dependency from the query context of
// `state`. Under BE_TEST `fragment_context` may be null, in which case the query id is a
// default-constructed TUniqueId.
//
// @param pipeline          pipeline this task executes
// @param task_id           stored as `_index`
// @param state             runtime state used by this task
// @param fragment_context  owning fragment context (stored in `_fragment_context`)
// @param parent_profile    profile the task profile is later attached to
// @param shared_state_map  shared states/dependencies keyed by id, moved into the task
// @param task_idx          stored as `_task_idx`
PipelineTask::PipelineTask(PipelinePtr& pipeline, uint32_t task_id, RuntimeState* state,
                           std::shared_ptr<PipelineFragmentContext> fragment_context,
                           RuntimeProfile* parent_profile,
                           std::map<int, std::pair<std::shared_ptr<BasicSharedState>,
                                                   std::vector<std::shared_ptr<Dependency>>>>
                                   shared_state_map,
                           int task_idx)
        :
#ifdef BE_TEST
          _query_id(fragment_context ? fragment_context->get_query_id() : TUniqueId()),
#else
          _query_id(fragment_context->get_query_id()),
#endif
          _index(task_id),
          _pipeline(pipeline),
          _opened(false),
          _state(state),
          _fragment_context(fragment_context),
          _parent_profile(parent_profile),
          _operators(pipeline->operators()),
          _source(_operators.front().get()),
          _root(_operators.back().get()),
          _sink(pipeline->sink_shared_pointer()),
          _shared_state_map(std::move(shared_state_map)),
          _task_idx(task_idx),
          _memory_sufficient_dependency(state->get_query_ctx()->get_memory_sufficient_dependency()),
          _pipeline_name(_pipeline->name()) {
#ifndef BE_TEST
    _query_mem_tracker = fragment_context->get_query_ctx()->query_mem_tracker();
#endif
    // The query-level execution dependency is always the first execution dependency.
    _execution_dependencies.push_back(state->get_query_ctx()->get_execution_dependency());

    // A shared state for the sink's first destination was already supplied by the caller:
    // nothing to create here.
    if (_shared_state_map.contains(_sink->dests_id().front())) {
        return;
    }
    // Otherwise let the sink create its own shared state; it may legitimately return null.
    if (auto created_state = _sink->create_shared_state(); created_state) {
        _sink_shared_state = std::move(created_state);
    }
}
99
100
72.1k
// Releases every object owned by the task explicitly.
//
// PipelineTask is also held by the task queue (https://github.com/apache/doris/pull/49753), so
// it may be the last owner to be destructed. Because the task holds operators, shared states,
// etc., their memory is released manually here; outside BE_TEST this happens with the thread
// memory tracker switched to the query tracker when one is set.
PipelineTask::~PipelineTask() {
    // The reset order is kept exactly as listed.
    const auto release_owned_objects = [this]() {
        _shared_state_map.clear();
        _sink_shared_state.reset();
        _op_shared_states.clear();
        _sink.reset();
        _operators.clear();
        _block.reset();
        _pipeline.reset();
    };
#ifndef BE_TEST
    if (_query_mem_tracker) {
        // The tracker switch must stay in scope while the members are released.
        SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(_query_mem_tracker);
        release_owned_objects();
        return;
    }
#endif
    release_owned_objects();
}
123
124
// Prepares the task for execution: creates the task profile, sets up the local state of the
// sink and of every operator (from the last operator back to the first), collects the source
// operator's execution dependencies, and finally moves the task to State::RUNNABLE.
//
// @param scan_range  scan ranges stored in `_scan_ranges` and handed to every operator
// @param sender_id   forwarded to the sink's local state info
// @param tsink       forwarded to the sink's local state info
// @return the first error from local-state setup; the query's exec status if the query is
//         already cancelled; InternalError if the fragment context is gone; otherwise the
//         result of the state transition.
Status PipelineTask::prepare(const std::vector<TScanRangeParams>& scan_range, const int sender_id,
                             const TDataSink& tsink) {
    DCHECK(_sink);
    _init_profile();
    SCOPED_TIMER(_task_profile->total_time_counter());
    SCOPED_CPU_TIMER(_task_cpu_timer);
    SCOPED_TIMER(_prepare_timer);
    DBUG_EXECUTE_IF("fault_inject::PipelineXTask::prepare", {
        Status status = Status::Error<INTERNAL_ERROR>("fault_inject pipeline_task prepare failed");
        return status;
    });

    // Sink local state comes first: its profile is the parent of the root operator's profile.
    {
        LocalSinkStateInfo sink_info {_task_idx,         _task_profile.get(),
                                      sender_id,         get_sink_shared_state().get(),
                                      _shared_state_map, tsink};
        RETURN_IF_ERROR(_sink->setup_local_state(_state, sink_info));
    }

    _scan_ranges = scan_range;

    // Walk the operators from last to first; each operator's profile becomes the parent
    // profile of the operator set up after it.
    auto* upper_profile = _state->get_sink_local_state()->operator_profile();
    for (int idx = cast_set<int>(_operators.size() - 1); idx >= 0; --idx) {
        auto& current_op = _operators[idx];
        LocalStateInfo op_info {upper_profile, _scan_ranges,
                                get_op_shared_state(current_op->operator_id()), _shared_state_map,
                                _task_idx};
        RETURN_IF_ERROR(current_op->setup_local_state(_state, op_info));
        upper_profile = _state->get_local_state(current_op->operator_id())->operator_profile();
    }

    // Append the source operator's execution dependencies under the dependency lock.
    {
        const auto& source_deps =
                _state->get_local_state(_source->operator_id())->execution_dependencies();
        std::unique_lock<std::mutex> lc(_dependency_lock);
        for (const auto& dep : source_deps) {
            _execution_dependencies.push_back(dep);
        }
    }

    // Keep the fragment context alive only for the duration of the cancellation check.
    {
        auto fragment = _fragment_context.lock();
        if (!fragment) {
            return Status::InternalError("Fragment already finished! Query: {}",
                                         print_id(_query_id));
        }
        if (fragment->get_query_ctx()->is_cancelled()) {
            terminate();
            return fragment->get_query_ctx()->exec_status();
        }
    }

    _block = doris::Block::create_unique();
    return _state_transition(State::RUNNABLE);
}
171
172
13
// Collects the read dependencies of every operator, the write dependencies of the sink and all
// non-null finish dependencies, then publishes them into the task's members under
// `_dependency_lock`.
//
// @return Status::OK(), or the injected error when the debug point
//         "fault_inject::PipelineXTask::_extract_dependencies" is active.
Status PipelineTask::_extract_dependencies() {
    // One slot per operator, in operator order.
    std::vector<std::vector<Dependency*>> per_op_read_deps(_operators.size());
    std::vector<Dependency*> pending_finish_deps;

    for (size_t pos = 0; pos < _operators.size(); ++pos) {
        auto* op_state = _state->get_local_state(_operators[pos]->operator_id());
        DCHECK(op_state);
        per_op_read_deps[pos] = op_state->dependencies();
        if (auto* finish_dep = op_state->finishdependency(); finish_dep) {
            pending_finish_deps.push_back(finish_dep);
        }
    }

    DBUG_EXECUTE_IF("fault_inject::PipelineXTask::_extract_dependencies", {
        Status status = Status::Error<INTERNAL_ERROR>(
                "fault_inject pipeline_task _extract_dependencies failed");
        return status;
    });

    // The sink contributes the write dependencies and, optionally, one more finish dependency.
    auto* sink_state = _state->get_sink_local_state();
    std::vector<Dependency*> sink_write_deps = sink_state->dependencies();
    if (auto* sink_finish_dep = sink_state->finishdependency(); sink_finish_dep) {
        pending_finish_deps.push_back(sink_finish_dep);
    }

    // Swap in the freshly built vectors while holding the lock; the previous contents are
    // destroyed with the locals after the lock is released.
    {
        std::unique_lock<std::mutex> lc(_dependency_lock);
        _read_dependencies.swap(per_op_read_deps);
        _write_dependencies.swap(sink_write_deps);
        _finish_dependencies.swap(pending_finish_deps);
    }
    return Status::OK();
}
209
210
6
bool PipelineTask::inject_shared_state(std::shared_ptr<BasicSharedState> shared_state) {
211
6
    if (!shared_state) {
212
1
        return false;
213
1
    }
214
    // Shared state is created by upstream task's sink operator and shared by source operator of
215
    // this task.
216
7
    for (auto& op : _operators) {
217
7
        if (shared_state->related_op_ids.contains(op->operator_id())) {
218
3
            _op_shared_states.insert({op->operator_id(), shared_state});
219
3
            return true;
220
3
        }
221
7
    }
222
    // Shared state is created by the first sink operator and shared by sink operator of this task.
223
    // For example, Set operations.
224
2
    if (shared_state->related_op_ids.contains(_sink->dests_id().front())) {
225
1
        DCHECK_EQ(_sink_shared_state, nullptr)
226
0
                << " Sink: " << _sink->get_name() << " dest id: " << _sink->dests_id().front();
227
1
        _sink_shared_state = shared_state;
228
1
        return true;
229
1
    }
230
1
    return false;
231
2
}
232
233
20
void PipelineTask::_init_profile() {
234
20
    _task_profile = std::make_unique<RuntimeProfile>(fmt::format("PipelineTask(index={})", _index));
235
20
    _parent_profile->add_child(_task_profile.get(), true, nullptr);
236
20
    _task_cpu_timer = ADD_TIMER(_task_profile, "TaskCpuTime");
237
238
20
    static const char* exec_time = "ExecuteTime";
239
20
    _exec_timer = ADD_TIMER(_task_profile, exec_time);
240
20
    _prepare_timer = ADD_CHILD_TIMER(_task_profile, "PrepareTime", exec_time);
241
20
    _open_timer = ADD_CHILD_TIMER(_task_profile, "OpenTime", exec_time);
242
20
    _get_block_timer = ADD_CHILD_TIMER(_task_profile, "GetBlockTime", exec_time);
243
20
    _get_block_counter = ADD_COUNTER(_task_profile, "GetBlockCounter", TUnit::UNIT);
244
20
    _sink_timer = ADD_CHILD_TIMER(_task_profile, "SinkTime", exec_time);
245
20
    _close_timer = ADD_CHILD_TIMER(_task_profile, "CloseTime", exec_time);
246
247
20
    _wait_worker_timer = ADD_TIMER_WITH_LEVEL(_task_profile, "WaitWorkerTime", 1);
248
249
20
    _schedule_counts = ADD_COUNTER(_task_profile, "NumScheduleTimes", TUnit::UNIT);
250
20
    _yield_counts = ADD_COUNTER(_task_profile, "NumYieldTimes", TUnit::UNIT);
251
20
    _core_change_times = ADD_COUNTER(_task_profile, "CoreChangeTimes", TUnit::UNIT);
252
20
    _memory_reserve_times = ADD_COUNTER(_task_profile, "MemoryReserveTimes", TUnit::UNIT);
253
20
    _memory_reserve_failed_times =
254
20
            ADD_COUNTER(_task_profile, "MemoryReserveFailedTimes", TUnit::UNIT);
255
20
}
256
257
6
void PipelineTask::_fresh_profile_counter() {
258
6
    COUNTER_SET(_schedule_counts, (int64_t)_schedule_time);
259
6
    COUNTER_SET(_wait_worker_timer, (int64_t)_wait_worker_watcher.elapsed_time());
260
6
}
261
262
13
// Opens the local state of every operator and of the sink, extracts the task's dependencies and
// marks the task as opened.
//
// @return the first error raised by an `open()` call or by `_extract_dependencies()`, the
//         injected error when the debug point "fault_inject::PipelineXTask::open" is active,
//         otherwise Status::OK().
Status PipelineTask::_open() {
    SCOPED_TIMER(_task_profile->total_time_counter());
    SCOPED_CPU_TIMER(_task_cpu_timer);
    SCOPED_TIMER(_open_timer);

    // Decided once here, before any local state is opened.
    _dry_run = _sink->should_dry_run(_state);

    for (auto& op : _operators) {
        auto* op_local_state = _state->get_local_state(op->operator_id());
        RETURN_IF_ERROR(op_local_state->open(_state));
    }
    auto* sink_local_state = _state->get_sink_local_state();
    RETURN_IF_ERROR(sink_local_state->open(_state));

    RETURN_IF_ERROR(_extract_dependencies());
    DBUG_EXECUTE_IF("fault_inject::PipelineXTask::open", {
        Status status = Status::Error<INTERNAL_ERROR>("fault_inject pipeline_task open failed");
        return status;
    });

    // Only reached when everything above succeeded.
    _opened = true;
    return Status::OK();
}
279
280
66
// Calls `prepare()` on the local state of every operator, then on the sink's local state.
//
// @return the first non-OK status encountered, otherwise Status::OK().
Status PipelineTask::_prepare() {
    SCOPED_TIMER(_task_profile->total_time_counter());
    SCOPED_CPU_TIMER(_task_cpu_timer);

    for (auto& op : _operators) {
        auto* op_local_state = _state->get_local_state(op->operator_id());
        RETURN_IF_ERROR(op_local_state->prepare(_state));
    }

    auto* sink_local_state = _state->get_sink_local_state();
    RETURN_IF_ERROR(sink_local_state->prepare(_state));
    return Status::OK();
}
289
290
44
bool PipelineTask::_wait_to_start() {
291
    // Before task starting, we should make sure
292
    // 1. Execution dependency is ready (which is controlled by FE 2-phase commit)
293
    // 2. Runtime filter dependencies are ready
294
    // 3. All tablets are loaded into local storage
295
44
    return std::any_of(
296
44
            _execution_dependencies.begin(), _execution_dependencies.end(),
297
60
            [&](Dependency* dep) -> bool { return dep->is_blocked_by(shared_from_this()); });
298
44
}
299
300
16
bool PipelineTask::_is_pending_finish() {
301
    // Spilling may be in progress if eos is true.
302
21
    return std::ranges::any_of(_finish_dependencies, [&](Dependency* dep) -> bool {
303
21
        return dep->is_blocked_by(shared_from_this());
304
21
    });
305
16
}
306
307
0
bool PipelineTask::is_blockable() const {
308
    // Before task starting, we should make sure
309
    // 1. Execution dependency is ready (which is controlled by FE 2-phase commit)
310
    // 2. Runtime filter dependencies are ready
311
    // 3. All tablets are loaded into local storage
312
313
0
    if (_state->enable_fuzzy_blockable_task()) {
314
0
        if ((_schedule_time + _task_idx) % 2 == 0) {
315
0
            return true;
316
0
        }
317
0
    }
318
319
0
    return std::ranges::any_of(_operators,
320
0
                               [&](OperatorPtr op) -> bool { return op->is_blockable(_state); }) ||
321
0
           _sink->is_blockable(_state);
322
0
}
323
324
906k
bool PipelineTask::_is_blocked() {
325
    // `_dry_run = true` means we do not need data from source operator.
326
906k
    if (!_dry_run) {
327
1.81M
        for (int i = cast_set<int>(_read_dependencies.size() - 1); i >= 0; i--) {
328
            // `_read_dependencies` is organized according to operators. For each operator, running condition is met iff all dependencies are ready.
329
906k
            for (auto* dep : _read_dependencies[i]) {
330
906k
                if (dep->is_blocked_by(shared_from_this())) {
331
15
                    return true;
332
15
                }
333
906k
            }
334
            // If all dependencies are ready for this operator, we can execute this task if no datum is needed from upstream operators.
335
906k
            if (!_operators[i]->need_more_input_data(_state)) {
336
2
                break;
337
2
            }
338
906k
        }
339
906k
    }
340
906k
    return _memory_sufficient_dependency->is_blocked_by(shared_from_this()) ||
341
906k
           std::ranges::any_of(_write_dependencies, [&](Dependency* dep) -> bool {
342
906k
               return dep->is_blocked_by(shared_from_this());
343
906k
           });
344
906k
}
345
346
6
void PipelineTask::terminate() {
347
    // We use a lock to assure all dependencies are not deconstructed here.
348
6
    std::unique_lock<std::mutex> lc(_dependency_lock);
349
6
    auto fragment = _fragment_context.lock();
350
6
    if (!is_finalized() && fragment) {
351
6
        try {
352
6
            DCHECK(_wake_up_early || fragment->is_canceled());
353
6
            std::ranges::for_each(_write_dependencies,
354
6
                                  [&](Dependency* dep) { dep->set_always_ready(); });
355
6
            std::ranges::for_each(_finish_dependencies,
356
10
                                  [&](Dependency* dep) { dep->set_always_ready(); });
357
6
            std::ranges::for_each(_read_dependencies, [&](std::vector<Dependency*>& deps) {
358
5
                std::ranges::for_each(deps, [&](Dependency* dep) { dep->set_always_ready(); });
359
5
            });
360
            // All `_execution_deps` will never be set blocking from ready. So we just set ready here.
361
6
            std::ranges::for_each(_execution_dependencies,
362
12
                                  [&](Dependency* dep) { dep->set_ready(); });
363
6
            _memory_sufficient_dependency->set_ready();
364
6
        } catch (const doris::Exception& e) {
365
0
            LOG(WARNING) << "Terminate failed: " << e.code() << ", " << e.to_string();
366
0
        }
367
6
    }
368
6
}
369
370
// When current memory pressure is low, memory usage may increase significantly in the next
371
// operator run, while there is no revocable memory available for spilling.
372
// Trigger memory revoking when pressure is high and revocable memory is significant.
373
// Memory pressure is evaluated using two signals:
374
// 1. Query memory usage exceeds a threshold ratio of the query memory limit.
375
// 2. Workload group memory usage reaches the workload group low-watermark threshold.
376
1.88k
bool PipelineTask::_should_trigger_revoking(const size_t reserve_size) const {
377
1.88k
    if (!_state->enable_spill()) {
378
3
        return false;
379
3
    }
380
381
1.88k
    auto query_mem_tracker = _state->get_query_ctx()->query_mem_tracker();
382
1.88k
    auto wg = _state->get_query_ctx()->workload_group();
383
1.88k
    if (!query_mem_tracker || !wg) {
384
1.87k
        return false;
385
1.87k
    }
386
387
8
    const auto parallelism = std::max(1, _pipeline->num_tasks());
388
8
    const auto query_water_mark = 90; // 90%
389
8
    const auto group_mem_limit = wg->memory_limit();
390
8
    auto query_limit = query_mem_tracker->limit();
391
8
    if (query_limit <= 0) {
392
1
        query_limit = group_mem_limit;
393
7
    } else if (query_limit > group_mem_limit && group_mem_limit > 0) {
394
1
        query_limit = group_mem_limit;
395
1
    }
396
397
8
    if (query_limit <= 0) {
398
1
        return false;
399
1
    }
400
401
7
    if ((reserve_size * parallelism) <= (query_limit / 5)) {
402
1
        return false;
403
1
    }
404
405
6
    bool is_high_memory_pressure = false;
406
6
    const auto used_mem = query_mem_tracker->consumption() + reserve_size * parallelism;
407
6
    if (used_mem >= int64_t((double(query_limit) * query_water_mark / 100))) {
408
2
        is_high_memory_pressure = true;
409
2
    }
410
411
6
    if (!is_high_memory_pressure) {
412
4
        bool is_low_watermark;
413
4
        bool is_high_watermark;
414
4
        wg->check_mem_used(&is_low_watermark, &is_high_watermark);
415
4
        is_high_memory_pressure = is_low_watermark || is_high_watermark;
416
4
    }
417
418
6
    if (is_high_memory_pressure) {
419
4
        const auto revocable_size = [&]() {
420
4
            size_t total = _sink->revocable_mem_size(_state);
421
4
            for (const auto& op : _operators) {
422
4
                total += op->revocable_mem_size(_state);
423
4
            }
424
4
            return total;
425
4
        }();
426
427
4
        const auto total_estimated_revocable = revocable_size * parallelism;
428
4
        return total_estimated_revocable >= int64_t(double(query_limit) * 0.2);
429
4
    }
430
431
2
    return false;
432
6
}
433
434
/**
435
 * `_eos` indicates whether the execution phase is done. `done` indicates whether we could close
436
 * this task.
437
 *
438
 * For example,
439
 * 1. if `_eos` is false which means we should continue to get next block so we cannot close (e.g.
440
 *    `done` is false)
441
 * 2. if `_eos` is true which means all blocks from source are exhausted but `_is_pending_finish()`
442
 *    is true which means we should wait for a pending dependency ready (maybe a running rpc), so we
443
 *    cannot close (e.g. `done` is false)
444
 * 3. if `_eos` is true which means all blocks from source are exhausted and `_is_pending_finish()`
445
 *    is false which means we can close immediately (e.g. `done` is true)
446
 * @param done
447
 * @return
448
 */
449
37
Status PipelineTask::execute(bool* done) {
450
37
    if (_exec_state != State::RUNNABLE || _blocked_dep != nullptr) [[unlikely]] {
451
1
#ifdef BE_TEST
452
1
        return Status::InternalError("Pipeline task is not runnable! Task info: {}",
453
1
                                     debug_string());
454
#else
455
        return Status::FatalError("Pipeline task is not runnable! Task info: {}", debug_string());
456
#endif
457
1
    }
458
459
36
    auto fragment_context = _fragment_context.lock();
460
36
    if (!fragment_context) {
461
0
        return Status::InternalError("Fragment already finished! Query: {}", print_id(_query_id));
462
0
    }
463
36
    int64_t time_spent = 0;
464
36
    ThreadCpuStopWatch cpu_time_stop_watch;
465
36
    cpu_time_stop_watch.start();
466
36
    SCOPED_ATTACH_TASK(_state);
467
36
    Defer running_defer {[&]() {
468
36
        int64_t delta_cpu_time = cpu_time_stop_watch.elapsed_time();
469
36
        _task_cpu_timer->update(delta_cpu_time);
470
36
        fragment_context->get_query_ctx()->resource_ctx()->cpu_context()->update_cpu_cost_ms(
471
36
                delta_cpu_time);
472
473
        // If task is woke up early, we should terminate all operators, and this task could be closed immediately.
474
36
        if (_wake_up_early) {
475
3
            terminate();
476
3
            THROW_IF_ERROR(_root->terminate(_state));
477
3
            THROW_IF_ERROR(_sink->terminate(_state));
478
3
            _eos = true;
479
3
            *done = true;
480
33
        } else if (_eos && !_spilling &&
481
33
                   (fragment_context->is_canceled() || !_is_pending_finish())) {
482
10
            *done = true;
483
10
        }
484
36
    }};
485
36
    const auto query_id = _state->query_id();
486
    // If this task is already EOS and block is empty (which means we already output all blocks),
487
    // just return here.
488
36
    if (_eos && !_spilling) {
489
2
        return Status::OK();
490
2
    }
491
    // If this task is blocked by a spilling request and waken up immediately, the spilling
492
    // dependency will not block this task and we should just run here.
493
34
    if (!_block->empty()) {
494
0
        LOG(INFO) << "Query: " << print_id(query_id) << " has pending block, size: "
495
0
                  << PrettyPrinter::print_bytes(_block->allocated_bytes());
496
0
        DCHECK(_spilling);
497
0
    }
498
499
34
    SCOPED_TIMER(_task_profile->total_time_counter());
500
34
    SCOPED_TIMER(_exec_timer);
501
502
34
    if (!_wake_up_early) {
503
34
        RETURN_IF_ERROR(_prepare());
504
34
    }
505
34
    DBUG_EXECUTE_IF("fault_inject::PipelineXTask::execute", {
506
34
        Status status = Status::Error<INTERNAL_ERROR>("fault_inject pipeline_task execute failed");
507
34
        return status;
508
34
    });
509
    // `_wake_up_early` must be after `_wait_to_start()`
510
34
    if (_wait_to_start() || _wake_up_early) {
511
2
        return Status::OK();
512
2
    }
513
32
    RETURN_IF_ERROR(_prepare());
514
515
    // The status must be runnable
516
32
    if (!_opened && !fragment_context->is_canceled()) {
517
12
        DBUG_EXECUTE_IF("PipelineTask::execute.open_sleep", {
518
12
            auto required_pipeline_id =
519
12
                    DebugPoints::instance()->get_debug_param_or_default<int32_t>(
520
12
                            "PipelineTask::execute.open_sleep", "pipeline_id", -1);
521
12
            auto required_task_id = DebugPoints::instance()->get_debug_param_or_default<int32_t>(
522
12
                    "PipelineTask::execute.open_sleep", "task_id", -1);
523
12
            if (required_pipeline_id == pipeline_id() && required_task_id == task_id()) {
524
12
                LOG(WARNING) << "PipelineTask::execute.open_sleep sleep 5s";
525
12
                sleep(5);
526
12
            }
527
12
        });
528
529
12
        SCOPED_RAW_TIMER(&time_spent);
530
12
        RETURN_IF_ERROR(_open());
531
12
    }
532
533
906k
    while (!fragment_context->is_canceled()) {
534
906k
        SCOPED_RAW_TIMER(&time_spent);
535
906k
        Defer defer {[&]() {
536
            // If this run is pended by a spilling request, the block will be output in next run.
537
906k
            if (!_spilling) {
538
904k
                _block->clear_column_data(_root->row_desc().num_materialized_slots());
539
904k
            }
540
906k
        }};
541
        // `_wake_up_early` must be after `_is_blocked()`
542
906k
        if (_is_blocked() || _wake_up_early) {
543
17
            return Status::OK();
544
17
        }
545
546
        /// When a task is cancelled,
547
        /// its blocking state will be cleared and it will transition to a ready state (though it is not truly ready).
548
        /// Here, checking whether it is cancelled to prevent tasks in a blocking state from being re-executed.
549
906k
        if (fragment_context->is_canceled()) {
550
0
            break;
551
0
        }
552
553
906k
        if (time_spent > _exec_time_slice) {
554
4
            COUNTER_UPDATE(_yield_counts, 1);
555
4
            break;
556
4
        }
557
906k
        auto* block = _block.get();
558
559
906k
        DBUG_EXECUTE_IF("fault_inject::PipelineXTask::executing", {
560
906k
            Status status =
561
906k
                    Status::Error<INTERNAL_ERROR>("fault_inject pipeline_task executing failed");
562
906k
            return status;
563
906k
        });
564
565
        // `_sink->is_finished(_state)` means sink operator should be finished
566
906k
        if (_sink->is_finished(_state)) {
567
1
            set_wake_up_early();
568
1
            return Status::OK();
569
1
        }
570
571
        // `_dry_run` means sink operator need no more data
572
906k
        _eos = _dry_run || _eos;
573
906k
        _spilling = false;
574
906k
        auto workload_group = _state->workload_group();
575
        // If last run is pended by a spilling request, `_block` is produced with some rows in last
576
        // run, so we will resume execution using the block.
577
906k
        if (!_eos && _block->empty()) {
578
905k
            SCOPED_TIMER(_get_block_timer);
579
905k
            if (_state->low_memory_mode()) {
580
0
                _sink->set_low_memory_mode(_state);
581
0
                for (auto& op : _operators) {
582
0
                    op->set_low_memory_mode(_state);
583
0
                }
584
0
            }
585
905k
            DEFER_RELEASE_RESERVED();
586
905k
            _get_block_counter->update(1);
587
            // Sum reserve sizes across all operators in this pipeline.
588
            // Each operator reports only its own requirement (non-recursive).
589
905k
            size_t reserve_size = 0;
590
905k
            for (auto& op : _operators) {
591
905k
                reserve_size += op->get_reserve_mem_size(_state);
592
905k
                op->reset_reserve_mem_size(_state);
593
905k
            }
594
905k
            if (workload_group &&
595
905k
                _state->get_query_ctx()
596
45.1k
                        ->resource_ctx()
597
45.1k
                        ->task_controller()
598
45.1k
                        ->is_enable_reserve_memory() &&
599
905k
                reserve_size > 0) {
600
958
                if (_should_trigger_revoking(reserve_size)) {
601
0
                    LOG(INFO) << fmt::format(
602
0
                            "Query: {} sink: {}, node id: {}, task id: {}, reserve size: {} when "
603
0
                            "high memory pressure, try to spill",
604
0
                            print_id(_query_id), _sink->get_name(), _sink->node_id(),
605
0
                            _state->task_id(), reserve_size);
606
0
                    ExecEnv::GetInstance()->workload_group_mgr()->add_paused_query(
607
0
                            _state->get_query_ctx()->resource_ctx()->shared_from_this(),
608
0
                            reserve_size,
609
0
                            Status::Error<ErrorCode::QUERY_MEMORY_EXCEEDED>(
610
0
                                    "high memory pressure, try to spill"));
611
0
                    _spilling = true;
612
0
                    continue;
613
0
                }
614
958
                if (!_try_to_reserve_memory(reserve_size, _root)) {
615
956
                    continue;
616
956
                }
617
958
            }
618
619
904k
            bool eos = false;
620
904k
            RETURN_IF_ERROR(_root->get_block_after_projects(_state, block, &eos));
621
904k
            RETURN_IF_ERROR(block->check_type_and_column());
622
904k
            _eos = eos;
623
904k
        }
624
625
905k
        if (!_block->empty() || _eos) {
626
936
            SCOPED_TIMER(_sink_timer);
627
936
            Status status = Status::OK();
628
936
            DEFER_RELEASE_RESERVED();
629
936
            if (_state->get_query_ctx()
630
936
                        ->resource_ctx()
631
936
                        ->task_controller()
632
936
                        ->is_enable_reserve_memory() &&
633
936
                workload_group && !(_wake_up_early || _dry_run)) {
634
921
                const auto sink_reserve_size = _sink->get_reserve_mem_size(_state, _eos);
635
636
921
                if (sink_reserve_size > 0 && _should_trigger_revoking(sink_reserve_size)) {
637
0
                    LOG(INFO) << fmt::format(
638
0
                            "Query: {} sink: {}, node id: {}, task id: {}, reserve size: {} when "
639
0
                            "high memory pressure, try to spill",
640
0
                            print_id(_query_id), _sink->get_name(), _sink->node_id(),
641
0
                            _state->task_id(), sink_reserve_size);
642
0
                    ExecEnv::GetInstance()->workload_group_mgr()->add_paused_query(
643
0
                            _state->get_query_ctx()->resource_ctx()->shared_from_this(),
644
0
                            sink_reserve_size,
645
0
                            Status::Error<ErrorCode::QUERY_MEMORY_EXCEEDED>(
646
0
                                    "high memory pressure, try to spill"));
647
0
                    _spilling = true;
648
0
                    continue;
649
0
                }
650
651
921
                if (sink_reserve_size > 0 &&
652
921
                    !_try_to_reserve_memory(sink_reserve_size, _sink.get())) {
653
920
                    continue;
654
920
                }
655
921
            }
656
657
16
            DBUG_EXECUTE_IF("PipelineTask::execute.sink_eos_sleep", {
658
16
                auto required_pipeline_id =
659
16
                        DebugPoints::instance()->get_debug_param_or_default<int32_t>(
660
16
                                "PipelineTask::execute.sink_eos_sleep", "pipeline_id", -1);
661
16
                auto required_task_id =
662
16
                        DebugPoints::instance()->get_debug_param_or_default<int32_t>(
663
16
                                "PipelineTask::execute.sink_eos_sleep", "task_id", -1);
664
16
                if (required_pipeline_id == pipeline_id() && required_task_id == task_id()) {
665
16
                    LOG(WARNING) << "PipelineTask::execute.sink_eos_sleep sleep 10s";
666
16
                    sleep(10);
667
16
                }
668
16
            });
669
670
16
            DBUG_EXECUTE_IF("PipelineTask::execute.terminate", {
671
16
                if (_eos) {
672
16
                    auto required_pipeline_id =
673
16
                            DebugPoints::instance()->get_debug_param_or_default<int32_t>(
674
16
                                    "PipelineTask::execute.terminate", "pipeline_id", -1);
675
16
                    auto required_task_id =
676
16
                            DebugPoints::instance()->get_debug_param_or_default<int32_t>(
677
16
                                    "PipelineTask::execute.terminate", "task_id", -1);
678
16
                    auto required_fragment_id =
679
16
                            DebugPoints::instance()->get_debug_param_or_default<int32_t>(
680
16
                                    "PipelineTask::execute.terminate", "fragment_id", -1);
681
16
                    if (required_pipeline_id == pipeline_id() && required_task_id == task_id() &&
682
16
                        fragment_context->get_fragment_id() == required_fragment_id) {
683
16
                        _wake_up_early = true;
684
16
                        terminate();
685
16
                    } else if (required_pipeline_id == pipeline_id() &&
686
16
                               fragment_context->get_fragment_id() == required_fragment_id) {
687
16
                        LOG(WARNING) << "PipelineTask::execute.terminate sleep 5s";
688
16
                        sleep(5);
689
16
                    }
690
16
                }
691
16
            });
692
16
            RETURN_IF_ERROR(block->check_type_and_column());
693
16
            status = _sink->sink(_state, block, _eos);
694
695
16
            if (_eos) {
696
10
                if (_sink->reset_to_rerun(_state, _root)) {
697
0
                    _eos = false;
698
10
                } else {
699
10
                    RETURN_IF_ERROR(close(Status::OK(), false));
700
10
                }
701
10
            }
702
703
16
            if (status.is<ErrorCode::END_OF_FILE>()) {
704
1
                set_wake_up_early();
705
1
                return Status::OK();
706
15
            } else if (!status) {
707
0
                return status;
708
0
            }
709
710
15
            if (_eos) { // just return, the scheduler will do finish work
711
9
                return Status::OK();
712
9
            }
713
15
        }
714
905k
    }
715
716
4
    RETURN_IF_ERROR(_state->get_query_ctx()->get_pipe_exec_scheduler()->submit(shared_from_this()));
717
4
    return Status::OK();
718
4
}
719
720
7
// Revoke (spill) memory held by this task's operators and its sink.
//
// Returns InternalError if the owning fragment context has already been released.
// Otherwise every operator, and then the sink, whose revocable memory is at least
// SpillFile::MIN_SPILL_WRITE_BATCH_MEM is asked to revoke it; the first failing
// status is returned immediately.
//
// On every exit path after the fragment check, the deferred block below records the
// CPU time spent, terminates the task if it was woken up early, and notifies
// `spill_context` (when non-null) that this task has finished.
Status PipelineTask::do_revoke_memory(const std::shared_ptr<SpillContext>& spill_context) {
    auto fragment_ctx = _fragment_context.lock();
    if (fragment_ctx == nullptr) {
        return Status::InternalError("Fragment already finished! Query: {}", print_id(_query_id));
    }

    SCOPED_ATTACH_TASK(_state);
    ThreadCpuStopWatch cpu_watch;
    cpu_watch.start();
    Defer running_defer {[&]() {
        // Account the CPU time consumed by this revoke on both the task profile and the query.
        const int64_t cpu_delta = cpu_watch.elapsed_time();
        _task_cpu_timer->update(cpu_delta);
        fragment_ctx->get_query_ctx()->resource_ctx()->cpu_context()->update_cpu_cost_ms(
                cpu_delta);

        // A task that was woken up early terminates all of its operators so that it can be
        // closed right away.
        if (_wake_up_early) {
            terminate();
            THROW_IF_ERROR(_root->terminate(_state));
            THROW_IF_ERROR(_sink->terminate(_state));
            _eos = true;
        }

        // SpillContext counts pipeline tasks rather than operators, so it is notified exactly
        // once, after all operators and the sink are done revoking.
        if (spill_context != nullptr) {
            spill_context->on_task_finished();
        }
    }};

    // Shared rule for operators and the sink: only revoke when enough memory is revocable.
    auto revoke_if_large = [&](auto& target) -> Status {
        if (target->revocable_mem_size(_state) >= SpillFile::MIN_SPILL_WRITE_BATCH_MEM) {
            RETURN_IF_ERROR(target->revoke_memory(_state));
        }
        return Status::OK();
    };

    // Operators first, in pipeline order, then the sink.
    for (auto& op : _operators) {
        RETURN_IF_ERROR(revoke_if_large(op));
    }
    RETURN_IF_ERROR(revoke_if_large(_sink));
    return Status::OK();
}
763
764
1.87k
bool PipelineTask::_try_to_reserve_memory(const size_t reserve_size, OperatorBase* op) {
765
1.87k
    auto st = thread_context()->thread_mem_tracker_mgr->try_reserve(reserve_size);
766
    // If reserve memory failed and the query is not enable spill, just disable reserve memory(this will enable
767
    // memory hard limit check, and will cancel the query if allocate memory failed) and let it run.
768
1.87k
    if (!st.ok() && !_state->enable_spill()) {
769
2
        LOG(INFO) << print_id(_query_id) << " reserve memory failed due to " << st
770
2
                  << ", and it is not enable spill, disable reserve memory and let it run";
771
2
        _state->get_query_ctx()->resource_ctx()->task_controller()->disable_reserve_memory();
772
2
        return true;
773
2
    }
774
1.87k
    COUNTER_UPDATE(_memory_reserve_times, 1);
775
776
    // Compute total revocable memory across all operators and the sink.
777
1.87k
    size_t total_revocable_mem_size = 0;
778
1.87k
    size_t operator_max_revocable_mem_size = 0;
779
780
1.87k
    if (!st.ok() || _state->enable_force_spill()) {
781
        // Compute total revocable memory across all operators and the sink.
782
1.87k
        total_revocable_mem_size = _sink->revocable_mem_size(_state);
783
1.87k
        operator_max_revocable_mem_size = total_revocable_mem_size;
784
1.87k
        for (auto& cur_op : _operators) {
785
1.87k
            total_revocable_mem_size += cur_op->revocable_mem_size(_state);
786
1.87k
            operator_max_revocable_mem_size =
787
1.87k
                    std::max(cur_op->revocable_mem_size(_state), operator_max_revocable_mem_size);
788
1.87k
        }
789
1.87k
    }
790
791
    // During enable force spill, other operators like scan opeartor will also try to reserve memory and will failed
792
    // here, if not add this check, it will always paused and resumed again.
793
1.87k
    if (st.ok() && _state->enable_force_spill()) {
794
0
        if (operator_max_revocable_mem_size >= _state->spill_min_revocable_mem()) {
795
0
            st = Status::Error<ErrorCode::QUERY_MEMORY_EXCEEDED>(
796
0
                    "force spill and there is an operator has memory "
797
0
                    "size {} exceeds min mem size {}",
798
0
                    PrettyPrinter::print_bytes(operator_max_revocable_mem_size),
799
0
                    PrettyPrinter::print_bytes(_state->spill_min_revocable_mem()));
800
0
        }
801
0
    }
802
803
1.87k
    if (!st.ok()) {
804
1.87k
        COUNTER_UPDATE(_memory_reserve_failed_times, 1);
805
        // build per-operator revocable memory info string for debugging
806
1.87k
        std::string ops_revocable_info;
807
1.87k
        {
808
1.87k
            fmt::memory_buffer buf;
809
1.87k
            for (auto& cur_op : _operators) {
810
1.87k
                fmt::format_to(buf, "{}({})-> ", cur_op->get_name(),
811
1.87k
                               PrettyPrinter::print_bytes(cur_op->revocable_mem_size(_state)));
812
1.87k
            }
813
1.87k
            if (_sink) {
814
1.87k
                fmt::format_to(buf, "{}({}) ", _sink->get_name(),
815
1.87k
                               PrettyPrinter::print_bytes(_sink->revocable_mem_size(_state)));
816
1.87k
            }
817
1.87k
            ops_revocable_info = fmt::to_string(buf);
818
1.87k
        }
819
820
1.87k
        auto debug_msg = fmt::format(
821
1.87k
                "Query: {} , try to reserve: {}, total revocable mem size: {}, failed reason: {}",
822
1.87k
                print_id(_query_id), PrettyPrinter::print_bytes(reserve_size),
823
1.87k
                PrettyPrinter::print_bytes(total_revocable_mem_size), st.to_string());
824
1.87k
        if (!ops_revocable_info.empty()) {
825
1.87k
            debug_msg += fmt::format(", ops_revocable=[{}]", ops_revocable_info);
826
1.87k
        }
827
        // PROCESS_MEMORY_EXCEEDED error msg already contains process_mem_log_str
828
1.87k
        if (!st.is<ErrorCode::PROCESS_MEMORY_EXCEEDED>()) {
829
1.87k
            debug_msg +=
830
1.87k
                    fmt::format(", debug info: {}", GlobalMemoryArbitrator::process_mem_log_str());
831
1.87k
        }
832
1.87k
        LOG(INFO) << debug_msg;
833
1.87k
        ExecEnv::GetInstance()->workload_group_mgr()->add_paused_query(
834
1.87k
                _state->get_query_ctx()->resource_ctx()->shared_from_this(), reserve_size, st);
835
1.87k
        _spilling = true;
836
1.87k
        return false;
837
1.87k
    }
838
0
    return true;
839
1.87k
}
840
841
482k
void PipelineTask::stop_if_finished() {
842
482k
    auto fragment = _fragment_context.lock();
843
482k
    if (!fragment) {
844
0
        return;
845
0
    }
846
482k
    SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(fragment->get_query_ctx()->query_mem_tracker());
847
482k
    if (auto sink = _sink) {
848
482k
        if (sink->is_finished(_state)) {
849
1
            set_wake_up_early();
850
1
            terminate();
851
1
        }
852
482k
    }
853
482k
}
854
855
1
// Move the task to State::FINALIZED and release everything it holds: shared states, the
// working block, the operators, the sink and the pipeline. Returns OK without doing anything
// when the fragment context is already gone; returns the transition error if the state
// change is rejected.
Status PipelineTask::finalize() {
    auto fragment_ctx = _fragment_context.lock();
    if (fragment_ctx == nullptr) {
        return Status::OK();
    }
    SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(fragment_ctx->get_query_ctx()->query_mem_tracker());
    RETURN_IF_ERROR(_state_transition(State::FINALIZED));

    // The members below are released while holding the dependency lock.
    std::lock_guard<std::mutex> guard(_dependency_lock);
    _sink_shared_state.reset();
    _op_shared_states.clear();
    _shared_state_map.clear();
    _block.reset();
    _operators.clear();
    _sink.reset();
    _pipeline.reset();
    return Status::OK();
}
872
873
16
// Close all operators of this task and, when `close_sink` is true, the sink as well (the
// sink is closed first and receives `exec_status`).
//
// The returned status is the first failure encountered (sink before operators), or OK.
// When the task had been opened, the time spent closing is added to the close timer and the
// task's total time; when the sink is closed too, the profile is refreshed and the task is
// moved to State::FINISHED. A rejected state transition is returned as the error.
Status PipelineTask::close(Status exec_status, bool close_sink) {
    int64_t elapsed_close_ns = 0;
    Status result;
    {
        SCOPED_RAW_TIMER(&elapsed_close_ns);
        if (close_sink) {
            result = _sink->close(_state, exec_status);
        }
        // Every operator is closed even after a failure; only the first error is kept.
        for (auto& op : _operators) {
            auto op_status = op->close(_state);
            if (result.ok() && !op_status.ok()) {
                result = std::move(op_status);
            }
        }
    }

    if (_opened) {
        COUNTER_UPDATE(_close_timer, elapsed_close_ns);
        COUNTER_UPDATE(_task_profile->total_time_counter(), elapsed_close_ns);
        if (close_sink) {
            _task_profile->add_info_string("WakeUpEarly", std::to_string(_wake_up_early.load()));
            _fresh_profile_counter();
        }
    }

    if (close_sink) {
        RETURN_IF_ERROR(_state_transition(State::FINISHED));
    }
    return result;
}
903
904
55.0k
std::string PipelineTask::debug_string() {
905
55.0k
    fmt::memory_buffer debug_string_buffer;
906
907
55.0k
    fmt::format_to(debug_string_buffer, "QueryId: {}\n", print_id(_query_id));
908
55.0k
    fmt::format_to(debug_string_buffer, "InstanceId: {}\n",
909
55.0k
                   print_id(_state->fragment_instance_id()));
910
911
55.0k
    fmt::format_to(debug_string_buffer,
912
55.0k
                   "PipelineTask[id = {}, open = {}, eos = {}, state = {}, dry run = "
913
55.0k
                   "{}, _wake_up_early = {}, _wake_up_by = {}, time elapsed since last state "
914
55.0k
                   "changing = {}s, spilling = {}, is running = {}]",
915
55.0k
                   _index, _opened, _eos, _to_string(_exec_state), _dry_run, _wake_up_early.load(),
916
55.0k
                   _wake_by, _state_change_watcher.elapsed_time() / NANOS_PER_SEC, _spilling,
917
55.0k
                   is_running());
918
55.0k
    std::unique_lock<std::mutex> lc(_dependency_lock);
919
55.0k
    auto* cur_blocked_dep = _blocked_dep;
920
55.0k
    auto fragment = _fragment_context.lock();
921
55.0k
    if (is_finalized() || !fragment) {
922
5
        fmt::format_to(debug_string_buffer, " pipeline name = {}", _pipeline_name);
923
5
        return fmt::to_string(debug_string_buffer);
924
5
    }
925
55.0k
    auto elapsed = fragment->elapsed_time() / NANOS_PER_SEC;
926
55.0k
    fmt::format_to(debug_string_buffer, " elapse time = {}s, block dependency = [{}]\n", elapsed,
927
55.0k
                   cur_blocked_dep && !is_finalized() ? cur_blocked_dep->debug_string() : "NULL");
928
929
55.0k
    if (_state && _state->local_runtime_filter_mgr()) {
930
0
        fmt::format_to(debug_string_buffer, "local_runtime_filter_mgr: [{}]\n",
931
0
                       _state->local_runtime_filter_mgr()->debug_string());
932
0
    }
933
934
55.0k
    fmt::format_to(debug_string_buffer, "operators: ");
935
110k
    for (size_t i = 0; i < _operators.size(); i++) {
936
55.0k
        fmt::format_to(debug_string_buffer, "\n{}",
937
55.0k
                       _opened && !is_finalized()
938
55.0k
                               ? _operators[i]->debug_string(_state, cast_set<int>(i))
939
55.0k
                               : _operators[i]->debug_string(cast_set<int>(i)));
940
55.0k
    }
941
55.0k
    fmt::format_to(debug_string_buffer, "\n{}\n",
942
55.0k
                   _opened && !is_finalized()
943
55.0k
                           ? _sink->debug_string(_state, cast_set<int>(_operators.size()))
944
55.0k
                           : _sink->debug_string(cast_set<int>(_operators.size())));
945
946
55.0k
    fmt::format_to(debug_string_buffer, "\nRead Dependency Information: \n");
947
948
55.0k
    size_t i = 0;
949
110k
    for (; i < _read_dependencies.size(); i++) {
950
110k
        for (size_t j = 0; j < _read_dependencies[i].size(); j++) {
951
55.0k
            fmt::format_to(debug_string_buffer, "{}. {}\n", i,
952
55.0k
                           _read_dependencies[i][j]->debug_string(cast_set<int>(i) + 1));
953
55.0k
        }
954
55.0k
    }
955
956
55.0k
    fmt::format_to(debug_string_buffer, "{}. {}\n", i,
957
55.0k
                   _memory_sufficient_dependency->debug_string(cast_set<int>(i++)));
958
959
55.0k
    fmt::format_to(debug_string_buffer, "\nWrite Dependency Information: \n");
960
110k
    for (size_t j = 0; j < _write_dependencies.size(); j++, i++) {
961
55.0k
        fmt::format_to(debug_string_buffer, "{}. {}\n", i,
962
55.0k
                       _write_dependencies[j]->debug_string(cast_set<int>(j) + 1));
963
55.0k
    }
964
965
55.0k
    fmt::format_to(debug_string_buffer, "\nExecution Dependency Information: \n");
966
165k
    for (size_t j = 0; j < _execution_dependencies.size(); j++, i++) {
967
110k
        fmt::format_to(debug_string_buffer, "{}. {}\n", i,
968
110k
                       _execution_dependencies[j]->debug_string(cast_set<int>(i) + 1));
969
110k
    }
970
971
55.0k
    fmt::format_to(debug_string_buffer, "Finish Dependency Information: \n");
972
165k
    for (size_t j = 0; j < _finish_dependencies.size(); j++, i++) {
973
110k
        fmt::format_to(debug_string_buffer, "{}. {}\n", i,
974
110k
                       _finish_dependencies[j]->debug_string(cast_set<int>(i) + 1));
975
110k
    }
976
55.0k
    return fmt::to_string(debug_string_buffer);
977
55.0k
}
978
979
2
size_t PipelineTask::get_revocable_size() const {
980
2
    if (!_opened || is_finalized() || _running || (_eos && !_spilling)) {
981
0
        return 0;
982
0
    }
983
984
    // Sum revocable memory from every operator in the pipeline + the sink.
985
    // Each operator reports only its own revocable memory (no child recursion).
986
2
    size_t total = _sink->revocable_mem_size(_state);
987
2
    for (const auto& op : _operators) {
988
2
        total += op->revocable_mem_size(_state);
989
2
    }
990
2
    return total;
991
2
}
992
993
3
// Ask this task to revoke its memory on behalf of `spill_context` (must be non-null).
//
// If the task is finalized, or its revocable memory is below
// SpillFile::MIN_SPILL_WRITE_BATCH_MEM, the context is notified immediately that this task is
// done. Otherwise a RevokableTask wrapping this task is submitted to the query's pipeline
// scheduler; a submit failure is returned to the caller.
Status PipelineTask::revoke_memory(const std::shared_ptr<SpillContext>& spill_context) {
    DCHECK(spill_context);
    if (is_finalized()) {
        spill_context->on_task_finished();
        VLOG_DEBUG << "Query: " << print_id(_state->query_id())
                   << ", task: " << static_cast<void*>(this) << " finalized";
        return Status::OK();
    }

    const auto revocable_size = get_revocable_size();
    if (revocable_size < SpillFile::MIN_SPILL_WRITE_BATCH_MEM) {
        spill_context->on_task_finished();
        VLOG_DEBUG << "Query: " << print_id(_state->query_id())
                   << ", task: " << static_cast<void*>(this)
                   << " has not enough data to revoke: " << revocable_size;
        return Status::OK();
    }

    // Submit a revocable task to run; its run method will call revoke memory. The underlying
    // pipeline task is still blocked at this point.
    auto revokable_task = std::make_shared<RevokableTask>(shared_from_this(), spill_context);
    RETURN_IF_ERROR(_state->get_query_ctx()->get_pipe_exec_scheduler()->submit(revokable_task));
    return Status::OK();
}
1015
1016
24
// Called by a dependency to wake this task up. `dep` is expected to be the dependency the
// task is currently blocked on (checked in debug builds). The blocked dependency is cleared,
// the task moves to State::RUNNABLE, and, if the fragment context is still alive, the task is
// resubmitted to the query's pipeline scheduler.
Status PipelineTask::wake_up(Dependency* dep, std::unique_lock<std::mutex>& /* dep_lock */) {
    DCHECK_EQ(_blocked_dep, dep) << "dep : " << dep->debug_string(0) << "task: " << debug_string();
    _blocked_dep = nullptr;

    // Take a strong reference to this task before changing its state.
    auto self = std::dynamic_pointer_cast<PipelineTask>(shared_from_this());
    RETURN_IF_ERROR(_state_transition(PipelineTask::State::RUNNABLE));

    auto fragment_ctx = _fragment_context.lock();
    if (fragment_ctx == nullptr) {
        return Status::OK();
    }
    RETURN_IF_ERROR(_state->get_query_ctx()->get_pipe_exec_scheduler()->submit(self));
    return Status::OK();
}
1027
1028
99
// Move the task to `new_state`.
//
// The state-change stopwatch is restarted whenever `new_state` differs from the current state,
// and the profile's "TaskState" / "BlockedByDependency" entries are refreshed, before the
// transition is validated. If LEGAL_STATE_TRANSITION does not allow the move, InternalError is
// returned and `_exec_state` is left unchanged.
Status PipelineTask::_state_transition(State new_state) {
    const bool state_changes = _exec_state != new_state;
    if (state_changes) {
        _state_change_watcher.reset();
        _state_change_watcher.start();
    }

    _task_profile->add_info_string("TaskState", _to_string(new_state));
    _task_profile->add_info_string("BlockedByDependency", _blocked_dep ? _blocked_dep->name() : "");

    const bool transition_allowed =
            LEGAL_STATE_TRANSITION[static_cast<int>(new_state)].contains(_exec_state);
    if (transition_allowed) {
        _exec_state = new_state;
        return Status::OK();
    }
    return Status::InternalError(
            "Task state transition from {} to {} is not allowed! Task info: {}",
            _to_string(_exec_state), _to_string(new_state), debug_string());
}
1043
1044
#include "common/compile_check_end.h"
1045
} // namespace doris