Coverage Report

Created: 2025-10-10 19:27

/root/doris/be/src/pipeline/pipeline_task.cpp
 Line|   Count|Source
    1|        |// Licensed to the Apache Software Foundation (ASF) under one
    2|        |// or more contributor license agreements.  See the NOTICE file
    3|        |// distributed with this work for additional information
    4|        |// regarding copyright ownership.  The ASF licenses this file
    5|        |// to you under the Apache License, Version 2.0 (the
    6|        |// "License"); you may not use this file except in compliance
    7|        |// with the License.  You may obtain a copy of the License at
    8|        |//
    9|        |//   http://www.apache.org/licenses/LICENSE-2.0
   10|        |//
   11|        |// Unless required by applicable law or agreed to in writing,
   12|        |// software distributed under the License is distributed on an
   13|        |// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
   14|        |// KIND, either express or implied.  See the License for the
   15|        |// specific language governing permissions and limitations
   16|        |// under the License.
   17|        |
   18|        |#include "pipeline_task.h"
   19|        |
   20|        |#include <fmt/core.h>
   21|        |#include <fmt/format.h>
   22|        |#include <gen_cpp/Metrics_types.h>
   23|        |#include <glog/logging.h>
   24|        |
   25|        |#include <algorithm>
   26|        |#include <ostream>
   27|        |#include <vector>
   28|        |
   29|        |#include "common/logging.h"
   30|        |#include "common/status.h"
   31|        |#include "pipeline/dependency.h"
   32|        |#include "pipeline/exec/operator.h"
   33|        |#include "pipeline/exec/scan_operator.h"
   34|        |#include "pipeline/pipeline.h"
   35|        |#include "pipeline/pipeline_fragment_context.h"
   36|        |#include "pipeline/task_queue.h"
   37|        |#include "pipeline/task_scheduler.h"
   38|        |#include "runtime/descriptors.h"
   39|        |#include "runtime/exec_env.h"
   40|        |#include "runtime/query_context.h"
   41|        |#include "runtime/thread_context.h"
   42|        |#include "runtime/workload_group/workload_group_manager.h"
   43|        |#include "util/defer_op.h"
   44|        |#include "util/mem_info.h"
   45|        |#include "util/runtime_profile.h"
   46|        |#include "util/uid_util.h"
   47|        |#include "vec/core/block.h"
   48|        |#include "vec/spill/spill_stream.h"
   49|        |
   50|        |namespace doris {
   51|        |class RuntimeState;
   52|        |} // namespace doris
   53|        |
   54|        |namespace doris::pipeline {
   55|        |#include "common/compile_check_begin.h"
   56|        |
   57|        |PipelineTask::PipelineTask(PipelinePtr& pipeline, uint32_t task_id, RuntimeState* state,
   58|        |                           std::shared_ptr<PipelineFragmentContext> fragment_context,
   59|        |                           RuntimeProfile* parent_profile,
   60|        |                           std::map<int, std::pair<std::shared_ptr<BasicSharedState>,
   61|        |                                                   std::vector<std::shared_ptr<Dependency>>>>
   62|        |                                   shared_state_map,
   63|        |                           int task_idx)
   64|        |        :
   65|        |#ifdef BE_TEST
   66|   72.1k|          _query_id(fragment_context ? fragment_context->get_query_id() : TUniqueId()),
   67|        |#else
   68|        |          _query_id(fragment_context->get_query_id()),
   69|        |#endif
   70|   72.1k|          _index(task_id),
   71|   72.1k|          _pipeline(pipeline),
   72|   72.1k|          _opened(false),
   73|   72.1k|          _state(state),
   74|   72.1k|          _fragment_context(fragment_context),
   75|   72.1k|          _parent_profile(parent_profile),
   76|   72.1k|          _operators(pipeline->operators()),
   77|   72.1k|          _source(_operators.front().get()),
   78|   72.1k|          _root(_operators.back().get()),
   79|   72.1k|          _sink(pipeline->sink_shared_pointer()),
   80|   72.1k|          _shared_state_map(std::move(shared_state_map)),
   81|   72.1k|          _task_idx(task_idx),
   82|   72.1k|          _memory_sufficient_dependency(state->get_query_ctx()->get_memory_sufficient_dependency()),
   83|   72.1k|          _pipeline_name(_pipeline->name()) {
   84|        |#ifndef BE_TEST
   85|        |    _query_mem_tracker = fragment_context->get_query_ctx()->query_mem_tracker();
   86|        |#endif
   87|   72.1k|    _execution_dependencies.push_back(state->get_query_ctx()->get_execution_dependency());
   88|   72.1k|    if (!_shared_state_map.contains(_sink->dests_id().front())) {
   89|   72.1k|        auto shared_state = _sink->create_shared_state();
   90|   72.1k|        if (shared_state) {
   91|      29|            _sink_shared_state = shared_state;
   92|      29|        }
   93|   72.1k|    }
   94|   72.1k|}
   95|        |
   96|   72.1k|PipelineTask::~PipelineTask() {
   97|        |// PipelineTask is also held by the task queue (https://github.com/apache/doris/pull/49753),
   98|        |// so it may be the last one to be destructed.
   99|        |// But a pipeline task holds some objects, like operators and shared state, so that memory
  100|        |// should be released manually.
  101|        |#ifndef BE_TEST
  102|        |    SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(_query_mem_tracker);
  103|        |#endif
  104|   72.1k|    _shared_state_map.clear();
  105|   72.1k|    _sink_shared_state.reset();
  106|   72.1k|    _op_shared_states.clear();
  107|   72.1k|    _sink.reset();
  108|   72.1k|    _operators.clear();
  109|   72.1k|    _spill_context.reset();
  110|   72.1k|    _block.reset();
  111|   72.1k|    _pipeline.reset();
  112|   72.1k|}
  113|        |
  114|        |Status PipelineTask::prepare(const std::vector<TScanRangeParams>& scan_range, const int sender_id,
  115|      15|                             const TDataSink& tsink) {
  116|      15|    DCHECK(_sink);
  117|      15|    _init_profile();
  118|      15|    SCOPED_TIMER(_task_profile->total_time_counter());
  119|      15|    SCOPED_CPU_TIMER(_task_cpu_timer);
  120|      15|    SCOPED_TIMER(_prepare_timer);
  121|      15|    DBUG_EXECUTE_IF("fault_inject::PipelineXTask::prepare", {
  122|      15|        Status status = Status::Error<INTERNAL_ERROR>("fault_inject pipeline_task prepare failed");
  123|      15|        return status;
  124|      15|    });
  125|      15|    {
  126|        |        // set sink local state
  127|      15|        LocalSinkStateInfo info {_task_idx,         _task_profile.get(),
  128|      15|                                 sender_id,         get_sink_shared_state().get(),
  129|      15|                                 _shared_state_map, tsink};
  130|      15|        RETURN_IF_ERROR(_sink->setup_local_state(_state, info));
  131|      15|    }
  132|        |
  133|      15|    _scan_ranges = scan_range;
  134|      15|    auto* parent_profile = _state->get_sink_local_state()->operator_profile();
  135|        |
  136|      32|    for (int op_idx = cast_set<int>(_operators.size() - 1); op_idx >= 0; op_idx--) {
  137|      17|        auto& op = _operators[op_idx];
  138|      17|        LocalStateInfo info {parent_profile, _scan_ranges, get_op_shared_state(op->operator_id()),
  139|      17|                             _shared_state_map, _task_idx};
  140|      17|        RETURN_IF_ERROR(op->setup_local_state(_state, info));
  141|      17|        parent_profile = _state->get_local_state(op->operator_id())->operator_profile();
  142|      17|    }
  143|      15|    {
  144|      15|        const auto& deps =
  145|      15|                _state->get_local_state(_source->operator_id())->execution_dependencies();
  146|      15|        std::unique_lock<std::mutex> lc(_dependency_lock);
  147|      15|        std::copy(deps.begin(), deps.end(),
  148|      15|                  std::inserter(_execution_dependencies, _execution_dependencies.end()));
  149|      15|    }
  150|      15|    if (auto fragment = _fragment_context.lock()) {
  151|      14|        if (fragment->get_query_ctx()->is_cancelled()) {
  152|       0|            terminate();
  153|       0|            return fragment->get_query_ctx()->exec_status();
  154|       0|        }
  155|      14|    } else {
  156|       1|        return Status::InternalError("Fragment already finished! Query: {}", print_id(_query_id));
  157|       1|    }
  158|      14|    _block = doris::vectorized::Block::create_unique();
  159|      14|    return _state_transition(State::RUNNABLE);
  160|      15|}
  161|        |
  162|      13|Status PipelineTask::_extract_dependencies() {
  163|      13|    std::vector<std::vector<Dependency*>> read_dependencies;
  164|      13|    std::vector<Dependency*> write_dependencies;
  165|      13|    std::vector<Dependency*> finish_dependencies;
  166|      13|    read_dependencies.resize(_operators.size());
  167|      13|    size_t i = 0;
  168|      15|    for (auto& op : _operators) {
  169|      15|        auto result = _state->get_local_state_result(op->operator_id());
  170|      15|        if (!result) {
  171|       1|            return result.error();
  172|       1|        }
  173|      14|        auto* local_state = result.value();
  174|      14|        read_dependencies[i] = local_state->dependencies();
  175|      14|        auto* fin_dep = local_state->finishdependency();
  176|      14|        if (fin_dep) {
  177|       7|            finish_dependencies.push_back(fin_dep);
  178|       7|        }
  179|      14|        i++;
  180|      14|    }
  181|      12|    DBUG_EXECUTE_IF("fault_inject::PipelineXTask::_extract_dependencies", {
  182|      12|        Status status = Status::Error<INTERNAL_ERROR>(
  183|      12|                "fault_inject pipeline_task _extract_dependencies failed");
  184|      12|        return status;
  185|      12|    });
  186|      12|    {
  187|      12|        auto* local_state = _state->get_sink_local_state();
  188|      12|        write_dependencies = local_state->dependencies();
  189|      12|        auto* fin_dep = local_state->finishdependency();
  190|      12|        if (fin_dep) {
  191|      12|            finish_dependencies.push_back(fin_dep);
  192|      12|        }
  193|      12|    }
  194|      12|    {
  195|      12|        std::unique_lock<std::mutex> lc(_dependency_lock);
  196|      12|        read_dependencies.swap(_read_dependencies);
  197|      12|        write_dependencies.swap(_write_dependencies);
  198|      12|        finish_dependencies.swap(_finish_dependencies);
  199|      12|    }
  200|      12|    return Status::OK();
  201|      12|}
  202|        |
  203|       6|bool PipelineTask::inject_shared_state(std::shared_ptr<BasicSharedState> shared_state) {
  204|       6|    if (!shared_state) {
  205|       1|        return false;
  206|       1|    }
  207|        |    // Shared state is created by upstream task's sink operator and shared by source operator of
  208|        |    // this task.
  209|       7|    for (auto& op : _operators) {
  210|       7|        if (shared_state->related_op_ids.contains(op->operator_id())) {
  211|       3|            _op_shared_states.insert({op->operator_id(), shared_state});
  212|       3|            return true;
  213|       3|        }
  214|       7|    }
  215|        |    // Shared state is created by the first sink operator and shared by sink operator of this task.
  216|        |    // For example, Set operations.
  217|       2|    if (shared_state->related_op_ids.contains(_sink->dests_id().front())) {
  218|       1|        DCHECK_EQ(_sink_shared_state, nullptr)
  219|       0|                << " Sink: " << _sink->get_name() << " dest id: " << _sink->dests_id().front();
  220|       1|        _sink_shared_state = shared_state;
  221|       1|        return true;
  222|       1|    }
  223|       1|    return false;
  224|       2|}
  225|        |
  226|      15|void PipelineTask::_init_profile() {
  227|      15|    _task_profile = std::make_unique<RuntimeProfile>(fmt::format("PipelineTask(index={})", _index));
  228|      15|    _parent_profile->add_child(_task_profile.get(), true, nullptr);
  229|      15|    _task_cpu_timer = ADD_TIMER(_task_profile, "TaskCpuTime");
  230|        |
  231|      15|    static const char* exec_time = "ExecuteTime";
  232|      15|    _exec_timer = ADD_TIMER(_task_profile, exec_time);
  233|      15|    _prepare_timer = ADD_CHILD_TIMER(_task_profile, "PrepareTime", exec_time);
  234|      15|    _open_timer = ADD_CHILD_TIMER(_task_profile, "OpenTime", exec_time);
  235|      15|    _get_block_timer = ADD_CHILD_TIMER(_task_profile, "GetBlockTime", exec_time);
  236|      15|    _get_block_counter = ADD_COUNTER(_task_profile, "GetBlockCounter", TUnit::UNIT);
  237|      15|    _sink_timer = ADD_CHILD_TIMER(_task_profile, "SinkTime", exec_time);
  238|      15|    _close_timer = ADD_CHILD_TIMER(_task_profile, "CloseTime", exec_time);
  239|        |
  240|      15|    _wait_worker_timer = ADD_TIMER_WITH_LEVEL(_task_profile, "WaitWorkerTime", 1);
  241|        |
  242|      15|    _schedule_counts = ADD_COUNTER(_task_profile, "NumScheduleTimes", TUnit::UNIT);
  243|      15|    _yield_counts = ADD_COUNTER(_task_profile, "NumYieldTimes", TUnit::UNIT);
  244|      15|    _core_change_times = ADD_COUNTER(_task_profile, "CoreChangeTimes", TUnit::UNIT);
  245|      15|    _memory_reserve_times = ADD_COUNTER(_task_profile, "MemoryReserveTimes", TUnit::UNIT);
  246|      15|    _memory_reserve_failed_times =
  247|      15|            ADD_COUNTER(_task_profile, "MemoryReserveFailedTimes", TUnit::UNIT);
  248|      15|}
  249|        |
  250|       6|void PipelineTask::_fresh_profile_counter() {
  251|       6|    COUNTER_SET(_schedule_counts, (int64_t)_schedule_time);
  252|       6|    COUNTER_SET(_wait_worker_timer, (int64_t)_wait_worker_watcher.elapsed_time());
  253|       6|}
  254|        |
  255|      12|Status PipelineTask::_open() {
  256|      12|    SCOPED_TIMER(_task_profile->total_time_counter());
  257|      12|    SCOPED_CPU_TIMER(_task_cpu_timer);
  258|      12|    SCOPED_TIMER(_open_timer);
  259|      12|    _dry_run = _sink->should_dry_run(_state);
  260|      14|    for (auto& o : _operators) {
  261|      14|        RETURN_IF_ERROR(_state->get_local_state(o->operator_id())->open(_state));
  262|      14|    }
  263|      12|    RETURN_IF_ERROR(_state->get_sink_local_state()->open(_state));
  264|      12|    RETURN_IF_ERROR(_extract_dependencies());
  265|      12|    DBUG_EXECUTE_IF("fault_inject::PipelineXTask::open", {
  266|      12|        Status status = Status::Error<INTERNAL_ERROR>("fault_inject pipeline_task open failed");
  267|      12|        return status;
  268|      12|    });
  269|      12|    _opened = true;
  270|      12|    return Status::OK();
  271|      12|}
  272|        |
  273|      60|Status PipelineTask::_prepare() {
  274|      60|    SCOPED_TIMER(_task_profile->total_time_counter());
  275|      60|    SCOPED_CPU_TIMER(_task_cpu_timer);
  276|      72|    for (auto& o : _operators) {
  277|      72|        RETURN_IF_ERROR(_state->get_local_state(o->operator_id())->prepare(_state));
  278|      72|    }
  279|      60|    RETURN_IF_ERROR(_state->get_sink_local_state()->prepare(_state));
  280|      60|    return Status::OK();
  281|      60|}
  282|        |
  283|      41|bool PipelineTask::_wait_to_start() {
  284|        |    // Before task starting, we should make sure
  285|        |    // 1. Execution dependency is ready (which is controlled by FE 2-phase commit)
  286|        |    // 2. Runtime filter dependencies are ready
  287|        |    // 3. All tablets are loaded into local storage
  288|      41|    return std::any_of(
  289|      41|            _execution_dependencies.begin(), _execution_dependencies.end(),
  290|      54|            [&](Dependency* dep) -> bool { return dep->is_blocked_by(shared_from_this()); });
  291|      41|}
  292|        |
  293|      14|bool PipelineTask::_is_pending_finish() {
  294|        |    // Spilling may be in progress if eos is true.
  295|      17|    return std::ranges::any_of(_finish_dependencies, [&](Dependency* dep) -> bool {
  296|      17|        return dep->is_blocked_by(shared_from_this());
  297|      17|    });
  298|      14|}
  299|        |
  300|       0|bool PipelineTask::is_blockable() const {
  301|        |    // Before task starting, we should make sure
  302|        |    // 1. Execution dependency is ready (which is controlled by FE 2-phase commit)
  303|        |    // 2. Runtime filter dependencies are ready
  304|        |    // 3. All tablets are loaded into local storage
  305|        |
  306|       0|    if (_state->enable_fuzzy_blockable_task()) {
  307|       0|        if ((_schedule_time + _task_idx) % 2 == 0) {
  308|       0|            return true;
  309|       0|        }
  310|       0|    }
  311|        |
  312|       0|    return _need_to_revoke_memory ||
  313|       0|           std::ranges::any_of(_operators,
  314|       0|                               [&](OperatorPtr op) -> bool { return op->is_blockable(_state); }) ||
  315|       0|           _sink->is_blockable(_state);
  316|       0|}
  317|        |
  318|   1.03M|bool PipelineTask::_is_blocked() {
  319|   1.03M|    if (_need_to_revoke_memory) {
  320|       0|        return false;
  321|       0|    }
  322|        |
  323|        |    // `_dry_run = true` means we do not need data from source operator.
  324|   1.03M|    if (!_dry_run) {
  325|   2.06M|        for (int i = cast_set<int>(_read_dependencies.size() - 1); i >= 0; i--) {
  326|        |            // `_read_dependencies` is organized according to operators. For each operator, running condition is met iff all dependencies are ready.
  327|   1.03M|            for (auto* dep : _read_dependencies[i]) {
  328|   1.03M|                if (dep->is_blocked_by(shared_from_this())) {
  329|      14|                    return true;
  330|      14|                }
  331|   1.03M|            }
  332|        |            // If all dependencies are ready for this operator, we can execute this task if no datum is needed from upstream operators.
  333|   1.03M|            if (!_operators[i]->need_more_input_data(_state)) {
  334|       2|                break;
  335|       2|            }
  336|   1.03M|        }
  337|   1.03M|    }
  338|   1.03M|    return _memory_sufficient_dependency->is_blocked_by(shared_from_this()) ||
  339|   1.03M|           std::ranges::any_of(_write_dependencies, [&](Dependency* dep) -> bool {
  340|   1.03M|               return dep->is_blocked_by(shared_from_this());
  341|   1.03M|           });
  342|   1.03M|}
  343|        |
  344|       4|void PipelineTask::terminate() {
  345|        |    // We use a lock to ensure the dependencies are not destructed here.
  346|       4|    std::unique_lock<std::mutex> lc(_dependency_lock);
  347|       4|    auto fragment = _fragment_context.lock();
  348|       4|    if (!is_finalized() && fragment) {
  349|       4|        try {
  350|       4|            DCHECK(_wake_up_early || fragment->is_canceled());
  351|       4|            std::ranges::for_each(_write_dependencies,
  352|       4|                                  [&](Dependency* dep) { dep->set_always_ready(); });
  353|       4|            std::ranges::for_each(_finish_dependencies,
  354|       8|                                  [&](Dependency* dep) { dep->set_always_ready(); });
  355|       4|            std::ranges::for_each(_read_dependencies, [&](std::vector<Dependency*>& deps) {
  356|       4|                std::ranges::for_each(deps, [&](Dependency* dep) { dep->set_always_ready(); });
  357|       4|            });
  358|        |            // The execution dependencies will never be switched from ready back to blocking, so we just set them ready here.
  359|       4|            std::ranges::for_each(_execution_dependencies,
  360|       8|                                  [&](Dependency* dep) { dep->set_ready(); });
  361|       4|            _memory_sufficient_dependency->set_ready();
  362|       4|        } catch (const doris::Exception& e) {
  363|       0|            LOG(WARNING) << "Terminate failed: " << e.code() << ", " << e.to_string();
  364|       0|        }
  365|       4|    }
  366|       4|}
  367|        |
  368|        |/**
  369|        | * `_eos` indicates whether the execution phase is done. `done` indicates whether we could close
  370|        | * this task.
  371|        | *
  372|        | * For example,
  373|        | * 1. if `_eos` is false, we should continue to get the next block, so we cannot close (i.e.
  374|        | *    `done` is false)
  375|        | * 2. if `_eos` is true, all blocks from the source are exhausted but `_is_pending_finish()`
  376|        | *    is true, which means we should wait for a pending dependency to become ready (maybe a
  377|        | *    running rpc), so we cannot close (i.e. `done` is false)
  378|        | * 3. if `_eos` is true, all blocks from the source are exhausted and `_is_pending_finish()`
  379|        | *    is false, which means we can close immediately (i.e. `done` is true)
  380|        | * @param done
  381|        | * @return
  382|        | */
  383|      33|Status PipelineTask::execute(bool* done) {
  384|      33|    if (!_need_to_revoke_memory && (_exec_state != State::RUNNABLE || _blocked_dep != nullptr))
  385|       1|            [[unlikely]] {
  386|       1|        return Status::InternalError("Pipeline task is not runnable! Task info: {}",
  387|       1|                                     debug_string());
  388|       1|    }
  389|      32|    auto fragment_context = _fragment_context.lock();
  390|      32|    if (!fragment_context) {
  391|       0|        return Status::InternalError("Fragment already finished! Query: {}", print_id(_query_id));
  392|       0|    }
  393|      32|    int64_t time_spent = 0;
  394|      32|    ThreadCpuStopWatch cpu_time_stop_watch;
  395|      32|    cpu_time_stop_watch.start();
  396|      32|    SCOPED_ATTACH_TASK(_state);
  397|      32|    Defer running_defer {[&]() {
  398|      32|        int64_t delta_cpu_time = cpu_time_stop_watch.elapsed_time();
  399|      32|        _task_cpu_timer->update(delta_cpu_time);
  400|      32|        fragment_context->get_query_ctx()->resource_ctx()->cpu_context()->update_cpu_cost_ms(
  401|      32|                delta_cpu_time);
  402|        |
  403|        |        // If the task is woken up early, we should terminate all operators, and this task can be closed immediately.
  404|      32|        if (_wake_up_early) {
  405|       3|            terminate();
  406|       3|            THROW_IF_ERROR(_root->terminate(_state));
  407|       3|            THROW_IF_ERROR(_sink->terminate(_state));
  408|       3|            _eos = true;
  409|       3|            *done = true;
  410|      29|        } else if (_eos && !_spilling &&
  411|      29|                   (fragment_context->is_canceled() || !_is_pending_finish())) {
  412|       8|            *done = true;
  413|       8|        }
  414|      32|    }};
  415|      32|    const auto query_id = _state->query_id();
  416|        |    // If this task is already EOS and the block is empty (which means we already output all blocks),
  417|        |    // just return here.
  418|      32|    if (_eos && !_spilling) {
  419|       1|        return Status::OK();
  420|       1|    }
  421|        |    // If this task is blocked by a spilling request and woken up immediately, the spilling
  422|        |    // dependency will not block this task and we should just run here.
  423|      31|    if (!_block->empty()) {
  424|       0|        LOG(INFO) << "Query: " << print_id(query_id) << " has pending block, size: "
  425|       0|                  << PrettyPrinter::print_bytes(_block->allocated_bytes());
  426|       0|        DCHECK(_spilling);
  427|       0|    }
  428|        |
  429|      31|    SCOPED_TIMER(_task_profile->total_time_counter());
  430|      31|    SCOPED_TIMER(_exec_timer);
  431|        |
  432|      31|    if (!_wake_up_early) {
  433|      31|        RETURN_IF_ERROR(_prepare());
  434|      31|    }
  435|      31|    DBUG_EXECUTE_IF("fault_inject::PipelineXTask::execute", {
  436|      31|        Status status = Status::Error<INTERNAL_ERROR>("fault_inject pipeline_task execute failed");
  437|      31|        return status;
  438|      31|    });
  439|        |    // `_wake_up_early` must be checked after `_wait_to_start()`
  440|      31|    if (_wait_to_start() || _wake_up_early) {
  441|       2|        return Status::OK();
  442|       2|    }
  443|      29|    RETURN_IF_ERROR(_prepare());
  444|        |
  445|        |    // The status must be runnable
  446|      29|    if (!_opened && !fragment_context->is_canceled()) {
  447|      11|        DBUG_EXECUTE_IF("PipelineTask::execute.open_sleep", {
  448|      11|            auto required_pipeline_id =
  449|      11|                    DebugPoints::instance()->get_debug_param_or_default<int32_t>(
  450|      11|                            "PipelineTask::execute.open_sleep", "pipeline_id", -1);
  451|      11|            auto required_task_id = DebugPoints::instance()->get_debug_param_or_default<int32_t>(
  452|      11|                    "PipelineTask::execute.open_sleep", "task_id", -1);
  453|      11|            if (required_pipeline_id == pipeline_id() && required_task_id == task_id()) {
  454|      11|                LOG(WARNING) << "PipelineTask::execute.open_sleep sleep 5s";
  455|      11|                sleep(5);
  456|      11|            }
  457|      11|        });
  458|        |
  459|      11|        SCOPED_RAW_TIMER(&time_spent);
  460|      11|        RETURN_IF_ERROR(_open());
  461|      11|    }
  462|        |
  463|   1.03M|    while (!fragment_context->is_canceled()) {
  464|   1.03M|        SCOPED_RAW_TIMER(&time_spent);
  465|   1.03M|        Defer defer {[&]() {
  466|        |            // If this run is paused by a spilling request, the block will be output in the next run.
  467|   1.03M|            if (!_spilling) {
  468|   1.03M|                _block->clear_column_data(_root->row_desc().num_materialized_slots());
  469|   1.03M|            }
  470|   1.03M|        }};
  471|        |        // `_wake_up_early` must be checked after `_is_blocked()`
  472|   1.03M|        if (_is_blocked() || _wake_up_early) {
  473|      16|            return Status::OK();
  474|      16|        }
  475|        |
  476|        |        /// When a task is cancelled,
  477|        |        /// its blocking state will be cleared and it will transition to a ready state (though it is not truly ready).
  478|        |        /// Here, we check whether it is cancelled to prevent tasks in a blocking state from being re-executed.
  479|   1.03M|        if (fragment_context->is_canceled()) {
  480|       0|            break;
  481|       0|        }
  482|        |
  483|   1.03M|        if (_need_to_revoke_memory) {
  484|       0|            _need_to_revoke_memory = false;
  485|       0|            return _sink->revoke_memory(_state, _spill_context);
  486|       0|        }
  487|        |
  488|   1.03M|        if (time_spent > _exec_time_slice) {
  489|       3|            COUNTER_UPDATE(_yield_counts, 1);
  490|       3|            break;
  491|       3|        }
  492|   1.03M|        auto* block = _block.get();
  493|        |
  494|   1.03M|        DBUG_EXECUTE_IF("fault_inject::PipelineXTask::executing", {
  495|   1.03M|            Status status =
  496|   1.03M|                    Status::Error<INTERNAL_ERROR>("fault_inject pipeline_task executing failed");
  497|   1.03M|            return status;
  498|   1.03M|        });
  499|        |
  500|        |        // `_sink->is_finished(_state)` means the sink operator should be finished
  501|   1.03M|        if (_sink->is_finished(_state)) {
  502|       1|            set_wake_up_early();
  503|       1|            return Status::OK();
  504|       1|        }
  505|        |
  506|        |        // `_dry_run` means the sink operator needs no more data
  507|   1.03M|        _eos = _dry_run || _eos;
  508|   1.03M|        _spilling = false;
  509|   1.03M|        auto workload_group = _state->workload_group();
  510|        |        // If the last run was paused by a spilling request, `_block` was produced with some rows in that
  511|        |        // run, so we resume execution using the block.
  512|   1.03M|        if (!_eos && _block->empty()) {
  513|   1.03M|            SCOPED_TIMER(_get_block_timer);
  514|   1.03M|            if (_state->low_memory_mode()) {
  515|   1.58k|                _sink->set_low_memory_mode(_state);
  516|   1.58k|                _root->set_low_memory_mode(_state);
  517|   1.58k|            }
  518|   1.03M|            DEFER_RELEASE_RESERVED();
  519|   1.03M|            _get_block_counter->update(1);
  520|   1.03M|            const auto reserve_size = _root->get_reserve_mem_size(_state);
  521|   1.03M|            _root->reset_reserve_mem_size(_state);
  522|        |
  523|   1.03M|            if (workload_group &&
  524|   1.03M|                _state->get_query_ctx()
  525|   2.65k|                        ->resource_ctx()
  526|   2.65k|                        ->task_controller()
  527|   2.65k|                        ->is_enable_reserve_memory() &&
  528|   1.03M|                reserve_size > 0) {
  529|   2.65k|                if (!_try_to_reserve_memory(reserve_size, _root)) {
  530|   1.07k|                    continue;
  531|   1.07k|                }
  532|   2.65k|            }
  533|        |
  534|   1.03M|            bool eos = false;
  535|   1.03M|            RETURN_IF_ERROR(_root->get_block_after_projects(_state, block, &eos));
  536|   1.03M|            RETURN_IF_ERROR(block->check_type_and_column());
  537|   1.03M|            _eos = eos;
  538|   1.03M|        }
  539|        |
  540|   1.03M|        if (!_block->empty() || _eos) {
  541|   1.05k|            SCOPED_TIMER(_sink_timer);
  542|   1.05k|            Status status = Status::OK();
  543|   1.05k|            DEFER_RELEASE_RESERVED();
  544|   1.05k|            COUNTER_UPDATE(_memory_reserve_times, 1);
  545|   1.05k|            if (_state->get_query_ctx()
  546|   1.05k|                        ->resource_ctx()
  547|   1.05k|                        ->task_controller()
  548|   1.05k|                        ->is_enable_reserve_memory() &&
  549|   1.05k|                workload_group && !(_wake_up_early || _dry_run)) {
  550|   1.04k|                const auto sink_reserve_size = _sink->get_reserve_mem_size(_state, _eos);
  551|   1.04k|                if (sink_reserve_size > 0 &&
  552|   1.04k|                    !_try_to_reserve_memory(sink_reserve_size, _sink.get())) {
  553|   1.04k|                    continue;
  554|   1.04k|                }
  555|   1.04k|            }
  556|        |
  557|      15|            if (_eos) {
  558|       9|                RETURN_IF_ERROR(close(Status::OK(), false));
  559|       9|            }
  560|        |
  561|      15|            DBUG_EXECUTE_IF("PipelineTask::execute.sink_eos_sleep", {
  562|      15|                auto required_pipeline_id =
  563|      15|                        DebugPoints::instance()->get_debug_param_or_default<int32_t>(
  564|      15|                                "PipelineTask::execute.sink_eos_sleep", "pipeline_id", -1);
  565|      15|                auto required_task_id =
  566|      15|                        DebugPoints::instance()->get_debug_param_or_default<int32_t>(
  567|      15|                                "PipelineTask::execute.sink_eos_sleep", "task_id", -1);
  568|      15|                if (required_pipeline_id == pipeline_id() && required_task_id == task_id()) {
  569|      15|                    LOG(WARNING) << "PipelineTask::execute.sink_eos_sleep sleep 10s";
  570|      15|                    sleep(10);
  571|      15|                }
  572|      15|            });
  573|        |
  574|      15|            DBUG_EXECUTE_IF("PipelineTask::execute.terminate", {
  575|      15|                if (_eos) {
  576|      15|                    auto required_pipeline_id =
  577|      15|                            DebugPoints::instance()->get_debug_param_or_default<int32_t>(
  578|      15|                                    "PipelineTask::execute.terminate", "pipeline_id", -1);
  579|      15|                    auto required_task_id =
  580|      15|                            DebugPoints::instance()->get_debug_param_or_default<int32_t>(
  581|      15|                                    "PipelineTask::execute.terminate", "task_id", -1);
  582|      15|                    auto required_fragment_id =
  583|      15|                            DebugPoints::instance()->get_debug_param_or_default<int32_t>(
  584|      15|                                    "PipelineTask::execute.terminate", "fragment_id", -1);
  585|      15|                    if (required_pipeline_id == pipeline_id() && required_task_id == task_id() &&
  586|      15|                        fragment_context->get_fragment_id() == required_fragment_id) {
  587|      15|                        _wake_up_early = true;
  588|      15|                        terminate();
  589|      15|                    } else if (required_pipeline_id == pipeline_id() &&
  590|      15|                               fragment_context->get_fragment_id() == required_fragment_id) {
  591|      15|                        LOG(WARNING) << "PipelineTask::execute.terminate sleep 5s";
  592|      15|                        sleep(5);
  593|      15|                    }
  594|      15|                }
  595|      15|            });
  596|      15|            RETURN_IF_ERROR(block->check_type_and_column());
  597|      15|            status = _sink->sink(_state, block, _eos);
  598|        |
  599|      15|            if (status.is<ErrorCode::END_OF_FILE>()) {
  600|       1|                set_wake_up_early();
  601|       1|                return Status::OK();
  602|      14|            } else if (!status) {
  603|       0|                return status;
  604|       0|            }
  605|        |
  606|      14|            if (_eos) { // just return, the scheduler will do finish work
  607|       8|                return Status::OK();
  608|       8|            }
  609|      14|        }
  610|   1.03M|    }
  611|        |
  612|       3|    RETURN_IF_ERROR(_state->get_query_ctx()->get_pipe_exec_scheduler()->submit(shared_from_this()));
  613|       3|    return Status::OK();
  614|       3|}
  615|        |
  616|   3.70k|bool PipelineTask::_try_to_reserve_memory(const size_t reserve_size, OperatorBase* op) {
  617|   3.70k|    auto st = thread_context()->thread_mem_tracker_mgr->try_reserve(reserve_size);
  618|   3.70k|    COUNTER_UPDATE(_memory_reserve_times, 1);
  619|   3.70k|    auto sink_revocable_mem_size = _sink->revocable_mem_size(_state);
  620|   3.70k|    if (st.ok() && _state->enable_force_spill() && _sink->is_spillable() &&
  621|   3.70k|        sink_revocable_mem_size >= vectorized::SpillStream::MIN_SPILL_WRITE_BATCH_MEM) {
  622|       0|        st = Status(ErrorCode::QUERY_MEMORY_EXCEEDED, "Force Spill");
  623|       0|    }
  624|   3.70k|    if (!st.ok()) {
  625|   3.70k|        COUNTER_UPDATE(_memory_reserve_failed_times, 1);
  626|   3.70k|        auto debug_msg = fmt::format(
  627|   3.70k|                "Query: {} , try to reserve: {}, operator name: {}, operator "
  628|   3.70k|                "id: {}, task id: {}, root revocable mem size: {}, sink revocable mem"
  629|   3.70k|                "size: {}, failed: {}",
  630|   3.70k|                print_id(_query_id), PrettyPrinter::print_bytes(reserve_size), op->get_name(),
  631|   3.70k|                op->node_id(), _state->task_id(),
  632|   3.70k|                PrettyPrinter::print_bytes(op->revocable_mem_size(_state)),
  633|   3.70k|                PrettyPrinter::print_bytes(sink_revocable_mem_size), st.to_string());
  634|        |        // PROCESS_MEMORY_EXCEEDED error msg already contains process_mem_log_str
  635|   3.70k|        if (!st.is<ErrorCode::PROCESS_MEMORY_EXCEEDED>()) {
  636|   3.70k|            debug_msg +=
  637|   3.70k|                    fmt::format(", debug info: {}", GlobalMemoryArbitrator::process_mem_log_str());
  638|   3.70k|        }
  639|   3.70k|        LOG_EVERY_N(INFO, 100) << debug_msg;
  640|        |        // If the sink has enough revocable memory, trigger memory revocation
  641|   3.70k|        if (sink_revocable_mem_size >= vectorized::SpillStream::MIN_SPILL_WRITE_BATCH_MEM) {
  642|   2.11k|            LOG(INFO) << fmt::format(
  643|   2.11k|                    "Query: {} sink: {}, node id: {}, task id: "
  644|   2.11k|                    "{}, revocable mem size: {}",
  645|   2.11k|                    print_id(_query_id), _sink->get_name(), _sink->node_id(), _state->task_id(),
  646|   2.11k|                    PrettyPrinter::print_bytes(sink_revocable_mem_size));
  647|   2.11k|            ExecEnv::GetInstance()->workload_group_mgr()->add_paused_query(
  648|   2.11k|                    _state->get_query_ctx()->resource_ctx()->shared_from_this(), reserve_size, st);
  649|   2.11k|            _spilling = true;
  650|   2.11k|            return false;
  651|   2.11k|        } else {
  652|        |            // If the reservation failed, do not add this query to the paused list, because the request is
  653|        |            // very small and will not consume a lot of memory. But we need to set low memory mode to
  654|        |            // indicate that the system should not use too much memory.
  655|   1.58k|            _state->get_query_ctx()->set_low_memory_mode();
  656|   1.58k|        }
  657|   3.70k|    }
  658|   1.58k|    return true;
  659|   3.70k|}
  660|        |
  661|   84.6k|void PipelineTask::stop_if_finished() {
  662|   84.6k|    auto fragment = _fragment_context.lock();
  663|   84.6k|    if (!fragment) {
  664|       0|        return;
  665|       0|    }
  666|   84.6k|    SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(fragment->get_query_ctx()->query_mem_tracker());
  667|   84.6k|    if (auto sink = _sink) {
  668|   84.6k|        if (sink->is_finished(_state)) {
  669|       0|            set_wake_up_early();
  670|       0|            terminate();
  671|       0|        }
  672|   84.6k|    }
  673|   84.6k|}
  674|        |
  675|       1|Status PipelineTask::finalize() {
  676|       1|    auto fragment = _fragment_context.lock();
  677|       1|    if (!fragment) {
  678|       0|        return Status::OK();
  679|       0|    }
  680|       1|    SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(fragment->get_query_ctx()->query_mem_tracker());
  681|       1|    std::unique_lock<std::mutex> lc(_dependency_lock);
  682|       1|    RETURN_IF_ERROR(_state_transition(State::FINALIZED));
  683|       1|    _sink_shared_state.reset();
  684|       1|    _op_shared_states.clear();
  685|       1|    _shared_state_map.clear();
  686|       1|    _block.reset();
  687|       1|    _operators.clear();
  688|       1|    _sink.reset();
  689|       1|    _pipeline.reset();
  690|       1|    return Status::OK();
  691|       1|}
  692|        |
  693|      15|Status PipelineTask::close(Status exec_status, bool close_sink) {
  694|      15|    int64_t close_ns = 0;
  695|      15|    Status s;
  696|      15|    {
  697|      15|        SCOPED_RAW_TIMER(&close_ns);
  698|      15|        if (close_sink) {
  699|       6|            s = _sink->close(_state, exec_status);
  700|       6|        }
  701|      19|        for (auto& op : _operators) {
  702|      19|            auto tem = op->close(_state);
  703|      19|            if (!tem.ok() && s.ok()) {
  704|       0|                s = tem;
  705|       0|            }
  706|      19|        }
  707|      15|    }
  708|      15|    if (_opened) {
  709|      15|        COUNTER_UPDATE(_close_timer, close_ns);
  710|      15|        COUNTER_UPDATE(_task_profile->total_time_counter(), close_ns);
  711|      15|    }
  712|        |
  713|      15|    if (close_sink && _opened) {
  714|       6|        _task_profile->add_info_string("WakeUpEarly", std::to_string(_wake_up_early.load()));
  715|       6|        _fresh_profile_counter();
  716|       6|    }
  717|        |
  718|      15|    if (close_sink) {
  719|       6|        RETURN_IF_ERROR(_state_transition(State::FINISHED));
  720|       6|    }
  721|      15|    return s;
  722|      15|}
  723|        |
  724|   11.3k|std::string PipelineTask::debug_string() {
  725|   11.3k|    fmt::memory_buffer debug_string_buffer;
  726|        |
  727|   11.3k|    fmt::format_to(debug_string_buffer, "QueryId: {}\n", print_id(_query_id));
  728|   11.3k|    fmt::format_to(debug_string_buffer, "InstanceId: {}\n",
  729|   11.3k|                   print_id(_state->fragment_instance_id()));
  730|        |
  731|   11.3k|    fmt::format_to(debug_string_buffer,
  732|   11.3k|                   "PipelineTask[id = {}, open = {}, eos = {}, state = {}, dry run = "
  733|   11.3k|                   "{}, _wake_up_early = {}, _wake_up_by = {}, time elapsed since last state "
  734|   11.3k|                   "changing = {}s, spilling = {}, is running = {}]",
  735|   11.3k|                   _index, _opened, _eos, _to_string(_exec_state), _dry_run, _wake_up_early.load(),
  736|   11.3k|                   _wake_by, _state_change_watcher.elapsed_time() / NANOS_PER_SEC, _spilling,
  737|   11.3k|                   is_running());
  738|   11.3k|    std::unique_lock<std::mutex> lc(_dependency_lock);
  739|   11.3k|    auto* cur_blocked_dep = _blocked_dep;
  740|   11.3k|    auto fragment = _fragment_context.lock();
  741|   11.3k|    if (is_finalized() || !fragment) {
  742|       5|        fmt::format_to(debug_string_buffer, " pipeline name = {}", _pipeline_name);
  743|       5|        return fmt::to_string(debug_string_buffer);
  744|       5|    }
  745|   11.3k|    auto elapsed = fragment->elapsed_time() / NANOS_PER_SEC;
  746|   11.3k|    fmt::format_to(debug_string_buffer, " elapse time = {}s, block dependency = [{}]\n", elapsed,
  747|   11.3k|                   cur_blocked_dep && !is_finalized() ? cur_blocked_dep->debug_string() : "NULL");
  748|        |
  749|   11.3k|    if (_state && _state->local_runtime_filter_mgr()) {
  750|       0|        fmt::format_to(debug_string_buffer, "local_runtime_filter_mgr: [{}]\n",
  751|       0|                       _state->local_runtime_filter_mgr()->debug_string());
  752|       0|    }
  753|        |
  754|   11.3k|    fmt::format_to(debug_string_buffer, "operators: ");
  755|   22.6k|    for (size_t i = 0; i < _operators.size(); i++) {
  756|   11.3k|        fmt::format_to(debug_string_buffer, "\n{}",
  757|   11.3k|                       _opened && !is_finalized()
  758|   11.3k|                               ? _operators[i]->debug_string(_state, cast_set<int>(i))
  759|   11.3k|                               : _operators[i]->debug_string(cast_set<int>(i)));
  760|   11.3k|    }
  761|   11.3k|    fmt::format_to(debug_string_buffer, "\n{}\n",
  762|   11.3k|                   _opened && !is_finalized()
  763|   11.3k|                           ? _sink->debug_string(_state, cast_set<int>(_operators.size()))
  764|   11.3k|                           : _sink->debug_string(cast_set<int>(_operators.size())));
  765|        |
  766|   11.3k|    fmt::format_to(debug_string_buffer, "\nRead Dependency Information: \n");
  767|        |
  768|   11.3k|    size_t i = 0;
  769|   22.6k|    for (; i < _read_dependencies.size(); i++) {
  770|   22.6k|        for (size_t j = 0; j < _read_dependencies[i].size(); j++) {
  771|   11.3k|            fmt::format_to(debug_string_buffer, "{}. {}\n", i,
  772|   11.3k|                           _read_dependencies[i][j]->debug_string(cast_set<int>(i) + 1));
  773|   11.3k|        }
  774|   11.3k|    }
  775|        |
  776|   11.3k|    fmt::format_to(debug_string_buffer, "{}. {}\n", i,
  777|   11.3k|                   _memory_sufficient_dependency->debug_string(cast_set<int>(i++)));
  778|        |
  779|   11.3k|    fmt::format_to(debug_string_buffer, "\nWrite Dependency Information: \n");
  780|   22.6k|    for (size_t j = 0; j < _write_dependencies.size(); j++, i++) {
  781|   11.3k|        fmt::format_to(debug_string_buffer, "{}. {}\n", i,
  782|   11.3k|                       _write_dependencies[j]->debug_string(cast_set<int>(j) + 1));
  783|   11.3k|    }
  784|        |
  785|   11.3k|    fmt::format_to(debug_string_buffer, "\nExecution Dependency Information: \n");
  786|   33.9k|    for (size_t j = 0; j < _execution_dependencies.size(); j++, i++) {
  787|   22.6k|        fmt::format_to(debug_string_buffer, "{}. {}\n", i,
  788|   22.6k|                       _execution_dependencies[j]->debug_string(cast_set<int>(i) + 1));
  789|   22.6k|    }
  790|        |
  791|   11.3k|    fmt::format_to(debug_string_buffer, "Finish Dependency Information: \n");
  792|   33.9k|    for (size_t j = 0; j < _finish_dependencies.size(); j++, i++) {
  793|   22.6k|        fmt::format_to(debug_string_buffer, "{}. {}\n", i,
  794|   22.6k|                       _finish_dependencies[j]->debug_string(cast_set<int>(i) + 1));
  795|   22.6k|    }
  796|   11.3k|    return fmt::to_string(debug_string_buffer);
  797|   11.3k|}
  798|        |
  799|       0|size_t PipelineTask::get_revocable_size() const {
  800|       0|    if (is_finalized() || _running || (_eos && !_spilling)) {
  801|       0|        return 0;
  802|       0|    }
  803|        |
  804|       0|    return _sink->revocable_mem_size(_state);
  805|       0|}
  806|        |
  807|       0|Status PipelineTask::revoke_memory(const std::shared_ptr<SpillContext>& spill_context) {
  808|       0|    if (is_finalized()) {
  809|       0|        if (spill_context) {
  810|       0|            spill_context->on_task_finished();
  811|       0|            VLOG_DEBUG << "Query: " << print_id(_state->query_id()) << ", task: " << ((void*)this)
  812|       0|                       << " finalized";
  813|       0|        }
  814|       0|        return Status::OK();
  815|       0|    }
  816|        |
  817|       0|    const auto revocable_size = _sink->revocable_mem_size(_state);
  818|       0|    if (revocable_size >= vectorized::SpillStream::MIN_SPILL_WRITE_BATCH_MEM) {
  819|       0|        _need_to_revoke_memory = true;
  820|       0|        _spill_context = spill_context;
  821|       0|        RETURN_IF_ERROR(
  822|       0|                _state->get_query_ctx()->get_pipe_exec_scheduler()->submit(shared_from_this()));
  823|       0|    } else if (spill_context) {
  824|       0|        spill_context->on_task_finished();
  825|       0|        LOG(INFO) << "Query: " << print_id(_state->query_id()) << ", task: " << ((void*)this)
  826|       0|                  << " has not enough data to revoke: " << revocable_size;
  827|       0|    }
  828|       0|    return Status::OK();
  829|       0|}
  830|        |
  831|      23|Status PipelineTask::wake_up(Dependency* dep) {
  832|        |    // called by a dependency
  833|      23|    DCHECK_EQ(_blocked_dep, dep) << "dep : " << dep->debug_string(0) << "task: " << debug_string();
  834|      23|    _blocked_dep = nullptr;
  835|      23|    auto holder = std::dynamic_pointer_cast<PipelineTask>(shared_from_this());
  836|      23|    RETURN_IF_ERROR(_state_transition(PipelineTask::State::RUNNABLE));
  837|      23|    RETURN_IF_ERROR(_state->get_query_ctx()->get_pipe_exec_scheduler()->submit(holder));
  838|      23|    return Status::OK();
  839|      23|}
  840|        |
  841|      92|Status PipelineTask::_state_transition(State new_state) {
  842|      92|    if (_exec_state != new_state) {
  843|      87|        _state_change_watcher.reset();
  844|      87|        _state_change_watcher.start();
  845|      87|    }
  846|      92|    _task_profile->add_info_string("TaskState", _to_string(new_state));
  847|      92|    _task_profile->add_info_string("BlockedByDependency", _blocked_dep ? _blocked_dep->name() : "");
  848|      92|    if (!LEGAL_STATE_TRANSITION[(int)new_state].contains(_exec_state)) {
  849|      17|        return Status::InternalError(
  850|      17|                "Task state transition from {} to {} is not allowed! Task info: {}",
  851|      17|                _to_string(_exec_state), _to_string(new_state), debug_string());
  852|      17|    }
  853|      75|    _exec_state = new_state;
  854|      75|    return Status::OK();
  855|      92|}
  856|        |
  857|        |#include "common/compile_check_end.h"
  858|        |} // namespace doris::pipeline