Coverage Report

Created: 2025-04-15 13:12

/root/doris/be/src/pipeline/pipeline_task.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "pipeline_task.h"
19
20
#include <fmt/core.h>
21
#include <fmt/format.h>
22
#include <gen_cpp/Metrics_types.h>
23
#include <glog/logging.h>
24
25
#include <ostream>
26
#include <vector>
27
28
#include "common/logging.h"
29
#include "common/status.h"
30
#include "pipeline/dependency.h"
31
#include "pipeline/exec/operator.h"
32
#include "pipeline/exec/scan_operator.h"
33
#include "pipeline/pipeline.h"
34
#include "pipeline/pipeline_fragment_context.h"
35
#include "pipeline/task_queue.h"
36
#include "pipeline/task_scheduler.h"
37
#include "runtime/descriptors.h"
38
#include "runtime/exec_env.h"
39
#include "runtime/query_context.h"
40
#include "runtime/thread_context.h"
41
#include "runtime/workload_group/workload_group_manager.h"
42
#include "util/container_util.hpp"
43
#include "util/defer_op.h"
44
#include "util/mem_info.h"
45
#include "util/runtime_profile.h"
46
#include "util/uid_util.h"
47
#include "vec/core/block.h"
48
#include "vec/spill/spill_stream.h"
49
50
namespace doris {
51
class RuntimeState;
52
} // namespace doris
53
54
namespace doris::pipeline {
55
56
// Builds a task for one pipeline instance.
//
// All members are wired from the pipeline, the runtime state and the query context in the
// initializer list. The body starts the task stop-watch and, when the caller did not provide
// a shared state for the sink's first destination id, asks the sink to create one.
PipelineTask::PipelineTask(PipelinePtr& pipeline, uint32_t task_id, RuntimeState* state,
                           std::shared_ptr<PipelineFragmentContext> fragment_context,
                           RuntimeProfile* parent_profile,
                           std::map<int, std::pair<std::shared_ptr<BasicSharedState>,
                                                   std::vector<std::shared_ptr<Dependency>>>>
                                   shared_state_map,
                           int task_idx)
        :
#ifdef BE_TEST
          // Unit tests may construct a task without a fragment context.
          _query_id(fragment_context ? fragment_context->get_query_id() : TUniqueId()),
#else
          _query_id(fragment_context->get_query_id()),
#endif
          _index(task_id),
          _pipeline(pipeline),
          _opened(false),
          _state(state),
          _fragment_context(fragment_context),
          _parent_profile(parent_profile),
          _operators(pipeline->operators()),
          _source(_operators.front().get()),
          _root(_operators.back().get()),
          _sink(pipeline->sink_shared_pointer()),
          _shared_state_map(std::move(shared_state_map)),
          _task_idx(task_idx),
          _execution_dep(state->get_query_ctx()->get_execution_dependency()),
          _memory_sufficient_dependency(state->get_query_ctx()->get_memory_sufficient_dependency()),
          _pipeline_name(_pipeline->name()) {
    _pipeline_task_watcher.start();

    // A shared state already registered for the sink's first destination wins; nothing to do.
    if (_shared_state_map.contains(_sink->dests_id().front())) {
        return;
    }

    // Otherwise let the sink build its own shared state; a null result means it needs none.
    if (auto created_state = _sink->create_shared_state(); created_state) {
        _sink_shared_state = created_state;
    }
}
93
94
// Prepares the task for execution.
//
// Steps, in order: initialise the task profile, set up the sink local state, set up each
// operator's local state from the last operator back to the first (chaining profiles), publish
// the source operator's filter dependencies, then move the task to RUNNABLE.
//
// @param scan_range scan ranges stored on the task and handed to every operator local state
// @param sender_id  forwarded to the sink local state
// @param tsink      thrift sink description forwarded to the sink local state
// @return the first setup error; the query's exec status if the query is already cancelled;
//         InternalError if the fragment context is gone; otherwise the result of the
//         transition to RUNNABLE.
Status PipelineTask::prepare(const std::vector<TScanRangeParams>& scan_range, const int sender_id,
                             const TDataSink& tsink) {
    DCHECK(_sink);
    _init_profile();
    SCOPED_TIMER(_task_profile->total_time_counter());
    SCOPED_CPU_TIMER(_task_cpu_timer);
    SCOPED_TIMER(_prepare_timer);
    DBUG_EXECUTE_IF("fault_inject::PipelineXTask::prepare", {
        return Status::Error<INTERNAL_ERROR>("fault_inject pipeline_task prepare failed");
    });

    // Sink local state first: operators below hang their profiles under the sink's profile.
    {
        LocalSinkStateInfo sink_info {_task_idx,         _task_profile.get(),
                                      sender_id,         get_sink_shared_state().get(),
                                      _shared_state_map, tsink};
        RETURN_IF_ERROR(_sink->setup_local_state(_state, sink_info));
    }

    _scan_ranges = scan_range;

    // Walk the operators from last to first; each operator's profile becomes the parent of
    // the one set up after it.
    auto* upstream_profile = _state->get_sink_local_state()->profile();
    for (auto op_it = _operators.rbegin(); op_it != _operators.rend(); ++op_it) {
        auto& op = *op_it;
        LocalStateInfo op_info {upstream_profile, _scan_ranges,
                                get_op_shared_state(op->operator_id()), _shared_state_map,
                                _task_idx};
        RETURN_IF_ERROR(op->setup_local_state(_state, op_info));
        upstream_profile = _state->get_local_state(op->operator_id())->profile();
    }

    // Collect the source operator's filter dependencies outside the lock, publish under it.
    {
        std::vector<Dependency*> collected;
        const auto& source_filter_deps =
                _state->get_local_state(_source->operator_id())->filter_dependencies();
        collected.insert(collected.end(), source_filter_deps.begin(), source_filter_deps.end());

        std::unique_lock<std::mutex> lc(_dependency_lock);
        _filter_dependencies.swap(collected);
    }

    if (auto fragment = _fragment_context.lock(); !fragment) {
        return Status::InternalError("Fragment already finished! Query: {}", print_id(_query_id));
    } else if (fragment->get_query_ctx()->is_cancelled()) {
        // The query was cancelled while preparing: release every dependency and report why.
        terminate();
        return fragment->get_query_ctx()->exec_status();
    }

    _block = doris::vectorized::Block::create_unique();
    return _state_transition(State::RUNNABLE);
}
143
144
12
// Gathers the read / write / finish / spill dependencies from every operator local state and
// from the sink local state, then publishes them to the task members under `_dependency_lock`.
//
// Everything is collected into locals first, so an early error return leaves the task's
// current dependency lists untouched.
//
// @return the lookup error if an operator has no local state, otherwise OK.
Status PipelineTask::_extract_dependencies() {
    std::vector<std::vector<Dependency*>> reads_per_operator;
    std::vector<Dependency*> sink_writes;
    std::vector<Dependency*> finishes;
    std::vector<Dependency*> spills;
    reads_per_operator.reserve(_operators.size());

    // One entry of read dependencies per operator, in operator order.
    for (auto& op : _operators) {
        auto lookup = _state->get_local_state_result(op->operator_id());
        if (!lookup) {
            return lookup.error();
        }
        auto* op_state = lookup.value();
        reads_per_operator.push_back(op_state->dependencies());
        if (auto* finish_dep = op_state->finishdependency()) {
            finishes.push_back(finish_dep);
        }
        if (auto* spill_dep = op_state->spill_dependency()) {
            spills.push_back(spill_dep);
        }
    }

    DBUG_EXECUTE_IF("fault_inject::PipelineXTask::_extract_dependencies", {
        return Status::Error<INTERNAL_ERROR>(
                "fault_inject pipeline_task _extract_dependencies failed");
    });

    // The sink contributes the write dependencies plus its own finish / spill dependency.
    {
        auto* sink_state = _state->get_sink_local_state();
        sink_writes = sink_state->dependencies();
        if (auto* finish_dep = sink_state->finishdependency()) {
            finishes.push_back(finish_dep);
        }
        if (auto* spill_dep = sink_state->spill_dependency()) {
            spills.push_back(spill_dep);
        }
    }

    // Publish all four lists together while holding the dependency lock.
    {
        std::unique_lock<std::mutex> lc(_dependency_lock);
        _read_dependencies.swap(reads_per_operator);
        _write_dependencies.swap(sink_writes);
        _finish_dependencies.swap(finishes);
        _spill_dependencies.swap(spills);
    }
    return Status::OK();
}
192
193
14
void PipelineTask::_init_profile() {
194
14
    _task_profile =
195
14
            std::make_unique<RuntimeProfile>(fmt::format("PipelineTask (index={})", _index));
196
14
    _parent_profile->add_child(_task_profile.get(), true, nullptr);
197
14
    _task_cpu_timer = ADD_TIMER(_task_profile, "TaskCpuTime");
198
199
14
    static const char* exec_time = "ExecuteTime";
200
14
    _exec_timer = ADD_TIMER(_task_profile, exec_time);
201
14
    _prepare_timer = ADD_CHILD_TIMER(_task_profile, "PrepareTime", exec_time);
202
14
    _open_timer = ADD_CHILD_TIMER(_task_profile, "OpenTime", exec_time);
203
14
    _get_block_timer = ADD_CHILD_TIMER(_task_profile, "GetBlockTime", exec_time);
204
14
    _get_block_counter = ADD_COUNTER(_task_profile, "GetBlockCounter", TUnit::UNIT);
205
14
    _sink_timer = ADD_CHILD_TIMER(_task_profile, "SinkTime", exec_time);
206
14
    _close_timer = ADD_CHILD_TIMER(_task_profile, "CloseTime", exec_time);
207
208
14
    _wait_worker_timer = ADD_TIMER_WITH_LEVEL(_task_profile, "WaitWorkerTime", 1);
209
210
14
    _schedule_counts = ADD_COUNTER(_task_profile, "NumScheduleTimes", TUnit::UNIT);
211
14
    _yield_counts = ADD_COUNTER(_task_profile, "NumYieldTimes", TUnit::UNIT);
212
14
    _core_change_times = ADD_COUNTER(_task_profile, "CoreChangeTimes", TUnit::UNIT);
213
14
    _memory_reserve_times = ADD_COUNTER(_task_profile, "MemoryReserveTimes", TUnit::UNIT);
214
14
    _memory_reserve_failed_times =
215
14
            ADD_COUNTER(_task_profile, "MemoryReserveFailedTimes", TUnit::UNIT);
216
14
}
217
218
6
void PipelineTask::_fresh_profile_counter() {
219
6
    COUNTER_SET(_schedule_counts, (int64_t)_schedule_time);
220
6
    COUNTER_SET(_wait_worker_timer, (int64_t)_wait_worker_watcher.elapsed_time());
221
6
}
222
223
11
// Opens every operator local state, then the sink local state, then extracts the
// dependencies they expose. `_opened` is set only when all of that succeeded.
//
// @return the first error encountered, otherwise OK.
Status PipelineTask::_open() {
    SCOPED_TIMER(_task_profile->total_time_counter());
    SCOPED_CPU_TIMER(_task_cpu_timer);
    SCOPED_TIMER(_open_timer);

    _dry_run = _sink->should_dry_run(_state);

    for (auto& op : _operators) {
        auto* op_local_state = _state->get_local_state(op->operator_id());
        RETURN_IF_ERROR(op_local_state->open(_state));
    }
    auto* sink_local_state = _state->get_sink_local_state();
    RETURN_IF_ERROR(sink_local_state->open(_state));

    // Dependencies are read from the local states only after they have been opened.
    RETURN_IF_ERROR(_extract_dependencies());

    DBUG_EXECUTE_IF("fault_inject::PipelineXTask::open", {
        return Status::Error<INTERNAL_ERROR>("fault_inject pipeline_task open failed");
    });

    _opened = true;
    return Status::OK();
}
240
241
37
bool PipelineTask::_wait_to_start() {
242
    // Before task starting, we should make sure
243
    // 1. Execution dependency is ready (which is controlled by FE 2-phase commit)
244
    // 2. Runtime filter dependencies are ready
245
37
    return _execution_dep->is_blocked_by(shared_from_this()) ||
246
37
           std::any_of(
247
31
                   _filter_dependencies.begin(), _filter_dependencies.end(),
248
31
                   [&](Dependency* dep) -> bool { return dep->is_blocked_by(shared_from_this()); });
249
37
}
250
251
13
bool PipelineTask::_is_pending_finish() {
252
    // Spilling may be in progress if eos is true.
253
13
    return std::any_of(_spill_dependencies.begin(), _spill_dependencies.end(),
254
13
                       [&](Dependency* dep) -> bool {
255
6
                           return dep->is_blocked_by(shared_from_this());
256
6
                       }) ||
257
13
           std::any_of(
258
13
                   _finish_dependencies.begin(), _finish_dependencies.end(),
259
15
                   [&](Dependency* dep) -> bool { return dep->is_blocked_by(shared_from_this()); });
260
13
}
261
262
516k
bool PipelineTask::_is_blocked() {
263
    // `_dry_run = true` means we do not need data from source operator.
264
516k
    if (!_dry_run) {
265
1.03M
        for (int i = _read_dependencies.size() - 1; i >= 0; i--) {
266
            // `_read_dependencies` is organized according to operators. For each operator, running condition is met iff all dependencies are ready.
267
516k
            for (auto* dep : _read_dependencies[i]) {
268
516k
                if (dep->is_blocked_by(shared_from_this())) {
269
13
                    return true;
270
13
                }
271
516k
            }
272
            // If all dependencies are ready for this operator, we can execute this task if no datum is needed from upstream operators.
273
516k
            if (!_operators[i]->need_more_input_data(_state)) {
274
2
                break;
275
2
            }
276
516k
        }
277
516k
    }
278
516k
    return std::any_of(_spill_dependencies.begin(), _spill_dependencies.end(),
279
1.03M
                       [&](Dependency* dep) -> bool {
280
1.03M
                           return dep->is_blocked_by(shared_from_this());
281
1.03M
                       }) ||
282
516k
           _memory_sufficient_dependency->is_blocked_by(shared_from_this()) ||
283
516k
           std::any_of(
284
516k
                   _write_dependencies.begin(), _write_dependencies.end(),
285
516k
                   [&](Dependency* dep) -> bool { return dep->is_blocked_by(shared_from_this()); });
286
516k
}
287
288
5
void PipelineTask::terminate() {
289
    // We use a lock to assure all dependencies are not deconstructed here.
290
5
    std::unique_lock<std::mutex> lc(_dependency_lock);
291
5
    auto fragment = _fragment_context.lock();
292
5
    if (!is_finalized() && fragment) {
293
5
        DCHECK(_wake_up_early || fragment->is_canceled());
294
5
        std::for_each(_spill_dependencies.begin(), _spill_dependencies.end(),
295
10
                      [&](Dependency* dep) { dep->set_always_ready(); });
296
5
        std::for_each(_filter_dependencies.begin(), _filter_dependencies.end(),
297
5
                      [&](Dependency* dep) { dep->set_always_ready(); });
298
5
        std::for_each(_write_dependencies.begin(), _write_dependencies.end(),
299
5
                      [&](Dependency* dep) { dep->set_always_ready(); });
300
5
        std::for_each(_finish_dependencies.begin(), _finish_dependencies.end(),
301
10
                      [&](Dependency* dep) { dep->set_always_ready(); });
302
5
        std::for_each(_read_dependencies.begin(), _read_dependencies.end(),
303
5
                      [&](std::vector<Dependency*>& deps) {
304
5
                          std::for_each(deps.begin(), deps.end(),
305
5
                                        [&](Dependency* dep) { dep->set_always_ready(); });
306
5
                      });
307
5
        _execution_dep->set_ready();
308
5
        _memory_sufficient_dependency->set_ready();
309
5
    }
310
5
}
311
312
/**
313
 * `_eos` indicates whether the execution phase is done. `done` indicates whether we could close
314
 * this task.
315
 *
316
 * For example,
317
 * 1. if `_eos` is false which means we should continue to get next block so we cannot close (e.g.
318
 *    `done` is false)
319
 * 2. if `_eos` is true which means all blocks from source are exhausted but `_is_pending_finish()`
320
 *    is true which means we should wait for a pending dependency ready (maybe a running rpc), so we
321
 *    cannot close (e.g. `done` is false)
322
 * 3. if `_eos` is true which means all blocks from source are exhausted and `_is_pending_finish()`
323
 *    is false which means we can close immediately (e.g. `done` is true)
324
 * @param done
325
 * @return
326
 */
327
29
Status PipelineTask::execute(bool* done) {
328
29
    if (_exec_state != State::RUNNABLE || _blocked_dep != nullptr) [[unlikely]] {
329
1
        return Status::InternalError("Pipeline task is not runnable! Task info: {}",
330
1
                                     debug_string());
331
1
    }
332
28
    auto fragment_context = _fragment_context.lock();
333
28
    DCHECK(fragment_context);
334
28
    int64_t time_spent = 0;
335
28
    ThreadCpuStopWatch cpu_time_stop_watch;
336
28
    cpu_time_stop_watch.start();
337
28
    SCOPED_ATTACH_TASK(_state);
338
28
    Defer running_defer {[&]() {
339
28
        if (_task_queue) {
340
28
            _task_queue->update_statistics(this, time_spent);
341
28
        }
342
28
        int64_t delta_cpu_time = cpu_time_stop_watch.elapsed_time();
343
28
        _task_cpu_timer->update(delta_cpu_time);
344
28
        fragment_context->get_query_ctx()->resource_ctx()->cpu_context()->update_cpu_cost_ms(
345
28
                delta_cpu_time);
346
347
        // If task is woke up early, we should terminate all operators, and this task could be closed immediately.
348
28
        if (_wake_up_early) {
349
3
            terminate();
350
3
            THROW_IF_ERROR(_root->terminate(_state));
351
3
            THROW_IF_ERROR(_sink->terminate(_state));
352
3
            _eos = true;
353
3
            *done = true;
354
25
        } else if (_eos && !_spilling &&
355
25
                   (fragment_context->is_canceled() || !_is_pending_finish())) {
356
7
            *done = true;
357
7
        }
358
28
    }};
359
28
    const auto query_id = _state->query_id();
360
    // If this task is already EOS and block is empty (which means we already output all blocks),
361
    // just return here.
362
28
    if (_eos && !_spilling) {
363
1
        return Status::OK();
364
1
    }
365
    // If this task is blocked by a spilling request and waken up immediately, the spilling
366
    // dependency will not block this task and we should just run here.
367
27
    if (!_block->empty()) {
368
0
        LOG(INFO) << "Query: " << print_id(query_id) << " has pending block, size: "
369
0
                  << PrettyPrinter::print_bytes(_block->allocated_bytes());
370
0
        DCHECK(_spilling);
371
0
    }
372
373
27
    SCOPED_TIMER(_task_profile->total_time_counter());
374
27
    SCOPED_TIMER(_exec_timer);
375
376
27
    DBUG_EXECUTE_IF("fault_inject::PipelineXTask::execute", {
377
27
        Status status = Status::Error<INTERNAL_ERROR>("fault_inject pipeline_task execute failed");
378
27
        return status;
379
27
    });
380
    // `_wake_up_early` must be after `_wait_to_start()`
381
27
    if (_wait_to_start() || _wake_up_early) {
382
2
        if (config::enable_prefetch_tablet) {
383
2
            RETURN_IF_ERROR(_source->hold_tablets(_state));
384
2
        }
385
2
        return Status::OK();
386
2
    }
387
388
    // The status must be runnable
389
25
    if (!_opened && !fragment_context->is_canceled()) {
390
10
        DBUG_EXECUTE_IF("PipelineTask::execute.open_sleep", {
391
10
            auto required_pipeline_id =
392
10
                    DebugPoints::instance()->get_debug_param_or_default<int32_t>(
393
10
                            "PipelineTask::execute.open_sleep", "pipeline_id", -1);
394
10
            auto required_task_id = DebugPoints::instance()->get_debug_param_or_default<int32_t>(
395
10
                    "PipelineTask::execute.open_sleep", "task_id", -1);
396
10
            if (required_pipeline_id == pipeline_id() && required_task_id == task_id()) {
397
10
                LOG(WARNING) << "PipelineTask::execute.open_sleep sleep 5s";
398
10
                sleep(5);
399
10
            }
400
10
        });
401
402
10
        SCOPED_RAW_TIMER(&time_spent);
403
10
        RETURN_IF_ERROR(_open());
404
10
    }
405
406
516k
    while (!fragment_context->is_canceled()) {
407
516k
        SCOPED_RAW_TIMER(&time_spent);
408
516k
        Defer defer {[&]() {
409
            // If this run is pended by a spilling request, the block will be output in next run.
410
516k
            if (!_spilling) {
411
516k
                _block->clear_column_data(_root->row_desc().num_materialized_slots());
412
516k
            }
413
516k
        }};
414
        // `_wake_up_early` must be after `_is_blocked()`
415
516k
        if (_is_blocked() || _wake_up_early) {
416
16
            return Status::OK();
417
16
        }
418
419
        /// When a task is cancelled,
420
        /// its blocking state will be cleared and it will transition to a ready state (though it is not truly ready).
421
        /// Here, checking whether it is cancelled to prevent tasks in a blocking state from being re-executed.
422
516k
        if (fragment_context->is_canceled()) {
423
0
            break;
424
0
        }
425
426
516k
        if (time_spent > THREAD_TIME_SLICE) {
427
1
            COUNTER_UPDATE(_yield_counts, 1);
428
1
            break;
429
1
        }
430
516k
        auto* block = _block.get();
431
432
516k
        DBUG_EXECUTE_IF("fault_inject::PipelineXTask::executing", {
433
516k
            Status status =
434
516k
                    Status::Error<INTERNAL_ERROR>("fault_inject pipeline_task executing failed");
435
516k
            return status;
436
516k
        });
437
438
        // `_sink->is_finished(_state)` means sink operator should be finished
439
516k
        if (_sink->is_finished(_state)) {
440
0
            set_wake_up_early();
441
0
            return Status::OK();
442
0
        }
443
444
        // `_dry_run` means sink operator need no more data
445
516k
        _eos = _dry_run || _eos;
446
516k
        _spilling = false;
447
516k
        auto workload_group = _state->workload_group();
448
        // If last run is pended by a spilling request, `_block` is produced with some rows in last
449
        // run, so we will resume execution using the block.
450
516k
        if (!_eos && _block->empty()) {
451
516k
            SCOPED_TIMER(_get_block_timer);
452
516k
            if (_state->low_memory_mode()) {
453
1.31k
                _sink->set_low_memory_mode(_state);
454
1.31k
                _root->set_low_memory_mode(_state);
455
1.31k
            }
456
516k
            DEFER_RELEASE_RESERVED();
457
516k
            _get_block_counter->update(1);
458
516k
            const auto reserve_size = _root->get_reserve_mem_size(_state);
459
516k
            _root->reset_reserve_mem_size(_state);
460
461
516k
            if (workload_group && _state->get_query_ctx()->enable_reserve_memory() &&
462
516k
                reserve_size > 0) {
463
1.31k
                if (!_try_to_reserve_memory(reserve_size, _root)) {
464
0
                    continue;
465
0
                }
466
1.31k
            }
467
468
516k
            bool eos = false;
469
516k
            RETURN_IF_ERROR(_root->get_block_after_projects(_state, block, &eos));
470
516k
            _eos = eos;
471
516k
        }
472
473
516k
        if (!_block->empty() || _eos) {
474
14
            SCOPED_TIMER(_sink_timer);
475
14
            Status status = Status::OK();
476
14
            DEFER_RELEASE_RESERVED();
477
14
            COUNTER_UPDATE(_memory_reserve_times, 1);
478
14
            if (_state->get_query_ctx()->enable_reserve_memory() && workload_group &&
479
14
                !(_wake_up_early || _dry_run)) {
480
1
                const auto sink_reserve_size = _sink->get_reserve_mem_size(_state, _eos);
481
1
                if (!_try_to_reserve_memory(sink_reserve_size, _sink.get())) {
482
0
                    continue;
483
0
                }
484
1
            }
485
486
14
            if (_eos) {
487
8
                RETURN_IF_ERROR(close(Status::OK(), false));
488
8
            }
489
490
14
            DBUG_EXECUTE_IF("PipelineTask::execute.sink_eos_sleep", {
491
14
                auto required_pipeline_id =
492
14
                        DebugPoints::instance()->get_debug_param_or_default<int32_t>(
493
14
                                "PipelineTask::execute.sink_eos_sleep", "pipeline_id", -1);
494
14
                auto required_task_id =
495
14
                        DebugPoints::instance()->get_debug_param_or_default<int32_t>(
496
14
                                "PipelineTask::execute.sink_eos_sleep", "task_id", -1);
497
14
                if (required_pipeline_id == pipeline_id() && required_task_id == task_id()) {
498
14
                    LOG(WARNING) << "PipelineTask::execute.sink_eos_sleep sleep 10s";
499
14
                    sleep(10);
500
14
                }
501
14
            });
502
503
14
            DBUG_EXECUTE_IF("PipelineTask::execute.terminate", {
504
14
                if (_eos) {
505
14
                    auto required_pipeline_id =
506
14
                            DebugPoints::instance()->get_debug_param_or_default<int32_t>(
507
14
                                    "PipelineTask::execute.terminate", "pipeline_id", -1);
508
14
                    auto required_task_id =
509
14
                            DebugPoints::instance()->get_debug_param_or_default<int32_t>(
510
14
                                    "PipelineTask::execute.terminate", "task_id", -1);
511
14
                    auto required_fragment_id =
512
14
                            DebugPoints::instance()->get_debug_param_or_default<int32_t>(
513
14
                                    "PipelineTask::execute.terminate", "fragment_id", -1);
514
14
                    if (required_pipeline_id == pipeline_id() && required_task_id == task_id() &&
515
14
                        fragment_context->get_fragment_id() == required_fragment_id) {
516
14
                        _wake_up_early = true;
517
14
                        terminate();
518
14
                    } else if (required_pipeline_id == pipeline_id() &&
519
14
                               fragment_context->get_fragment_id() == required_fragment_id) {
520
14
                        LOG(WARNING) << "PipelineTask::execute.terminate sleep 5s";
521
14
                        sleep(5);
522
14
                    }
523
14
                }
524
14
            });
525
14
            status = _sink->sink(_state, block, _eos);
526
527
14
            if (status.is<ErrorCode::END_OF_FILE>()) {
528
1
                set_wake_up_early();
529
1
                return Status::OK();
530
13
            } else if (!status) {
531
0
                return status;
532
0
            }
533
534
13
            if (_eos) { // just return, the scheduler will do finish work
535
7
                return Status::OK();
536
7
            }
537
13
        }
538
516k
    }
539
540
1
    RETURN_IF_ERROR(get_task_queue()->push_back(shared_from_this()));
541
1
    return Status::OK();
542
1
}
543
544
1.31k
bool PipelineTask::_try_to_reserve_memory(const size_t reserve_size, OperatorBase* op) {
545
1.31k
    auto st = thread_context()->thread_mem_tracker_mgr->try_reserve(reserve_size);
546
1.31k
    COUNTER_UPDATE(_memory_reserve_times, 1);
547
1.31k
    auto sink_revocable_mem_size =
548
1.31k
            reserve_size > 0 ? _sink->revocable_mem_size(_state) : Status::OK();
549
1.31k
    if (st.ok() && _state->enable_force_spill() && _sink->is_spillable() &&
550
1.31k
        sink_revocable_mem_size >= vectorized::SpillStream::MIN_SPILL_WRITE_BATCH_MEM) {
551
0
        st = Status(ErrorCode::QUERY_MEMORY_EXCEEDED, "Force Spill");
552
0
    }
553
1.31k
    if (!st.ok()) {
554
1.31k
        COUNTER_UPDATE(_memory_reserve_failed_times, 1);
555
1.31k
        auto debug_msg = fmt::format(
556
1.31k
                "Query: {} , try to reserve: {}, operator name: {}, operator "
557
1.31k
                "id: {}, task id: {}, root revocable mem size: {}, sink revocable mem"
558
1.31k
                "size: {}, failed: {}",
559
1.31k
                print_id(_query_id), PrettyPrinter::print_bytes(reserve_size), op->get_name(),
560
1.31k
                op->node_id(), _state->task_id(),
561
1.31k
                PrettyPrinter::print_bytes(op->revocable_mem_size(_state)),
562
1.31k
                PrettyPrinter::print_bytes(sink_revocable_mem_size), st.to_string());
563
        // PROCESS_MEMORY_EXCEEDED error msg alread contains process_mem_log_str
564
1.31k
        if (!st.is<ErrorCode::PROCESS_MEMORY_EXCEEDED>()) {
565
1.31k
            debug_msg +=
566
1.31k
                    fmt::format(", debug info: {}", GlobalMemoryArbitrator::process_mem_log_str());
567
1.31k
        }
568
1.31k
        LOG_EVERY_N(INFO, 100) << debug_msg;
569
        // If sink has enough revocable memory, trigger revoke memory
570
1.31k
        if (sink_revocable_mem_size >= vectorized::SpillStream::MIN_SPILL_WRITE_BATCH_MEM) {
571
0
            LOG(INFO) << fmt::format(
572
0
                    "Query: {} sink: {}, node id: {}, task id: "
573
0
                    "{}, revocable mem size: {}",
574
0
                    print_id(_query_id), _sink->get_name(), _sink->node_id(), _state->task_id(),
575
0
                    PrettyPrinter::print_bytes(sink_revocable_mem_size));
576
0
            ExecEnv::GetInstance()->workload_group_mgr()->add_paused_query(
577
0
                    _state->get_query_ctx()->shared_from_this(), reserve_size, st);
578
0
            _spilling = true;
579
0
            return false;
580
1.31k
        } else {
581
            // If reserve failed, not add this query to paused list, because it is very small, will not
582
            // consume a lot of memory. But need set low memory mode to indicate that the system should
583
            // not use too much memory.
584
1.31k
            _state->get_query_ctx()->set_low_memory_mode();
585
1.31k
        }
586
1.31k
    }
587
1.31k
    return true;
588
1.31k
}
589
590
108k
void PipelineTask::stop_if_finished() {
591
108k
    auto fragment = _fragment_context.lock();
592
108k
    if (!fragment) {
593
0
        return;
594
0
    }
595
108k
    SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(fragment->get_query_ctx()->query_mem_tracker());
596
108k
    if (auto sink = _sink) {
597
108k
        if (sink->is_finished(_state)) {
598
1
            set_wake_up_early();
599
1
            terminate();
600
1
        }
601
108k
    }
602
108k
}
603
604
1
/// Moves the task to State::FINALIZED and then, under _dependency_lock, drops
/// the shared states, block, operators, sink and pipeline it holds.
///
/// @return Status::OK() on success or when the fragment context has already
///         expired; the error from _state_transition() if the transition to
///         FINALIZED is not legal (in which case nothing is released).
Status PipelineTask::finalize() {
    auto fragment = _fragment_context.lock();
    if (!fragment) {
        return Status::OK();
    }
    SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(fragment->get_query_ctx()->query_mem_tracker());

    // The transition must happen BEFORE _dependency_lock is taken: when the
    // transition is illegal, _state_transition() formats its error message via
    // debug_string(), which itself locks _dependency_lock. std::mutex is not
    // recursive, so doing this while holding the lock would self-deadlock.
    RETURN_IF_ERROR(_state_transition(State::FINALIZED));

    // debug_string() reads these members under the same lock, so release them
    // only while holding it.
    std::unique_lock<std::mutex> lc(_dependency_lock);
    _sink_shared_state.reset();
    _op_shared_states.clear();
    _shared_state_map.clear();
    _block.reset();
    _operators.clear();
    _sink.reset();
    _pipeline.reset();
    return Status::OK();
}
621
622
14
/// Closes the sink (only when `close_sink` is true) and every operator of this task.
///
/// @param exec_status status forwarded to the sink's close().
/// @param close_sink  whether the sink is closed and the task is moved to FINISHED.
/// @return the first non-OK status produced by the sink/operator close calls,
///         or the error of the FINISHED state transition if that transition fails.
Status PipelineTask::close(Status exec_status, bool close_sink) {
    int64_t close_ns = 0;
    Status first_error;
    {
        SCOPED_RAW_TIMER(&close_ns);
        if (close_sink) {
            first_error = _sink->close(_state, exec_status);
        }
        // Every operator is closed even after a failure; only the first error is kept.
        for (auto& op : _operators) {
            auto op_status = op->close(_state);
            if (first_error.ok() && !op_status.ok()) {
                first_error = op_status;
            }
        }
    }

    // Profile counters are only touched when the task was opened.
    if (_opened) {
        COUNTER_UPDATE(_close_timer, close_ns);
        COUNTER_UPDATE(_task_profile->total_time_counter(), close_ns);
        if (close_sink) {
            _task_profile->add_info_string("WakeUpEarly", std::to_string(_wake_up_early.load()));
            _fresh_profile_counter();
        }
    }

    if (_task_queue) {
        _task_queue->update_statistics(this, close_ns);
    }

    if (close_sink) {
        RETURN_IF_ERROR(_state_transition(State::FINISHED));
    }
    return first_error;
}
655
656
4.63k
std::string PipelineTask::debug_string() {
657
4.63k
    fmt::memory_buffer debug_string_buffer;
658
659
4.63k
    fmt::format_to(debug_string_buffer, "QueryId: {}\n", print_id(_query_id));
660
4.63k
    fmt::format_to(debug_string_buffer, "InstanceId: {}\n",
661
4.63k
                   print_id(_state->fragment_instance_id()));
662
663
4.63k
    fmt::format_to(debug_string_buffer,
664
4.63k
                   "PipelineTask[id = {}, open = {}, eos = {}, state = {}, dry run = "
665
4.63k
                   "{}, _wake_up_early = {}, time elapsed since last state changing = {}s, spilling"
666
4.63k
                   " = {}, is running = {}]",
667
4.63k
                   _index, _opened, _eos, _to_string(_exec_state), _dry_run, _wake_up_early.load(),
668
4.63k
                   _state_change_watcher.elapsed_time() / NANOS_PER_SEC, _spilling, is_running());
669
4.63k
    std::unique_lock<std::mutex> lc(_dependency_lock);
670
4.63k
    auto* cur_blocked_dep = _blocked_dep;
671
4.63k
    auto fragment = _fragment_context.lock();
672
4.63k
    if (is_finalized() || !fragment) {
673
5
        fmt::format_to(debug_string_buffer, " pipeline name = {}", _pipeline_name);
674
5
        return fmt::to_string(debug_string_buffer);
675
5
    }
676
4.63k
    auto elapsed = fragment->elapsed_time() / NANOS_PER_SEC;
677
4.63k
    fmt::format_to(debug_string_buffer,
678
4.63k
                   " elapse time = {}s, block dependency = [{}]\noperators: ", elapsed,
679
4.63k
                   cur_blocked_dep && !is_finalized() ? cur_blocked_dep->debug_string() : "NULL");
680
681
9.26k
    for (size_t i = 0; i < _operators.size(); i++) {
682
4.63k
        fmt::format_to(debug_string_buffer, "\n{}",
683
4.63k
                       _opened && !is_finalized() ? _operators[i]->debug_string(_state, i)
684
4.63k
                                                  : _operators[i]->debug_string(i));
685
4.63k
    }
686
4.63k
    fmt::format_to(debug_string_buffer, "\n{}\n",
687
4.63k
                   _opened && !is_finalized() ? _sink->debug_string(_state, _operators.size())
688
4.63k
                                              : _sink->debug_string(_operators.size()));
689
690
4.63k
    fmt::format_to(debug_string_buffer, "\nRead Dependency Information: \n");
691
692
4.63k
    size_t i = 0;
693
9.25k
    for (; i < _read_dependencies.size(); i++) {
694
9.23k
        for (size_t j = 0; j < _read_dependencies[i].size(); j++) {
695
4.61k
            fmt::format_to(debug_string_buffer, "{}. {}\n", i,
696
4.61k
                           _read_dependencies[i][j]->debug_string(i + 1));
697
4.61k
        }
698
4.61k
    }
699
700
4.63k
    fmt::format_to(debug_string_buffer, "{}. {}\n", i,
701
4.63k
                   _memory_sufficient_dependency->debug_string(i++));
702
703
4.63k
    fmt::format_to(debug_string_buffer, "\nWrite Dependency Information: \n");
704
9.25k
    for (size_t j = 0; j < _write_dependencies.size(); j++, i++) {
705
4.61k
        fmt::format_to(debug_string_buffer, "{}. {}\n", i,
706
4.61k
                       _write_dependencies[j]->debug_string(i + 1));
707
4.61k
    }
708
709
4.63k
    fmt::format_to(debug_string_buffer, "\nRuntime Filter Dependency Information: \n");
710
9.26k
    for (size_t j = 0; j < _filter_dependencies.size(); j++, i++) {
711
4.63k
        fmt::format_to(debug_string_buffer, "{}. {}\n", i,
712
4.63k
                       _filter_dependencies[j]->debug_string(i + 1));
713
4.63k
    }
714
715
4.63k
    fmt::format_to(debug_string_buffer, "\nSpill Dependency Information: \n");
716
13.8k
    for (size_t j = 0; j < _spill_dependencies.size(); j++, i++) {
717
9.23k
        fmt::format_to(debug_string_buffer, "{}. {}\n", i,
718
9.23k
                       _spill_dependencies[j]->debug_string(i + 1));
719
9.23k
    }
720
721
4.63k
    fmt::format_to(debug_string_buffer, "Finish Dependency Information: \n");
722
13.8k
    for (size_t j = 0; j < _finish_dependencies.size(); j++, i++) {
723
9.23k
        fmt::format_to(debug_string_buffer, "{}. {}\n", i,
724
9.23k
                       _finish_dependencies[j]->debug_string(j + 1));
725
9.23k
    }
726
4.63k
    return fmt::to_string(debug_string_buffer);
727
4.63k
}
728
729
0
size_t PipelineTask::get_revocable_size() const {
730
0
    if (is_finalized() || _running || (_eos && !_spilling)) {
731
0
        return 0;
732
0
    }
733
734
0
    return _sink->revocable_mem_size(_state);
735
0
}
736
737
0
/// Asks the sink to revoke its revocable memory when that memory is at least
/// vectorized::SpillStream::MIN_SPILL_WRITE_BATCH_MEM.
///
/// When nothing is revoked (task finalized, or too little revocable memory),
/// a non-null `spill_context` is notified through on_task_finished() instead.
///
/// @param spill_context optional spill context; forwarded to the sink or notified directly.
/// @return the sink's revoke_memory() error, otherwise Status::OK().
Status PipelineTask::revoke_memory(const std::shared_ptr<SpillContext>& spill_context) {
    if (is_finalized()) {
        if (spill_context) {
            spill_context->on_task_finished();
            VLOG_DEBUG << "Query: " << print_id(_state->query_id())
                       << ", task: " << static_cast<void*>(this) << " finalized";
        }
        return Status::OK();
    }

    const auto revocable_size = _sink->revocable_mem_size(_state);
    const bool worth_spilling =
            revocable_size >= vectorized::SpillStream::MIN_SPILL_WRITE_BATCH_MEM;
    if (worth_spilling) {
        RETURN_IF_ERROR(_sink->revoke_memory(_state, spill_context));
        return Status::OK();
    }

    if (spill_context) {
        spill_context->on_task_finished();
        LOG(INFO) << "Query: " << print_id(_state->query_id())
                  << ", task: " << static_cast<void*>(this)
                  << " has not enough data to revoke: " << revocable_size;
    }
    return Status::OK();
}
757
758
22
/// Called by a dependency to unblock this task: clears the blocking
/// dependency, moves the task to RUNNABLE and pushes it onto its task queue.
///
/// @param dep the dependency issuing the wake-up; debug builds check that it
///            is the one recorded in _blocked_dep.
/// @return the error of the state transition or of the queue push, otherwise Status::OK().
Status PipelineTask::wake_up(Dependency* dep) {
    DCHECK_EQ(_blocked_dep, dep) << "dep : " << dep->debug_string(0) << "task: " << debug_string();
    _blocked_dep = nullptr;

    // Obtain an owning pointer to this task to hand to the queue.
    auto self = std::dynamic_pointer_cast<PipelineTask>(shared_from_this());

    RETURN_IF_ERROR(_state_transition(State::RUNNABLE));
    RETURN_IF_ERROR(get_task_queue()->push_back(self));
    return Status::OK();
}
767
768
89
/// Attempts to move the task from its current state to `new_state`.
///
/// @param new_state the requested state.
/// @return Status::OK() when LEGAL_STATE_TRANSITION allows the move; otherwise
///         an InternalError whose message embeds debug_string(), and
///         _exec_state is left unchanged.
Status PipelineTask::_state_transition(State new_state) {
    // Restart the state-change stopwatch only when the requested state differs.
    const bool is_state_change = _exec_state != new_state;
    if (is_state_change) {
        _state_change_watcher.reset();
        _state_change_watcher.start();
    }

    // The profile is updated before validation, so it records the requested
    // state even if the transition is rejected below.
    _task_profile->add_info_string("TaskState", _to_string(new_state));
    _task_profile->add_info_string("BlockedByDependency", _blocked_dep ? _blocked_dep->name() : "");

    const bool is_legal =
            LEGAL_STATE_TRANSITION[static_cast<int>(new_state)].contains(_exec_state);
    if (!is_legal) {
        return Status::InternalError(
                "Task state transition from {} to {} is not allowed! Task info: {}",
                _to_string(_exec_state), _to_string(new_state), debug_string());
    }

    _exec_state = new_state;
    return Status::OK();
}
783
784
} // namespace doris::pipeline