Coverage Report

Created: 2025-06-18 11:42

/root/doris/be/src/pipeline/pipeline_task.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "pipeline_task.h"
19
20
#include <fmt/core.h>
21
#include <fmt/format.h>
22
#include <gen_cpp/Metrics_types.h>
23
#include <glog/logging.h>
24
25
#include <ostream>
26
#include <vector>
27
28
#include "common/logging.h"
29
#include "common/status.h"
30
#include "pipeline/dependency.h"
31
#include "pipeline/exec/operator.h"
32
#include "pipeline/exec/scan_operator.h"
33
#include "pipeline/pipeline.h"
34
#include "pipeline/pipeline_fragment_context.h"
35
#include "pipeline/task_queue.h"
36
#include "pipeline/task_scheduler.h"
37
#include "runtime/descriptors.h"
38
#include "runtime/exec_env.h"
39
#include "runtime/query_context.h"
40
#include "runtime/thread_context.h"
41
#include "runtime/workload_group/workload_group_manager.h"
42
#include "util/container_util.hpp"
43
#include "util/defer_op.h"
44
#include "util/mem_info.h"
45
#include "util/runtime_profile.h"
46
#include "util/uid_util.h"
47
#include "vec/core/block.h"
48
#include "vec/spill/spill_stream.h"
49
50
namespace doris {
51
class RuntimeState;
52
} // namespace doris
53
54
namespace doris::pipeline {
55
56
PipelineTask::PipelineTask(PipelinePtr& pipeline, uint32_t task_id, RuntimeState* state,
57
                           std::shared_ptr<PipelineFragmentContext> fragment_context,
58
                           RuntimeProfile* parent_profile,
59
                           std::map<int, std::pair<std::shared_ptr<BasicSharedState>,
60
                                                   std::vector<std::shared_ptr<Dependency>>>>
61
                                   shared_state_map,
62
                           int task_idx)
63
        :
64
#ifdef BE_TEST
65
          _query_id(fragment_context ? fragment_context->get_query_id() : TUniqueId()),
66
#else
67
          _query_id(fragment_context->get_query_id()),
68
#endif
69
          _index(task_id),
70
          _pipeline(pipeline),
71
          _opened(false),
72
          _state(state),
73
          _fragment_context(fragment_context),
74
          _parent_profile(parent_profile),
75
          _operators(pipeline->operators()),
76
          _source(_operators.front().get()),
77
          _root(_operators.back().get()),
78
          _sink(pipeline->sink_shared_pointer()),
79
          _shared_state_map(std::move(shared_state_map)),
80
          _task_idx(task_idx),
81
          _memory_sufficient_dependency(state->get_query_ctx()->get_memory_sufficient_dependency()),
82
78.1k
          _pipeline_name(_pipeline->name()) {
83
78.1k
    _execution_deps.push_back(state->get_query_ctx()->get_execution_dependency());
84
78.1k
    if (!_shared_state_map.contains(_sink->dests_id().front())) {
85
78.1k
        auto shared_state = _sink->create_shared_state();
86
78.1k
        if (shared_state) {
87
29
            _sink_shared_state = shared_state;
88
29
        }
89
78.1k
    }
90
78.1k
}
91
92
Status PipelineTask::prepare(const std::vector<TScanRangeParams>& scan_range, const int sender_id,
93
15
                             const TDataSink& tsink) {
94
15
    DCHECK(_sink);
95
15
    _init_profile();
96
15
    SCOPED_TIMER(_task_profile->total_time_counter());
97
15
    SCOPED_CPU_TIMER(_task_cpu_timer);
98
15
    SCOPED_TIMER(_prepare_timer);
99
15
    DBUG_EXECUTE_IF("fault_inject::PipelineXTask::prepare", {
100
15
        Status status = Status::Error<INTERNAL_ERROR>("fault_inject pipeline_task prepare failed");
101
15
        return status;
102
15
    });
103
15
    {
104
        // set sink local state
105
15
        LocalSinkStateInfo info {_task_idx,         _task_profile.get(),
106
15
                                 sender_id,         get_sink_shared_state().get(),
107
15
                                 _shared_state_map, tsink};
108
15
        RETURN_IF_ERROR(_sink->setup_local_state(_state, info));
109
15
    }
110
111
15
    _scan_ranges = scan_range;
112
15
    auto* parent_profile = _state->get_sink_local_state()->profile();
113
114
32
    for (int op_idx = _operators.size() - 1; op_idx >= 0; op_idx--) {
115
17
        auto& op = _operators[op_idx];
116
17
        LocalStateInfo info {parent_profile, _scan_ranges, get_op_shared_state(op->operator_id()),
117
17
                             _shared_state_map, _task_idx};
118
17
        RETURN_IF_ERROR(op->setup_local_state(_state, info));
119
17
        parent_profile = _state->get_local_state(op->operator_id())->profile();
120
17
    }
121
15
    {
122
15
        std::unique_lock<std::mutex> lc(_dependency_lock);
123
15
        const auto& deps =
124
15
                _state->get_local_state(_source->operator_id())->execution_dependencies();
125
15
        std::copy(deps.begin(), deps.end(), std::inserter(_execution_deps, _execution_deps.end()));
126
15
    }
127
15
    if (auto fragment = _fragment_context.lock()) {
128
14
        if (fragment->get_query_ctx()->is_cancelled()) {
129
0
            terminate();
130
0
            return fragment->get_query_ctx()->exec_status();
131
0
        }
132
14
    } else {
133
1
        return Status::InternalError("Fragment already finished! Query: {}", print_id(_query_id));
134
1
    }
135
14
    _block = doris::vectorized::Block::create_unique();
136
14
    return _state_transition(State::RUNNABLE);
137
15
}
138
139
13
Status PipelineTask::_extract_dependencies() {
140
13
    std::vector<std::vector<Dependency*>> read_dependencies;
141
13
    std::vector<Dependency*> write_dependencies;
142
13
    std::vector<Dependency*> finish_dependencies;
143
13
    std::vector<Dependency*> spill_dependencies;
144
13
    read_dependencies.resize(_operators.size());
145
13
    size_t i = 0;
146
15
    for (auto& op : _operators) {
147
15
        auto result = _state->get_local_state_result(op->operator_id());
148
15
        if (!result) {
149
1
            return result.error();
150
1
        }
151
14
        auto* local_state = result.value();
152
14
        read_dependencies[i] = local_state->dependencies();
153
14
        auto* fin_dep = local_state->finishdependency();
154
14
        if (fin_dep) {
155
7
            finish_dependencies.push_back(fin_dep);
156
7
        }
157
14
        if (auto* spill_dependency = local_state->spill_dependency()) {
158
7
            spill_dependencies.push_back(spill_dependency);
159
7
        }
160
14
        i++;
161
14
    }
162
12
    DBUG_EXECUTE_IF("fault_inject::PipelineXTask::_extract_dependencies", {
163
12
        Status status = Status::Error<INTERNAL_ERROR>(
164
12
                "fault_inject pipeline_task _extract_dependencies failed");
165
12
        return status;
166
12
    });
167
12
    {
168
12
        auto* local_state = _state->get_sink_local_state();
169
12
        write_dependencies = local_state->dependencies();
170
12
        auto* fin_dep = local_state->finishdependency();
171
12
        if (fin_dep) {
172
12
            finish_dependencies.push_back(fin_dep);
173
12
        }
174
12
        if (auto* spill_dependency = local_state->spill_dependency()) {
175
7
            spill_dependencies.push_back(spill_dependency);
176
7
        }
177
12
    }
178
12
    {
179
12
        std::unique_lock<std::mutex> lc(_dependency_lock);
180
12
        read_dependencies.swap(_read_dependencies);
181
12
        write_dependencies.swap(_write_dependencies);
182
12
        finish_dependencies.swap(_finish_dependencies);
183
12
        spill_dependencies.swap(_spill_dependencies);
184
12
    }
185
12
    return Status::OK();
186
12
}
187
188
6
bool PipelineTask::inject_shared_state(std::shared_ptr<BasicSharedState> shared_state) {
189
6
    if (!shared_state) {
190
1
        return false;
191
1
    }
192
    // Shared state is created by upstream task's sink operator and shared by source operator of
193
    // this task.
194
7
    for (auto& op : _operators) {
195
7
        if (shared_state->related_op_ids.contains(op->operator_id())) {
196
3
            _op_shared_states.insert({op->operator_id(), shared_state});
197
3
            return true;
198
3
        }
199
7
    }
200
    // Shared state is created by the first sink operator and shared by sink operator of this task.
201
    // For example, Set operations.
202
2
    if (shared_state->related_op_ids.contains(_sink->dests_id().front())) {
203
1
        DCHECK_EQ(_sink_shared_state, nullptr)
204
0
                << " Sink: " << _sink->get_name() << " dest id: " << _sink->dests_id().front();
205
1
        _sink_shared_state = shared_state;
206
1
        return true;
207
1
    }
208
1
    return false;
209
2
}
210
211
15
void PipelineTask::_init_profile() {
212
15
    _task_profile =
213
15
            std::make_unique<RuntimeProfile>(fmt::format("PipelineTask (index={})", _index));
214
15
    _parent_profile->add_child(_task_profile.get(), true, nullptr);
215
15
    _task_cpu_timer = ADD_TIMER(_task_profile, "TaskCpuTime");
216
217
15
    static const char* exec_time = "ExecuteTime";
218
15
    _exec_timer = ADD_TIMER(_task_profile, exec_time);
219
15
    _prepare_timer = ADD_CHILD_TIMER(_task_profile, "PrepareTime", exec_time);
220
15
    _open_timer = ADD_CHILD_TIMER(_task_profile, "OpenTime", exec_time);
221
15
    _get_block_timer = ADD_CHILD_TIMER(_task_profile, "GetBlockTime", exec_time);
222
15
    _get_block_counter = ADD_COUNTER(_task_profile, "GetBlockCounter", TUnit::UNIT);
223
15
    _sink_timer = ADD_CHILD_TIMER(_task_profile, "SinkTime", exec_time);
224
15
    _close_timer = ADD_CHILD_TIMER(_task_profile, "CloseTime", exec_time);
225
226
15
    _wait_worker_timer = ADD_TIMER_WITH_LEVEL(_task_profile, "WaitWorkerTime", 1);
227
228
15
    _schedule_counts = ADD_COUNTER(_task_profile, "NumScheduleTimes", TUnit::UNIT);
229
15
    _yield_counts = ADD_COUNTER(_task_profile, "NumYieldTimes", TUnit::UNIT);
230
15
    _core_change_times = ADD_COUNTER(_task_profile, "CoreChangeTimes", TUnit::UNIT);
231
15
    _memory_reserve_times = ADD_COUNTER(_task_profile, "MemoryReserveTimes", TUnit::UNIT);
232
15
    _memory_reserve_failed_times =
233
15
            ADD_COUNTER(_task_profile, "MemoryReserveFailedTimes", TUnit::UNIT);
234
15
}
235
236
6
void PipelineTask::_fresh_profile_counter() {
237
6
    COUNTER_SET(_schedule_counts, (int64_t)_schedule_time);
238
6
    COUNTER_SET(_wait_worker_timer, (int64_t)_wait_worker_watcher.elapsed_time());
239
6
}
240
241
12
Status PipelineTask::_open() {
242
12
    SCOPED_TIMER(_task_profile->total_time_counter());
243
12
    SCOPED_CPU_TIMER(_task_cpu_timer);
244
12
    SCOPED_TIMER(_open_timer);
245
12
    _dry_run = _sink->should_dry_run(_state);
246
14
    for (auto& o : _operators) {
247
14
        RETURN_IF_ERROR(_state->get_local_state(o->operator_id())->open(_state));
248
14
    }
249
12
    RETURN_IF_ERROR(_state->get_sink_local_state()->open(_state));
250
12
    RETURN_IF_ERROR(_extract_dependencies());
251
12
    DBUG_EXECUTE_IF("fault_inject::PipelineXTask::open", {
252
12
        Status status = Status::Error<INTERNAL_ERROR>("fault_inject pipeline_task open failed");
253
12
        return status;
254
12
    });
255
12
    _opened = true;
256
12
    return Status::OK();
257
12
}
258
259
41
bool PipelineTask::_wait_to_start() {
260
    // Before task starting, we should make sure
261
    // 1. Execution dependency is ready (which is controlled by FE 2-phase commit)
262
    // 2. Runtime filter dependencies are ready
263
    // 3. All tablets are loaded into local storage
264
41
    return std::any_of(
265
41
            _execution_deps.begin(), _execution_deps.end(),
266
54
            [&](Dependency* dep) -> bool { return dep->is_blocked_by(shared_from_this()); });
267
41
}
268
269
16
bool PipelineTask::_is_pending_finish() {
270
    // Spilling may be in progress if eos is true.
271
16
    return std::any_of(_spill_dependencies.begin(), _spill_dependencies.end(),
272
16
                       [&](Dependency* dep) -> bool {
273
11
                           return dep->is_blocked_by(shared_from_this());
274
11
                       }) ||
275
16
           std::any_of(
276
15
                   _finish_dependencies.begin(), _finish_dependencies.end(),
277
19
                   [&](Dependency* dep) -> bool { return dep->is_blocked_by(shared_from_this()); });
278
16
}
279
280
2
bool PipelineTask::is_revoking() const {
281
    // Spilling may be in progress if eos is true.
282
2
    return std::any_of(_spill_dependencies.begin(), _spill_dependencies.end(),
283
3
                       [&](Dependency* dep) -> bool { return dep->is_blocked_by(); });
284
2
}
285
286
816k
bool PipelineTask::_is_blocked() {
287
    // `_dry_run = true` means we do not need data from source operator.
288
816k
    if (!_dry_run) {
289
1.63M
        for (int i = _read_dependencies.size() - 1; i >= 0; i--) {
290
            // `_read_dependencies` is organized according to operators. For each operator, running condition is met iff all dependencies are ready.
291
816k
            for (auto* dep : _read_dependencies[i]) {
292
816k
                if (dep->is_blocked_by(shared_from_this())) {
293
14
                    return true;
294
14
                }
295
816k
            }
296
            // If all dependencies are ready for this operator, we can execute this task if no datum is needed from upstream operators.
297
816k
            if (!_operators[i]->need_more_input_data(_state)) {
298
2
                break;
299
2
            }
300
816k
        }
301
816k
    }
302
816k
    return std::any_of(_spill_dependencies.begin(), _spill_dependencies.end(),
303
1.63M
                       [&](Dependency* dep) -> bool {
304
1.63M
                           return dep->is_blocked_by(shared_from_this());
305
1.63M
                       }) ||
306
816k
           _memory_sufficient_dependency->is_blocked_by(shared_from_this()) ||
307
816k
           std::any_of(
308
816k
                   _write_dependencies.begin(), _write_dependencies.end(),
309
816k
                   [&](Dependency* dep) -> bool { return dep->is_blocked_by(shared_from_this()); });
310
816k
}
311
312
4
void PipelineTask::terminate() {
313
    // We use a lock to assure all dependencies are not deconstructed here.
314
4
    std::unique_lock<std::mutex> lc(_dependency_lock);
315
4
    auto fragment = _fragment_context.lock();
316
4
    if (!is_finalized() && fragment) {
317
4
        DCHECK(_wake_up_early || fragment->is_canceled());
318
4
        std::for_each(_spill_dependencies.begin(), _spill_dependencies.end(),
319
8
                      [&](Dependency* dep) { dep->set_always_ready(); });
320
4
        std::for_each(_write_dependencies.begin(), _write_dependencies.end(),
321
4
                      [&](Dependency* dep) { dep->set_always_ready(); });
322
4
        std::for_each(_finish_dependencies.begin(), _finish_dependencies.end(),
323
8
                      [&](Dependency* dep) { dep->set_always_ready(); });
324
4
        std::for_each(_read_dependencies.begin(), _read_dependencies.end(),
325
4
                      [&](std::vector<Dependency*>& deps) {
326
4
                          std::for_each(deps.begin(), deps.end(),
327
4
                                        [&](Dependency* dep) { dep->set_always_ready(); });
328
4
                      });
329
        // All `_execution_deps` will never be set blocking from ready. So we just set ready here.
330
4
        std::for_each(_execution_deps.begin(), _execution_deps.end(),
331
8
                      [&](Dependency* dep) { dep->set_ready(); });
332
4
        _memory_sufficient_dependency->set_ready();
333
4
    }
334
4
}
335
336
/**
337
 * `_eos` indicates whether the execution phase is done. `done` indicates whether we could close
338
 * this task.
339
 *
340
 * For example,
341
 * 1. if `_eos` is false which means we should continue to get next block so we cannot close (e.g.
342
 *    `done` is false)
343
 * 2. if `_eos` is true which means all blocks from source are exhausted but `_is_pending_finish()`
344
 *    is true which means we should wait for a pending dependency ready (maybe a running rpc), so we
345
 *    cannot close (e.g. `done` is false)
346
 * 3. if `_eos` is true which means all blocks from source are exhausted and `_is_pending_finish()`
347
 *    is false which means we can close immediately (e.g. `done` is true)
348
 * @param done
349
 * @return
350
 */
351
33
Status PipelineTask::execute(bool* done) {
352
33
    if (_exec_state != State::RUNNABLE || _blocked_dep != nullptr) [[unlikely]] {
353
1
        return Status::InternalError("Pipeline task is not runnable! Task info: {}",
354
1
                                     debug_string());
355
1
    }
356
32
    auto fragment_context = _fragment_context.lock();
357
32
    DCHECK(fragment_context);
358
32
    int64_t time_spent = 0;
359
32
    ThreadCpuStopWatch cpu_time_stop_watch;
360
32
    cpu_time_stop_watch.start();
361
32
    SCOPED_ATTACH_TASK(_state);
362
32
    Defer running_defer {[&]() {
363
32
        if (_task_queue) {
364
32
            _task_queue->update_statistics(this, time_spent);
365
32
        }
366
32
        int64_t delta_cpu_time = cpu_time_stop_watch.elapsed_time();
367
32
        _task_cpu_timer->update(delta_cpu_time);
368
32
        fragment_context->get_query_ctx()->resource_ctx()->cpu_context()->update_cpu_cost_ms(
369
32
                delta_cpu_time);
370
371
        // If task is woke up early, we should terminate all operators, and this task could be closed immediately.
372
32
        if (_wake_up_early) {
373
3
            terminate();
374
3
            THROW_IF_ERROR(_root->terminate(_state));
375
3
            THROW_IF_ERROR(_sink->terminate(_state));
376
3
            _eos = true;
377
3
            *done = true;
378
29
        } else if (_eos && !_spilling &&
379
29
                   (fragment_context->is_canceled() || !_is_pending_finish())) {
380
8
            *done = true;
381
8
        }
382
32
    }};
383
32
    const auto query_id = _state->query_id();
384
    // If this task is already EOS and block is empty (which means we already output all blocks),
385
    // just return here.
386
32
    if (_eos && !_spilling) {
387
1
        return Status::OK();
388
1
    }
389
    // If this task is blocked by a spilling request and waken up immediately, the spilling
390
    // dependency will not block this task and we should just run here.
391
31
    if (!_block->empty()) {
392
0
        LOG(INFO) << "Query: " << print_id(query_id) << " has pending block, size: "
393
0
                  << PrettyPrinter::print_bytes(_block->allocated_bytes());
394
0
        DCHECK(_spilling);
395
0
    }
396
397
31
    SCOPED_TIMER(_task_profile->total_time_counter());
398
31
    SCOPED_TIMER(_exec_timer);
399
400
31
    DBUG_EXECUTE_IF("fault_inject::PipelineXTask::execute", {
401
31
        Status status = Status::Error<INTERNAL_ERROR>("fault_inject pipeline_task execute failed");
402
31
        return status;
403
31
    });
404
31
    if (!_wake_up_early && !_hold_tablets) {
405
11
        RETURN_IF_ERROR(_source->hold_tablets(_state));
406
11
        _hold_tablets = true;
407
11
    }
408
    // `_wake_up_early` must be after `_wait_to_start()`
409
31
    if (_wait_to_start() || _wake_up_early) {
410
2
        return Status::OK();
411
2
    }
412
413
    // The status must be runnable
414
29
    if (!_opened && !fragment_context->is_canceled()) {
415
11
        DBUG_EXECUTE_IF("PipelineTask::execute.open_sleep", {
416
11
            auto required_pipeline_id =
417
11
                    DebugPoints::instance()->get_debug_param_or_default<int32_t>(
418
11
                            "PipelineTask::execute.open_sleep", "pipeline_id", -1);
419
11
            auto required_task_id = DebugPoints::instance()->get_debug_param_or_default<int32_t>(
420
11
                    "PipelineTask::execute.open_sleep", "task_id", -1);
421
11
            if (required_pipeline_id == pipeline_id() && required_task_id == task_id()) {
422
11
                LOG(WARNING) << "PipelineTask::execute.open_sleep sleep 5s";
423
11
                sleep(5);
424
11
            }
425
11
        });
426
427
11
        SCOPED_RAW_TIMER(&time_spent);
428
11
        RETURN_IF_ERROR(_open());
429
11
    }
430
431
816k
    while (!fragment_context->is_canceled()) {
432
816k
        SCOPED_RAW_TIMER(&time_spent);
433
816k
        Defer defer {[&]() {
434
            // If this run is pended by a spilling request, the block will be output in next run.
435
816k
            if (!_spilling) {
436
814k
                _block->clear_column_data(_root->row_desc().num_materialized_slots());
437
814k
            }
438
816k
        }};
439
        // `_wake_up_early` must be after `_is_blocked()`
440
816k
        if (_is_blocked() || _wake_up_early) {
441
16
            return Status::OK();
442
16
        }
443
444
        /// When a task is cancelled,
445
        /// its blocking state will be cleared and it will transition to a ready state (though it is not truly ready).
446
        /// Here, checking whether it is cancelled to prevent tasks in a blocking state from being re-executed.
447
816k
        if (fragment_context->is_canceled()) {
448
0
            break;
449
0
        }
450
451
816k
        if (time_spent > _exec_time_slice) {
452
3
            COUNTER_UPDATE(_yield_counts, 1);
453
3
            break;
454
3
        }
455
816k
        auto* block = _block.get();
456
457
816k
        DBUG_EXECUTE_IF("fault_inject::PipelineXTask::executing", {
458
816k
            Status status =
459
816k
                    Status::Error<INTERNAL_ERROR>("fault_inject pipeline_task executing failed");
460
816k
            return status;
461
816k
        });
462
463
        // `_sink->is_finished(_state)` means sink operator should be finished
464
816k
        if (_sink->is_finished(_state)) {
465
1
            set_wake_up_early();
466
1
            return Status::OK();
467
1
        }
468
469
        // `_dry_run` means sink operator need no more data
470
816k
        _eos = _dry_run || _eos;
471
816k
        _spilling = false;
472
816k
        auto workload_group = _state->workload_group();
473
        // If last run is pended by a spilling request, `_block` is produced with some rows in last
474
        // run, so we will resume execution using the block.
475
816k
        if (!_eos && _block->empty()) {
476
815k
            SCOPED_TIMER(_get_block_timer);
477
815k
            if (_state->low_memory_mode()) {
478
1.46k
                _sink->set_low_memory_mode(_state);
479
1.46k
                _root->set_low_memory_mode(_state);
480
1.46k
            }
481
815k
            DEFER_RELEASE_RESERVED();
482
815k
            _get_block_counter->update(1);
483
815k
            const auto reserve_size = _root->get_reserve_mem_size(_state);
484
815k
            _root->reset_reserve_mem_size(_state);
485
486
815k
            if (workload_group &&
487
815k
                _state->get_query_ctx()
488
2.28k
                        ->resource_ctx()
489
2.28k
                        ->task_controller()
490
2.28k
                        ->is_enable_reserve_memory() &&
491
815k
                reserve_size > 0) {
492
2.28k
                if (!_try_to_reserve_memory(reserve_size, _root)) {
493
819
                    continue;
494
819
                }
495
2.28k
            }
496
497
814k
            bool eos = false;
498
814k
            RETURN_IF_ERROR(_root->get_block_after_projects(_state, block, &eos));
499
814k
            _eos = eos;
500
814k
        }
501
502
815k
        if (!_block->empty() || _eos) {
503
832
            SCOPED_TIMER(_sink_timer);
504
832
            Status status = Status::OK();
505
832
            DEFER_RELEASE_RESERVED();
506
832
            COUNTER_UPDATE(_memory_reserve_times, 1);
507
832
            if (_state->get_query_ctx()
508
832
                        ->resource_ctx()
509
832
                        ->task_controller()
510
832
                        ->is_enable_reserve_memory() &&
511
832
                workload_group && !(_wake_up_early || _dry_run)) {
512
819
                const auto sink_reserve_size = _sink->get_reserve_mem_size(_state, _eos);
513
819
                if (sink_reserve_size > 0 &&
514
819
                    !_try_to_reserve_memory(sink_reserve_size, _sink.get())) {
515
817
                    continue;
516
817
                }
517
819
            }
518
519
15
            if (_eos) {
520
9
                RETURN_IF_ERROR(close(Status::OK(), false));
521
9
            }
522
523
15
            DBUG_EXECUTE_IF("PipelineTask::execute.sink_eos_sleep", {
524
15
                auto required_pipeline_id =
525
15
                        DebugPoints::instance()->get_debug_param_or_default<int32_t>(
526
15
                                "PipelineTask::execute.sink_eos_sleep", "pipeline_id", -1);
527
15
                auto required_task_id =
528
15
                        DebugPoints::instance()->get_debug_param_or_default<int32_t>(
529
15
                                "PipelineTask::execute.sink_eos_sleep", "task_id", -1);
530
15
                if (required_pipeline_id == pipeline_id() && required_task_id == task_id()) {
531
15
                    LOG(WARNING) << "PipelineTask::execute.sink_eos_sleep sleep 10s";
532
15
                    sleep(10);
533
15
                }
534
15
            });
535
536
15
            DBUG_EXECUTE_IF("PipelineTask::execute.terminate", {
537
15
                if (_eos) {
538
15
                    auto required_pipeline_id =
539
15
                            DebugPoints::instance()->get_debug_param_or_default<int32_t>(
540
15
                                    "PipelineTask::execute.terminate", "pipeline_id", -1);
541
15
                    auto required_task_id =
542
15
                            DebugPoints::instance()->get_debug_param_or_default<int32_t>(
543
15
                                    "PipelineTask::execute.terminate", "task_id", -1);
544
15
                    auto required_fragment_id =
545
15
                            DebugPoints::instance()->get_debug_param_or_default<int32_t>(
546
15
                                    "PipelineTask::execute.terminate", "fragment_id", -1);
547
15
                    if (required_pipeline_id == pipeline_id() && required_task_id == task_id() &&
548
15
                        fragment_context->get_fragment_id() == required_fragment_id) {
549
15
                        _wake_up_early = true;
550
15
                        terminate();
551
15
                    } else if (required_pipeline_id == pipeline_id() &&
552
15
                               fragment_context->get_fragment_id() == required_fragment_id) {
553
15
                        LOG(WARNING) << "PipelineTask::execute.terminate sleep 5s";
554
15
                        sleep(5);
555
15
                    }
556
15
                }
557
15
            });
558
15
            status = _sink->sink(_state, block, _eos);
559
560
15
            if (status.is<ErrorCode::END_OF_FILE>()) {
561
1
                set_wake_up_early();
562
1
                return Status::OK();
563
14
            } else if (!status) {
564
0
                return status;
565
0
            }
566
567
14
            if (_eos) { // just return, the scheduler will do finish work
568
8
                return Status::OK();
569
8
            }
570
14
        }
571
815k
    }
572
573
3
    RETURN_IF_ERROR(get_task_queue()->push_back(shared_from_this()));
574
3
    return Status::OK();
575
3
}
576
577
3.10k
bool PipelineTask::_try_to_reserve_memory(const size_t reserve_size, OperatorBase* op) {
578
3.10k
    auto st = thread_context()->thread_mem_tracker_mgr->try_reserve(reserve_size);
579
3.10k
    COUNTER_UPDATE(_memory_reserve_times, 1);
580
3.10k
    auto sink_revocable_mem_size = _sink->revocable_mem_size(_state);
581
3.10k
    if (st.ok() && _state->enable_force_spill() && _sink->is_spillable() &&
582
3.10k
        sink_revocable_mem_size >= vectorized::SpillStream::MIN_SPILL_WRITE_BATCH_MEM) {
583
0
        st = Status(ErrorCode::QUERY_MEMORY_EXCEEDED, "Force Spill");
584
0
    }
585
3.10k
    if (!st.ok()) {
586
3.10k
        COUNTER_UPDATE(_memory_reserve_failed_times, 1);
587
3.10k
        auto debug_msg = fmt::format(
588
3.10k
                "Query: {} , try to reserve: {}, operator name: {}, operator "
589
3.10k
                "id: {}, task id: {}, root revocable mem size: {}, sink revocable mem"
590
3.10k
                "size: {}, failed: {}",
591
3.10k
                print_id(_query_id), PrettyPrinter::print_bytes(reserve_size), op->get_name(),
592
3.10k
                op->node_id(), _state->task_id(),
593
3.10k
                PrettyPrinter::print_bytes(op->revocable_mem_size(_state)),
594
3.10k
                PrettyPrinter::print_bytes(sink_revocable_mem_size), st.to_string());
595
        // PROCESS_MEMORY_EXCEEDED error msg alread contains process_mem_log_str
596
3.10k
        if (!st.is<ErrorCode::PROCESS_MEMORY_EXCEEDED>()) {
597
3.10k
            debug_msg +=
598
3.10k
                    fmt::format(", debug info: {}", GlobalMemoryArbitrator::process_mem_log_str());
599
3.10k
        }
600
3.10k
        LOG_EVERY_N(INFO, 100) << debug_msg;
601
        // If sink has enough revocable memory, trigger revoke memory
602
3.10k
        if (sink_revocable_mem_size >= vectorized::SpillStream::MIN_SPILL_WRITE_BATCH_MEM) {
603
1.63k
            LOG(INFO) << fmt::format(
604
1.63k
                    "Query: {} sink: {}, node id: {}, task id: "
605
1.63k
                    "{}, revocable mem size: {}",
606
1.63k
                    print_id(_query_id), _sink->get_name(), _sink->node_id(), _state->task_id(),
607
1.63k
                    PrettyPrinter::print_bytes(sink_revocable_mem_size));
608
1.63k
            ExecEnv::GetInstance()->workload_group_mgr()->add_paused_query(
609
1.63k
                    _state->get_query_ctx()->resource_ctx()->shared_from_this(), reserve_size, st);
610
1.63k
            _spilling = true;
611
1.63k
            return false;
612
1.63k
        } else {
613
            // If reserve failed, not add this query to paused list, because it is very small, will not
614
            // consume a lot of memory. But need set low memory mode to indicate that the system should
615
            // not use too much memory.
616
1.46k
            _state->get_query_ctx()->set_low_memory_mode();
617
1.46k
        }
618
3.10k
    }
619
1.46k
    return true;
620
3.10k
}
621
622
253k
void PipelineTask::stop_if_finished() {
623
253k
    auto fragment = _fragment_context.lock();
624
253k
    if (!fragment) {
625
0
        return;
626
0
    }
627
253k
    SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(fragment->get_query_ctx()->query_mem_tracker());
628
253k
    if (auto sink = _sink) {
629
253k
        if (sink->is_finished(_state)) {
630
0
            set_wake_up_early();
631
0
            terminate();
632
0
        }
633
253k
    }
634
253k
}
635
636
1
// Moves the task to State::FINALIZED and then releases everything it holds: the sink and
// operator shared states, the shared state map, the block, the operators, the sink and the
// pipeline.
//
// Returns OK without touching anything when the fragment context can no longer be locked.
// If the state transition is rejected, its error is returned and nothing is released.
//
// NOTE(review): _dependency_lock is held across _state_transition(); the failure path of
// _state_transition() calls debug_string(), which locks _dependency_lock as well. Confirm
// that an illegal transition cannot be reached from here, otherwise this would self-deadlock.
Status PipelineTask::finalize() {
    const auto frag_ctx = _fragment_context.lock();
    if (frag_ctx == nullptr) {
        return Status::OK();
    }
    SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(frag_ctx->get_query_ctx()->query_mem_tracker());
    std::lock_guard<std::mutex> guard(_dependency_lock);
    RETURN_IF_ERROR(_state_transition(State::FINALIZED));

    // Release order is kept exactly as it has always been: shared states first, then the
    // block, the operators, the sink and finally the pipeline.
    _sink_shared_state.reset();
    _op_shared_states.clear();
    _shared_state_map.clear();
    _block.reset();
    _operators.clear();
    _sink.reset();
    _pipeline.reset();
    return Status::OK();
}
653
654
15
// Closes all operators of this task and, when `close_sink` is true, the sink too (the sink
// is closed first and receives `exec_status`).
//
// Every operator is closed even if an earlier close failed; only the first failure is kept
// and returned. The time spent closing is added to the close/total timers when the task was
// opened, and is always reported to the task queue if there is one. When `close_sink` is
// true the task is finally moved to State::FINISHED; a rejected transition is returned in
// place of the close result.
Status PipelineTask::close(Status exec_status, bool close_sink) {
    int64_t elapsed_ns = 0;
    Status result;
    {
        // Measures only the sink/operator close calls inside this scope.
        SCOPED_RAW_TIMER(&elapsed_ns);
        if (close_sink) {
            result = _sink->close(_state, exec_status);
        }
        for (auto& op : _operators) {
            auto op_status = op->close(_state);
            if (result.ok() && !op_status.ok()) {
                result = op_status;
            }
        }
    }

    if (_opened) {
        COUNTER_UPDATE(_close_timer, elapsed_ns);
        COUNTER_UPDATE(_task_profile->total_time_counter(), elapsed_ns);
        if (close_sink) {
            _task_profile->add_info_string("WakeUpEarly", std::to_string(_wake_up_early.load()));
            _fresh_profile_counter();
        }
    }

    if (_task_queue) {
        _task_queue->update_statistics(this, elapsed_ns);
    }
    if (close_sink) {
        RETURN_IF_ERROR(_state_transition(State::FINISHED));
    }
    return result;
}
687
688
31.4k
std::string PipelineTask::debug_string() {
689
31.4k
    fmt::memory_buffer debug_string_buffer;
690
691
31.4k
    fmt::format_to(debug_string_buffer, "QueryId: {}\n", print_id(_query_id));
692
31.4k
    fmt::format_to(debug_string_buffer, "InstanceId: {}\n",
693
31.4k
                   print_id(_state->fragment_instance_id()));
694
695
31.4k
    fmt::format_to(debug_string_buffer,
696
31.4k
                   "PipelineTask[id = {}, open = {}, eos = {}, state = {}, dry run = "
697
31.4k
                   "{}, _wake_up_early = {}, time elapsed since last state changing = {}s, spilling"
698
31.4k
                   " = {}, is running = {}]",
699
31.4k
                   _index, _opened, _eos, _to_string(_exec_state), _dry_run, _wake_up_early.load(),
700
31.4k
                   _state_change_watcher.elapsed_time() / NANOS_PER_SEC, _spilling, is_running());
701
31.4k
    std::unique_lock<std::mutex> lc(_dependency_lock);
702
31.4k
    auto* cur_blocked_dep = _blocked_dep;
703
31.4k
    auto fragment = _fragment_context.lock();
704
31.4k
    if (is_finalized() || !fragment) {
705
5
        fmt::format_to(debug_string_buffer, " pipeline name = {}", _pipeline_name);
706
5
        return fmt::to_string(debug_string_buffer);
707
5
    }
708
31.4k
    auto elapsed = fragment->elapsed_time() / NANOS_PER_SEC;
709
31.4k
    fmt::format_to(debug_string_buffer,
710
31.4k
                   " elapse time = {}s, block dependency = [{}]\noperators: ", elapsed,
711
31.4k
                   cur_blocked_dep && !is_finalized() ? cur_blocked_dep->debug_string() : "NULL");
712
713
62.8k
    for (size_t i = 0; i < _operators.size(); i++) {
714
31.4k
        fmt::format_to(debug_string_buffer, "\n{}",
715
31.4k
                       _opened && !is_finalized() ? _operators[i]->debug_string(_state, i)
716
31.4k
                                                  : _operators[i]->debug_string(i));
717
31.4k
    }
718
31.4k
    fmt::format_to(debug_string_buffer, "\n{}\n",
719
31.4k
                   _opened && !is_finalized() ? _sink->debug_string(_state, _operators.size())
720
31.4k
                                              : _sink->debug_string(_operators.size()));
721
722
31.4k
    fmt::format_to(debug_string_buffer, "\nRead Dependency Information: \n");
723
724
31.4k
    size_t i = 0;
725
62.8k
    for (; i < _read_dependencies.size(); i++) {
726
62.8k
        for (size_t j = 0; j < _read_dependencies[i].size(); j++) {
727
31.4k
            fmt::format_to(debug_string_buffer, "{}. {}\n", i,
728
31.4k
                           _read_dependencies[i][j]->debug_string(i + 1));
729
31.4k
        }
730
31.4k
    }
731
732
31.4k
    fmt::format_to(debug_string_buffer, "{}. {}\n", i,
733
31.4k
                   _memory_sufficient_dependency->debug_string(i++));
734
735
31.4k
    fmt::format_to(debug_string_buffer, "\nWrite Dependency Information: \n");
736
62.8k
    for (size_t j = 0; j < _write_dependencies.size(); j++, i++) {
737
31.4k
        fmt::format_to(debug_string_buffer, "{}. {}\n", i,
738
31.4k
                       _write_dependencies[j]->debug_string(i + 1));
739
31.4k
    }
740
741
31.4k
    fmt::format_to(debug_string_buffer, "\nExecution Dependency Information: \n");
742
94.2k
    for (size_t j = 0; j < _execution_deps.size(); j++, i++) {
743
62.8k
        fmt::format_to(debug_string_buffer, "{}. {}\n", i, _execution_deps[j]->debug_string(i + 1));
744
62.8k
    }
745
746
31.4k
    fmt::format_to(debug_string_buffer, "\nSpill Dependency Information: \n");
747
94.2k
    for (size_t j = 0; j < _spill_dependencies.size(); j++, i++) {
748
62.8k
        fmt::format_to(debug_string_buffer, "{}. {}\n", i,
749
62.8k
                       _spill_dependencies[j]->debug_string(i + 1));
750
62.8k
    }
751
752
31.4k
    fmt::format_to(debug_string_buffer, "Finish Dependency Information: \n");
753
94.2k
    for (size_t j = 0; j < _finish_dependencies.size(); j++, i++) {
754
62.8k
        fmt::format_to(debug_string_buffer, "{}. {}\n", i,
755
62.8k
                       _finish_dependencies[j]->debug_string(j + 1));
756
62.8k
    }
757
31.4k
    return fmt::to_string(debug_string_buffer);
758
31.4k
}
759
760
0
size_t PipelineTask::get_revocable_size() const {
761
0
    if (is_finalized() || _running || (_eos && !_spilling)) {
762
0
        return 0;
763
0
    }
764
765
0
    return _sink->revocable_mem_size(_state);
766
0
}
767
768
0
// Asks the sink to revoke (spill) its revocable memory.
//
// The sink is only asked when its revocable size reaches
// vectorized::SpillStream::MIN_SPILL_WRITE_BATCH_MEM. When the task is already finalized, or
// when there is too little to revoke, nothing is revoked and `spill_context` (if provided) is
// told that this task is finished. Returns the sink's error if revoking fails, OK otherwise.
Status PipelineTask::revoke_memory(const std::shared_ptr<SpillContext>& spill_context) {
    if (is_finalized()) {
        if (spill_context) {
            spill_context->on_task_finished();
            VLOG_DEBUG << "Query: " << print_id(_state->query_id())
                       << ", task: " << static_cast<void*>(this) << " finalized";
        }
        return Status::OK();
    }

    const auto revocable_bytes = _sink->revocable_mem_size(_state);
    if (revocable_bytes < vectorized::SpillStream::MIN_SPILL_WRITE_BATCH_MEM) {
        // Below the minimum spill batch size: skip the revoke, just report completion.
        if (spill_context) {
            spill_context->on_task_finished();
            LOG(INFO) << "Query: " << print_id(_state->query_id())
                      << ", task: " << static_cast<void*>(this)
                      << " has not enough data to revoke: " << revocable_bytes;
        }
        return Status::OK();
    }

    RETURN_IF_ERROR(_sink->revoke_memory(_state, spill_context));
    return Status::OK();
}
788
789
25
// Called by a dependency to resume this task: clears the recorded blocking dependency, moves
// the task to State::RUNNABLE and pushes it back onto its task queue.
//
// `dep` is expected to be the dependency the task is currently blocked on (checked with
// DCHECK_EQ). Returns the error of the state transition or of the queue push, OK otherwise.
Status PipelineTask::wake_up(Dependency* dep) {
    DCHECK_EQ(_blocked_dep, dep) << "dep : " << dep->debug_string(0) << "task: " << debug_string();
    _blocked_dep = nullptr;
    // The shared handle to this task is obtained before the state changes, and it is what
    // gets handed to the queue.
    auto self = std::dynamic_pointer_cast<PipelineTask>(shared_from_this());
    RETURN_IF_ERROR(_state_transition(State::RUNNABLE));
    RETURN_IF_ERROR(get_task_queue()->push_back(self));
    return Status::OK();
}
798
799
96
// Switches the task to `new_state` if LEGAL_STATE_TRANSITION permits that from the current
// state; otherwise returns an InternalError (including debug_string()) and leaves
// _exec_state untouched.
//
// The state-change stopwatch is restarted whenever `new_state` differs from the current
// state, and the "TaskState" / "BlockedByDependency" profile strings are updated, before the
// legality check -- i.e. also for a transition that ends up being rejected.
Status PipelineTask::_state_transition(State new_state) {
    const bool state_differs = _exec_state != new_state;
    if (state_differs) {
        _state_change_watcher.reset();
        _state_change_watcher.start();
    }
    _task_profile->add_info_string("TaskState", _to_string(new_state));
    _task_profile->add_info_string("BlockedByDependency", _blocked_dep ? _blocked_dep->name() : "");

    const bool is_legal =
            LEGAL_STATE_TRANSITION[static_cast<int>(new_state)].contains(_exec_state);
    if (is_legal) {
        _exec_state = new_state;
        return Status::OK();
    }
    return Status::InternalError(
            "Task state transition from {} to {} is not allowed! Task info: {}",
            _to_string(_exec_state), _to_string(new_state), debug_string());
}
814
815
} // namespace doris::pipeline