Coverage Report

Created: 2025-07-28 22:10

/root/doris/be/src/pipeline/pipeline_task.cpp
Line | Count | Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "pipeline_task.h"
19
20
#include <fmt/core.h>
21
#include <fmt/format.h>
22
#include <gen_cpp/Metrics_types.h>
23
#include <glog/logging.h>
24
25
#include <ostream>
26
#include <vector>
27
28
#include "common/logging.h"
29
#include "common/status.h"
30
#include "pipeline/dependency.h"
31
#include "pipeline/exec/operator.h"
32
#include "pipeline/exec/scan_operator.h"
33
#include "pipeline/pipeline.h"
34
#include "pipeline/pipeline_fragment_context.h"
35
#include "pipeline/task_queue.h"
36
#include "pipeline/task_scheduler.h"
37
#include "runtime/descriptors.h"
38
#include "runtime/exec_env.h"
39
#include "runtime/query_context.h"
40
#include "runtime/thread_context.h"
41
#include "runtime/workload_group/workload_group_manager.h"
42
#include "util/container_util.hpp"
43
#include "util/defer_op.h"
44
#include "util/mem_info.h"
45
#include "util/runtime_profile.h"
46
#include "util/uid_util.h"
47
#include "vec/core/block.h"
48
#include "vec/spill/spill_stream.h"
49
50
namespace doris {
51
class RuntimeState;
52
} // namespace doris
53
54
namespace doris::pipeline {
55
#include "common/compile_check_begin.h"
56
57
PipelineTask::PipelineTask(PipelinePtr& pipeline, uint32_t task_id, RuntimeState* state,
58
                           std::shared_ptr<PipelineFragmentContext> fragment_context,
59
                           RuntimeProfile* parent_profile,
60
                           std::map<int, std::pair<std::shared_ptr<BasicSharedState>,
61
                                                   std::vector<std::shared_ptr<Dependency>>>>
62
                                   shared_state_map,
63
                           int task_idx)
64
        :
65
#ifdef BE_TEST
66
234k
          _query_id(fragment_context ? fragment_context->get_query_id() : TUniqueId()),
67
#else
68
          _query_id(fragment_context->get_query_id()),
69
#endif
70
234k
          _index(task_id),
71
234k
          _pipeline(pipeline),
72
234k
          _opened(false),
73
234k
          _state(state),
74
234k
          _fragment_context(fragment_context),
75
234k
          _parent_profile(parent_profile),
76
234k
          _operators(pipeline->operators()),
77
234k
          _source(_operators.front().get()),
78
234k
          _root(_operators.back().get()),
79
234k
          _sink(pipeline->sink_shared_pointer()),
80
234k
          _shared_state_map(std::move(shared_state_map)),
81
234k
          _task_idx(task_idx),
82
234k
          _memory_sufficient_dependency(state->get_query_ctx()->get_memory_sufficient_dependency()),
83
234k
          _pipeline_name(_pipeline->name()) {
84
234k
    _execution_dependencies.push_back(state->get_query_ctx()->get_execution_dependency());
85
234k
    if (!_shared_state_map.contains(_sink->dests_id().front())) {
86
234k
        auto shared_state = _sink->create_shared_state();
87
234k
        if (shared_state) {
88
87
            _sink_shared_state = shared_state;
89
87
        }
90
234k
    }
91
234k
}
92
93
Status PipelineTask::prepare(const std::vector<TScanRangeParams>& scan_range, const int sender_id,
94
45
                             const TDataSink& tsink) {
95
45
    DCHECK(_sink);
96
45
    _init_profile();
97
45
    SCOPED_TIMER(_task_profile->total_time_counter());
98
45
    SCOPED_CPU_TIMER(_task_cpu_timer);
99
45
    SCOPED_TIMER(_prepare_timer);
100
45
    DBUG_EXECUTE_IF("fault_inject::PipelineXTask::prepare", {
101
45
        Status status = Status::Error<INTERNAL_ERROR>("fault_inject pipeline_task prepare failed");
102
45
        return status;
103
45
    });
104
45
    {
105
        // set sink local state
106
45
        LocalSinkStateInfo info {_task_idx,         _task_profile.get(),
107
45
                                 sender_id,         get_sink_shared_state().get(),
108
45
                                 _shared_state_map, tsink};
109
45
        RETURN_IF_ERROR(_sink->setup_local_state(_state, info));
110
45
    }
111
112
45
    _scan_ranges = scan_range;
113
45
    auto* parent_profile = _state->get_sink_local_state()->operator_profile();
114
115
96
    for (int op_idx = cast_set<int>(_operators.size() - 1); op_idx >= 0; op_idx--) {
116
51
        auto& op = _operators[op_idx];
117
51
        LocalStateInfo info {parent_profile, _scan_ranges, get_op_shared_state(op->operator_id()),
118
51
                             _shared_state_map, _task_idx};
119
51
        RETURN_IF_ERROR(op->setup_local_state(_state, info));
120
51
        parent_profile = _state->get_local_state(op->operator_id())->operator_profile();
121
51
    }
122
45
    {
123
45
        const auto& deps =
124
45
                _state->get_local_state(_source->operator_id())->execution_dependencies();
125
45
        std::unique_lock<std::mutex> lc(_dependency_lock);
126
45
        std::copy(deps.begin(), deps.end(),
127
45
                  std::inserter(_execution_dependencies, _execution_dependencies.end()));
128
45
    }
129
45
    if (auto fragment = _fragment_context.lock()) {
130
42
        if (fragment->get_query_ctx()->is_cancelled()) {
131
0
            terminate();
132
0
            return fragment->get_query_ctx()->exec_status();
133
0
        }
134
42
    } else {
135
3
        return Status::InternalError("Fragment already finished! Query: {}", print_id(_query_id));
136
3
    }
137
42
    _block = doris::vectorized::Block::create_unique();
138
42
    return _state_transition(State::RUNNABLE);
139
45
}
140
141
39
Status PipelineTask::_extract_dependencies() {
142
39
    std::vector<std::vector<Dependency*>> read_dependencies;
143
39
    std::vector<Dependency*> write_dependencies;
144
39
    std::vector<Dependency*> finish_dependencies;
145
39
    std::vector<Dependency*> spill_dependencies;
146
39
    read_dependencies.resize(_operators.size());
147
39
    size_t i = 0;
148
45
    for (auto& op : _operators) {
149
45
        auto result = _state->get_local_state_result(op->operator_id());
150
45
        if (!result) {
151
3
            return result.error();
152
3
        }
153
42
        auto* local_state = result.value();
154
42
        read_dependencies[i] = local_state->dependencies();
155
42
        auto* fin_dep = local_state->finishdependency();
156
42
        if (fin_dep) {
157
21
            finish_dependencies.push_back(fin_dep);
158
21
        }
159
42
        if (auto* spill_dependency = local_state->spill_dependency()) {
160
21
            spill_dependencies.push_back(spill_dependency);
161
21
        }
162
42
        i++;
163
42
    }
164
36
    DBUG_EXECUTE_IF("fault_inject::PipelineXTask::_extract_dependencies", {
165
36
        Status status = Status::Error<INTERNAL_ERROR>(
166
36
                "fault_inject pipeline_task _extract_dependencies failed");
167
36
        return status;
168
36
    });
169
36
    {
170
36
        auto* local_state = _state->get_sink_local_state();
171
36
        write_dependencies = local_state->dependencies();
172
36
        auto* fin_dep = local_state->finishdependency();
173
36
        if (fin_dep) {
174
36
            finish_dependencies.push_back(fin_dep);
175
36
        }
176
36
        if (auto* spill_dependency = local_state->spill_dependency()) {
177
21
            spill_dependencies.push_back(spill_dependency);
178
21
        }
179
36
    }
180
36
    {
181
36
        std::unique_lock<std::mutex> lc(_dependency_lock);
182
36
        read_dependencies.swap(_read_dependencies);
183
36
        write_dependencies.swap(_write_dependencies);
184
36
        finish_dependencies.swap(_finish_dependencies);
185
36
        spill_dependencies.swap(_spill_dependencies);
186
36
    }
187
36
    return Status::OK();
188
36
}
189
190
18
bool PipelineTask::inject_shared_state(std::shared_ptr<BasicSharedState> shared_state) {
191
18
    if (!shared_state) {
192
3
        return false;
193
3
    }
194
    // Shared state is created by the upstream task's sink operator and shared by the source operator of
195
    // this task.
196
21
    for (auto& op : _operators) {
197
21
        if (shared_state->related_op_ids.contains(op->operator_id())) {
198
9
            _op_shared_states.insert({op->operator_id(), shared_state});
199
9
            return true;
200
9
        }
201
21
    }
202
    // Shared state is created by the first sink operator and shared by the sink operator of this task.
203
    // For example, Set operations.
204
6
    if (shared_state->related_op_ids.contains(_sink->dests_id().front())) {
205
3
        DCHECK_EQ(_sink_shared_state, nullptr)
206
0
                << " Sink: " << _sink->get_name() << " dest id: " << _sink->dests_id().front();
207
3
        _sink_shared_state = shared_state;
208
3
        return true;
209
3
    }
210
3
    return false;
211
6
}
212
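For reference, the matching rule implemented by `inject_shared_state` above can be summarized with the following standalone sketch; the `SharedState`/`TaskSketch` types are simplified stand-ins for illustration only and are not part of the covered file.

#include <map>
#include <memory>
#include <set>
#include <vector>

struct SharedState {
    std::set<int> related_op_ids;  // operator ids this state is shared with
};

struct TaskSketch {
    std::vector<int> operator_ids;  // source-side operators of this task
    int sink_dest_id = -1;          // first dest id of the sink
    std::map<int, std::shared_ptr<SharedState>> op_shared_states;
    std::shared_ptr<SharedState> sink_shared_state;

    bool inject(std::shared_ptr<SharedState> state) {
        if (!state) {
            return false;
        }
        for (int op_id : operator_ids) {
            if (state->related_op_ids.contains(op_id)) {
                op_shared_states.emplace(op_id, state);
                return true;  // shared with one of this task's source operators
            }
        }
        if (state->related_op_ids.contains(sink_dest_id)) {
            sink_shared_state = std::move(state);
            return true;  // shared with this task's sink (e.g. set operations)
        }
        return false;
    }
};

int main() {
    TaskSketch task;
    task.operator_ids = {7};
    task.sink_dest_id = 9;
    auto state = std::make_shared<SharedState>();
    state->related_op_ids = {7};
    return task.inject(state) ? 0 : 1;  // adopted by the source operator with id 7
}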
213
45
void PipelineTask::_init_profile() {
214
45
    _task_profile = std::make_unique<RuntimeProfile>(fmt::format("PipelineTask(index={})", _index));
215
45
    _parent_profile->add_child(_task_profile.get(), true, nullptr);
216
45
    _task_cpu_timer = ADD_TIMER(_task_profile, "TaskCpuTime");
217
218
45
    static const char* exec_time = "ExecuteTime";
219
45
    _exec_timer = ADD_TIMER(_task_profile, exec_time);
220
45
    _prepare_timer = ADD_CHILD_TIMER(_task_profile, "PrepareTime", exec_time);
221
45
    _open_timer = ADD_CHILD_TIMER(_task_profile, "OpenTime", exec_time);
222
45
    _get_block_timer = ADD_CHILD_TIMER(_task_profile, "GetBlockTime", exec_time);
223
45
    _get_block_counter = ADD_COUNTER(_task_profile, "GetBlockCounter", TUnit::UNIT);
224
45
    _sink_timer = ADD_CHILD_TIMER(_task_profile, "SinkTime", exec_time);
225
45
    _close_timer = ADD_CHILD_TIMER(_task_profile, "CloseTime", exec_time);
226
227
45
    _wait_worker_timer = ADD_TIMER_WITH_LEVEL(_task_profile, "WaitWorkerTime", 1);
228
229
45
    _schedule_counts = ADD_COUNTER(_task_profile, "NumScheduleTimes", TUnit::UNIT);
230
45
    _yield_counts = ADD_COUNTER(_task_profile, "NumYieldTimes", TUnit::UNIT);
231
45
    _core_change_times = ADD_COUNTER(_task_profile, "CoreChangeTimes", TUnit::UNIT);
232
45
    _memory_reserve_times = ADD_COUNTER(_task_profile, "MemoryReserveTimes", TUnit::UNIT);
233
45
    _memory_reserve_failed_times =
234
45
            ADD_COUNTER(_task_profile, "MemoryReserveFailedTimes", TUnit::UNIT);
235
45
}
236
237
18
void PipelineTask::_fresh_profile_counter() {
238
18
    COUNTER_SET(_schedule_counts, (int64_t)_schedule_time);
239
18
    COUNTER_SET(_wait_worker_timer, (int64_t)_wait_worker_watcher.elapsed_time());
240
18
}
241
242
36
Status PipelineTask::_open() {
243
36
    SCOPED_TIMER(_task_profile->total_time_counter());
244
36
    SCOPED_CPU_TIMER(_task_cpu_timer);
245
36
    SCOPED_TIMER(_open_timer);
246
36
    _dry_run = _sink->should_dry_run(_state);
247
42
    for (auto& o : _operators) {
248
42
        RETURN_IF_ERROR(_state->get_local_state(o->operator_id())->open(_state));
249
42
    }
250
36
    RETURN_IF_ERROR(_state->get_sink_local_state()->open(_state));
251
36
    RETURN_IF_ERROR(_extract_dependencies());
252
36
    DBUG_EXECUTE_IF("fault_inject::PipelineXTask::open", {
253
36
        Status status = Status::Error<INTERNAL_ERROR>("fault_inject pipeline_task open failed");
254
36
        return status;
255
36
    });
256
36
    _opened = true;
257
36
    return Status::OK();
258
36
}
259
260
180
Status PipelineTask::_prepare() {
261
180
    SCOPED_TIMER(_task_profile->total_time_counter());
262
180
    SCOPED_CPU_TIMER(_task_cpu_timer);
263
216
    for (auto& o : _operators) {
264
216
        RETURN_IF_ERROR(_state->get_local_state(o->operator_id())->prepare(_state));
265
216
    }
266
180
    RETURN_IF_ERROR(_state->get_sink_local_state()->prepare(_state));
267
180
    return Status::OK();
268
180
}
269
270
123
bool PipelineTask::_wait_to_start() {
271
    // Before the task starts, we should make sure that
272
    // 1. Execution dependency is ready (which is controlled by FE 2-phase commit)
273
    // 2. Runtime filter dependencies are ready
274
    // 3. All tablets are loaded into local storage
275
123
    return std::any_of(
276
123
            _execution_dependencies.begin(), _execution_dependencies.end(),
277
162
            [&](Dependency* dep) -> bool { return dep->is_blocked_by(shared_from_this()); });
278
123
}
279
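The same `std::any_of`-over-dependencies pattern recurs in `_wait_to_start()`, `_is_pending_finish()` and `_is_blocked()`: the task may proceed only when no dependency reports it as blocked. A minimal standalone sketch of the idea, using hypothetical `Dep`/`Task` stand-ins rather than the real `Dependency`/`PipelineTask` classes:

#include <algorithm>
#include <memory>
#include <vector>

struct Task;

struct Dep {
    bool ready = false;
    // Returns true while not ready; the real dependency would also record `task` for a wake-up.
    bool is_blocked_by(const std::shared_ptr<Task>& /*task*/) const { return !ready; }
};

struct Task : std::enable_shared_from_this<Task> {
    std::vector<Dep*> execution_deps;
    // Mirrors the any_of pattern above: the task may start only when no dependency blocks it.
    bool wait_to_start() {
        return std::any_of(execution_deps.begin(), execution_deps.end(),
                           [&](Dep* d) { return d->is_blocked_by(shared_from_this()); });
    }
};

int main() {
    auto task = std::make_shared<Task>();
    Dep fe_commit, runtime_filter;
    task->execution_deps = {&fe_commit, &runtime_filter};
    bool blocked = task->wait_to_start();   // true: both deps still pending
    fe_commit.ready = runtime_filter.ready = true;
    blocked = task->wait_to_start();        // false: the task can start now
    return blocked ? 1 : 0;
}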
280
48
bool PipelineTask::_is_pending_finish() {
281
    // Spilling may be in progress if eos is true.
282
48
    return std::any_of(_spill_dependencies.begin(), _spill_dependencies.end(),
283
48
                       [&](Dependency* dep) -> bool {
284
33
                           return dep->is_blocked_by(shared_from_this());
285
33
                       }) ||
286
48
           std::any_of(
287
45
                   _finish_dependencies.begin(), _finish_dependencies.end(),
288
57
                   [&](Dependency* dep) -> bool { return dep->is_blocked_by(shared_from_this()); });
289
48
}
290
291
6
bool PipelineTask::is_revoking() const {
292
    // Spilling may be in progress if eos is true.
293
6
    return std::any_of(_spill_dependencies.begin(), _spill_dependencies.end(),
294
9
                       [&](Dependency* dep) -> bool { return dep->is_blocked_by(); });
295
6
}
296
297
839k
bool PipelineTask::_is_blocked() {
298
    // `_dry_run = true` means we do not need data from source operator.
299
839k
    if (!_dry_run) {
300
1.67M
        for (int i = cast_set<int>(_read_dependencies.size() - 1); i >= 0; i--) {
301
            // `_read_dependencies` is organized by operator. For each operator, the running condition is met iff all of its dependencies are ready.
302
839k
            for (auto* dep : _read_dependencies[i]) {
303
839k
                if (dep->is_blocked_by(shared_from_this())) {
304
42
                    return true;
305
42
                }
306
839k
            }
307
            // If all dependencies of this operator are ready, we can execute this task as long as no more data is needed from upstream operators.
308
839k
            if (!_operators[i]->need_more_input_data(_state)) {
309
6
                break;
310
6
            }
311
839k
        }
312
839k
    }
313
839k
    return std::any_of(_spill_dependencies.begin(), _spill_dependencies.end(),
314
1.67M
                       [&](Dependency* dep) -> bool {
315
1.67M
                           return dep->is_blocked_by(shared_from_this());
316
1.67M
                       }) ||
317
839k
           _memory_sufficient_dependency->is_blocked_by(shared_from_this()) ||
318
839k
           std::any_of(
319
839k
                   _write_dependencies.begin(), _write_dependencies.end(),
320
839k
                   [&](Dependency* dep) -> bool { return dep->is_blocked_by(shared_from_this()); });
321
839k
}
322
323
15
void PipelineTask::terminate() {
324
    // We hold a lock to ensure that none of the dependencies are destructed here.
325
15
    std::unique_lock<std::mutex> lc(_dependency_lock);
326
15
    auto fragment = _fragment_context.lock();
327
15
    if (!is_finalized() && fragment) {
328
15
        try {
329
15
            DCHECK(_wake_up_early || fragment->is_canceled());
330
15
            std::for_each(_spill_dependencies.begin(), _spill_dependencies.end(),
331
30
                          [&](Dependency* dep) { dep->set_always_ready(); });
332
15
            std::for_each(_write_dependencies.begin(), _write_dependencies.end(),
333
15
                          [&](Dependency* dep) { dep->set_always_ready(); });
334
15
            std::for_each(_finish_dependencies.begin(), _finish_dependencies.end(),
335
30
                          [&](Dependency* dep) { dep->set_always_ready(); });
336
15
            std::for_each(_read_dependencies.begin(), _read_dependencies.end(),
337
15
                          [&](std::vector<Dependency*>& deps) {
338
15
                              std::for_each(deps.begin(), deps.end(),
339
15
                                            [&](Dependency* dep) { dep->set_always_ready(); });
340
15
                          });
341
            // The execution dependencies are never switched from ready back to blocking, so we just set them ready here.
342
15
            std::for_each(_execution_dependencies.begin(), _execution_dependencies.end(),
343
30
                          [&](Dependency* dep) { dep->set_ready(); });
344
15
            _memory_sufficient_dependency->set_ready();
345
15
        } catch (const doris::Exception& e) {
346
0
            LOG(WARNING) << "Terminate failed: " << e.code() << ", " << e.to_string();
347
0
        }
348
15
    }
349
15
}
350
351
/**
352
 * `_eos` indicates whether the execution phase is done. `done` indicates whether we could close
353
 * this task.
354
 *
355
 * For example,
356
 * 1. If `_eos` is false, we should continue to fetch the next block, so we cannot close (i.e.
357
 *    `done` is false).
358
 * 2. If `_eos` is true, all blocks from the source are exhausted, but `_is_pending_finish()` is
359
 *    true, which means we should wait for a pending dependency to become ready (e.g. a running
360
 *    RPC), so we cannot close (i.e. `done` is false).
361
 * 3. If `_eos` is true, all blocks from the source are exhausted, and `_is_pending_finish()` is
362
 *    false, so we can close immediately (i.e. `done` is true).
363
 * @param done
364
 * @return
365
 */
366
99
Status PipelineTask::execute(bool* done) {
367
99
    if (_exec_state != State::RUNNABLE || _blocked_dep != nullptr) [[unlikely]] {
368
3
        return Status::InternalError("Pipeline task is not runnable! Task info: {}",
369
3
                                     debug_string());
370
3
    }
371
96
    auto fragment_context = _fragment_context.lock();
372
96
    DCHECK(fragment_context);
373
96
    int64_t time_spent = 0;
374
96
    ThreadCpuStopWatch cpu_time_stop_watch;
375
96
    cpu_time_stop_watch.start();
376
96
    SCOPED_ATTACH_TASK(_state);
377
96
    Defer running_defer {[&]() {
378
96
        if (_task_queue) {
379
96
            _task_queue->update_statistics(this, time_spent);
380
96
        }
381
96
        int64_t delta_cpu_time = cpu_time_stop_watch.elapsed_time();
382
96
        _task_cpu_timer->update(delta_cpu_time);
383
96
        fragment_context->get_query_ctx()->resource_ctx()->cpu_context()->update_cpu_cost_ms(
384
96
                delta_cpu_time);
385
386
        // If the task is woken up early, we should terminate all operators, and this task can be closed immediately.
387
96
        if (_wake_up_early) {
388
9
            terminate();
389
9
            THROW_IF_ERROR(_root->terminate(_state));
390
9
            THROW_IF_ERROR(_sink->terminate(_state));
391
9
            _eos = true;
392
9
            *done = true;
393
87
        } else if (_eos && !_spilling &&
394
87
                   (fragment_context->is_canceled() || !_is_pending_finish())) {
395
24
            *done = true;
396
24
        }
397
96
    }};
398
96
    const auto query_id = _state->query_id();
399
    // If this task has already reached EOS and the block is empty (which means we have already output all blocks),
400
    // just return here.
401
96
    if (_eos && !_spilling) {
402
3
        return Status::OK();
403
3
    }
404
    // If this task is blocked by a spilling request and woken up immediately, the spilling
405
    // dependency will not block this task and we should just run here.
406
93
    if (!_block->empty()) {
407
0
        LOG(INFO) << "Query: " << print_id(query_id) << " has pending block, size: "
408
0
                  << PrettyPrinter::print_bytes(_block->allocated_bytes());
409
0
        DCHECK(_spilling);
410
0
    }
411
412
93
    SCOPED_TIMER(_task_profile->total_time_counter());
413
93
    SCOPED_TIMER(_exec_timer);
414
415
93
    if (!_wake_up_early) {
416
93
        RETURN_IF_ERROR(_prepare());
417
93
    }
418
93
    DBUG_EXECUTE_IF("fault_inject::PipelineXTask::execute", {
419
93
        Status status = Status::Error<INTERNAL_ERROR>("fault_inject pipeline_task execute failed");
420
93
        return status;
421
93
    });
422
    // `_wake_up_early` must be checked after `_wait_to_start()`
423
93
    if (_wait_to_start() || _wake_up_early) {
424
6
        return Status::OK();
425
6
    }
426
93
    RETURN_IF_ERROR(_prepare());
427
428
    // The task state must be RUNNABLE at this point
429
87
    if (!_opened && !fragment_context->is_canceled()) {
430
33
        DBUG_EXECUTE_IF("PipelineTask::execute.open_sleep", {
431
33
            auto required_pipeline_id =
432
33
                    DebugPoints::instance()->get_debug_param_or_default<int32_t>(
433
33
                            "PipelineTask::execute.open_sleep", "pipeline_id", -1);
434
33
            auto required_task_id = DebugPoints::instance()->get_debug_param_or_default<int32_t>(
435
33
                    "PipelineTask::execute.open_sleep", "task_id", -1);
436
33
            if (required_pipeline_id == pipeline_id() && required_task_id == task_id()) {
437
33
                LOG(WARNING) << "PipelineTask::execute.open_sleep sleep 5s";
438
33
                sleep(5);
439
33
            }
440
33
        });
441
442
33
        SCOPED_RAW_TIMER(&time_spent);
443
33
        RETURN_IF_ERROR(_open());
444
33
    }
445
446
839k
    while (!fragment_context->is_canceled()) {
447
839k
        SCOPED_RAW_TIMER(&time_spent);
448
839k
        Defer defer {[&]() {
449
            // If this run is paused by a spilling request, the block will be output in the next run.
450
839k
            if (!_spilling) {
451
833k
                _block->clear_column_data(_root->row_desc().num_materialized_slots());
452
833k
            }
453
839k
        }};
454
        // `_wake_up_early` must be checked after `_is_blocked()`
455
839k
        if (_is_blocked() || _wake_up_early) {
456
48
            return Status::OK();
457
48
        }
458
459
        /// When a task is cancelled,
460
        /// its blocking state will be cleared and it will transition to a ready state (though it is not truly ready).
461
        /// Here, we check whether it is cancelled to prevent tasks in a blocking state from being re-executed.
462
839k
        if (fragment_context->is_canceled()) {
463
0
            break;
464
0
        }
465
466
839k
        if (time_spent > _exec_time_slice) {
467
9
            COUNTER_UPDATE(_yield_counts, 1);
468
9
            break;
469
9
        }
470
839k
        auto* block = _block.get();
471
472
839k
        DBUG_EXECUTE_IF("fault_inject::PipelineXTask::executing", {
473
839k
            Status status =
474
839k
                    Status::Error<INTERNAL_ERROR>("fault_inject pipeline_task executing failed");
475
839k
            return status;
476
839k
        });
477
478
        // `_sink->is_finished(_state)` means sink operator should be finished
479
839k
        if (_sink->is_finished(_state)) {
480
3
            set_wake_up_early();
481
3
            return Status::OK();
482
3
        }
483
484
        // `_dry_run` means the sink operator needs no more data
485
839k
        _eos = _dry_run || _eos;
486
839k
        _spilling = false;
487
839k
        auto workload_group = _state->workload_group();
488
        // If the last run was paused by a spilling request, `_block` already holds some rows produced
489
        // in that run, so we resume execution using that block.
490
839k
        if (!_eos && _block->empty()) {
491
836k
            SCOPED_TIMER(_get_block_timer);
492
836k
            if (_state->low_memory_mode()) {
493
4.63k
                _sink->set_low_memory_mode(_state);
494
4.63k
                _root->set_low_memory_mode(_state);
495
4.63k
            }
496
836k
            DEFER_RELEASE_RESERVED();
497
836k
            _get_block_counter->update(1);
498
836k
            const auto reserve_size = _root->get_reserve_mem_size(_state);
499
836k
            _root->reset_reserve_mem_size(_state);
500
501
836k
            if (workload_group &&
502
836k
                _state->get_query_ctx()
503
7.82k
                        ->resource_ctx()
504
7.82k
                        ->task_controller()
505
7.82k
                        ->is_enable_reserve_memory() &&
506
836k
                reserve_size > 0) {
507
7.82k
                if (!_try_to_reserve_memory(reserve_size, _root)) {
508
3.18k
                    continue;
509
3.18k
                }
510
7.82k
            }
511
512
833k
            bool eos = false;
513
833k
            RETURN_IF_ERROR(_root->get_block_after_projects(_state, block, &eos));
514
833k
            RETURN_IF_ERROR(block->check_type_and_column());
515
833k
            _eos = eos;
516
833k
        }
517
518
836k
        if (!_block->empty() || _eos) {
519
3.18k
            SCOPED_TIMER(_sink_timer);
520
3.18k
            Status status = Status::OK();
521
3.18k
            DEFER_RELEASE_RESERVED();
522
3.18k
            COUNTER_UPDATE(_memory_reserve_times, 1);
523
3.18k
            if (_state->get_query_ctx()
524
3.18k
                        ->resource_ctx()
525
3.18k
                        ->task_controller()
526
3.18k
                        ->is_enable_reserve_memory() &&
527
3.18k
                workload_group && !(_wake_up_early || _dry_run)) {
528
3.14k
                const auto sink_reserve_size = _sink->get_reserve_mem_size(_state, _eos);
529
3.14k
                if (sink_reserve_size > 0 &&
530
3.14k
                    !_try_to_reserve_memory(sink_reserve_size, _sink.get())) {
531
3.13k
                    continue;
532
3.13k
                }
533
3.14k
            }
534
535
45
            if (_eos) {
536
27
                RETURN_IF_ERROR(close(Status::OK(), false));
537
27
            }
538
539
45
            DBUG_EXECUTE_IF("PipelineTask::execute.sink_eos_sleep", {
540
45
                auto required_pipeline_id =
541
45
                        DebugPoints::instance()->get_debug_param_or_default<int32_t>(
542
45
                                "PipelineTask::execute.sink_eos_sleep", "pipeline_id", -1);
543
45
                auto required_task_id =
544
45
                        DebugPoints::instance()->get_debug_param_or_default<int32_t>(
545
45
                                "PipelineTask::execute.sink_eos_sleep", "task_id", -1);
546
45
                if (required_pipeline_id == pipeline_id() && required_task_id == task_id()) {
547
45
                    LOG(WARNING) << "PipelineTask::execute.sink_eos_sleep sleep 10s";
548
45
                    sleep(10);
549
45
                }
550
45
            });
551
552
45
            DBUG_EXECUTE_IF("PipelineTask::execute.terminate", {
553
45
                if (_eos) {
554
45
                    auto required_pipeline_id =
555
45
                            DebugPoints::instance()->get_debug_param_or_default<int32_t>(
556
45
                                    "PipelineTask::execute.terminate", "pipeline_id", -1);
557
45
                    auto required_task_id =
558
45
                            DebugPoints::instance()->get_debug_param_or_default<int32_t>(
559
45
                                    "PipelineTask::execute.terminate", "task_id", -1);
560
45
                    auto required_fragment_id =
561
45
                            DebugPoints::instance()->get_debug_param_or_default<int32_t>(
562
45
                                    "PipelineTask::execute.terminate", "fragment_id", -1);
563
45
                    if (required_pipeline_id == pipeline_id() && required_task_id == task_id() &&
564
45
                        fragment_context->get_fragment_id() == required_fragment_id) {
565
45
                        _wake_up_early = true;
566
45
                        terminate();
567
45
                    } else if (required_pipeline_id == pipeline_id() &&
568
45
                               fragment_context->get_fragment_id() == required_fragment_id) {
569
45
                        LOG(WARNING) << "PipelineTask::execute.terminate sleep 5s";
570
45
                        sleep(5);
571
45
                    }
572
45
                }
573
45
            });
574
45
            RETURN_IF_ERROR(block->check_type_and_column());
575
45
            status = _sink->sink(_state, block, _eos);
576
577
45
            if (status.is<ErrorCode::END_OF_FILE>()) {
578
3
                set_wake_up_early();
579
3
                return Status::OK();
580
42
            } else if (!status) {
581
0
                return status;
582
0
            }
583
584
42
            if (_eos) { // just return, the scheduler will do finish work
585
24
                return Status::OK();
586
24
            }
587
42
        }
588
836k
    }
589
590
87
    RETURN_IF_ERROR(get_task_queue()->push_back(shared_from_this()));
591
9
    return Status::OK();
592
9
}
593
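As described in the comment block above `execute()`, the caller keeps driving the task until `*done` becomes true and only then performs the close/finish work. A minimal sketch of such a driver loop, using a fake task rather than the real scheduler:

#include <iostream>

struct FakeTask {
    int remaining_blocks = 3;
    // Mimics the contract: sets *done once all blocks are produced and nothing is pending.
    bool execute(bool* done) {
        if (remaining_blocks > 0) {
            --remaining_blocks;
        }
        *done = (remaining_blocks == 0);
        return true;  // stands in for Status::OK()
    }
};

int main() {
    FakeTask task;
    bool done = false;
    while (!done) {
        if (!task.execute(&done)) {
            break;  // error path: a real scheduler would cancel the fragment here
        }
    }
    std::cout << "task done, ready to close\n";
    return 0;
}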
594
10.9k
bool PipelineTask::_try_to_reserve_memory(const size_t reserve_size, OperatorBase* op) {
595
10.9k
    auto st = thread_context()->thread_mem_tracker_mgr->try_reserve(reserve_size);
596
10.9k
    COUNTER_UPDATE(_memory_reserve_times, 1);
597
10.9k
    auto sink_revocable_mem_size = _sink->revocable_mem_size(_state);
598
10.9k
    if (st.ok() && _state->enable_force_spill() && _sink->is_spillable() &&
599
10.9k
        sink_revocable_mem_size >= vectorized::SpillStream::MIN_SPILL_WRITE_BATCH_MEM) {
600
0
        st = Status(ErrorCode::QUERY_MEMORY_EXCEEDED, "Force Spill");
601
0
    }
602
10.9k
    if (!st.ok()) {
603
10.9k
        COUNTER_UPDATE(_memory_reserve_failed_times, 1);
604
10.9k
        auto debug_msg = fmt::format(
605
10.9k
                "Query: {} , try to reserve: {}, operator name: {}, operator "
606
10.9k
                "id: {}, task id: {}, root revocable mem size: {}, sink revocable mem"
607
10.9k
                "size: {}, failed: {}",
608
10.9k
                print_id(_query_id), PrettyPrinter::print_bytes(reserve_size), op->get_name(),
609
10.9k
                op->node_id(), _state->task_id(),
610
10.9k
                PrettyPrinter::print_bytes(op->revocable_mem_size(_state)),
611
10.9k
                PrettyPrinter::print_bytes(sink_revocable_mem_size), st.to_string());
612
        // The PROCESS_MEMORY_EXCEEDED error message already contains process_mem_log_str
613
10.9k
        if (!st.is<ErrorCode::PROCESS_MEMORY_EXCEEDED>()) {
614
10.9k
            debug_msg +=
615
10.9k
                    fmt::format(", debug info: {}", GlobalMemoryArbitrator::process_mem_log_str());
616
10.9k
        }
617
10.9k
        LOG_EVERY_N(INFO, 100) << debug_msg;
618
        // If the sink has enough revocable memory, trigger memory revocation
619
10.9k
        if (sink_revocable_mem_size >= vectorized::SpillStream::MIN_SPILL_WRITE_BATCH_MEM) {
620
6.32k
            LOG(INFO) << fmt::format(
621
6.32k
                    "Query: {} sink: {}, node id: {}, task id: "
622
6.32k
                    "{}, revocable mem size: {}",
623
6.32k
                    print_id(_query_id), _sink->get_name(), _sink->node_id(), _state->task_id(),
624
6.32k
                    PrettyPrinter::print_bytes(sink_revocable_mem_size));
625
6.32k
            ExecEnv::GetInstance()->workload_group_mgr()->add_paused_query(
626
6.32k
                    _state->get_query_ctx()->resource_ctx()->shared_from_this(), reserve_size, st);
627
6.32k
            _spilling = true;
628
6.32k
            return false;
629
6.32k
        } else {
630
            // If the reservation failed, do not add this query to the paused list, because the request
631
            // is very small and will not consume much memory. But we need to set low memory mode to
632
            // indicate that the system should not use too much memory.
633
4.64k
            _state->get_query_ctx()->set_low_memory_mode();
634
4.64k
        }
635
10.9k
    }
636
4.64k
    return true;
637
10.9k
}
638
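The reserve-or-spill decision in `_try_to_reserve_memory` boils down to: continue if the reservation succeeds, pause and spill if enough memory is revocable, otherwise continue in low-memory mode. A standalone sketch of that decision, with `try_reserve` and `MIN_SPILL_BATCH` as illustrative assumptions rather than the real interfaces:

#include <cstddef>
#include <iostream>

constexpr size_t MIN_SPILL_BATCH = 32 << 20;  // pretend 32 MB is the minimum worth spilling

bool try_reserve(size_t /*bytes*/) { return false; }  // pretend the reservation always fails

// Returns true if execution can continue, false if the task should pause and spill first.
bool reserve_or_spill(size_t reserve_size, size_t revocable_size, bool* spilling) {
    if (try_reserve(reserve_size)) {
        return true;  // enough memory, keep going
    }
    if (revocable_size >= MIN_SPILL_BATCH) {
        *spilling = true;  // enough revocable data: pause, revoke memory, retry later
        return false;
    }
    // Too little revocable data to bother spilling: continue in low-memory mode.
    return true;
}

int main() {
    bool spilling = false;
    bool cont = reserve_or_spill(/*reserve_size=*/64 << 20, /*revocable_size=*/128 << 20, &spilling);
    std::cout << "continue=" << cont << " spilling=" << spilling << '\n';
    return 0;
}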
639
459k
void PipelineTask::stop_if_finished() {
640
459k
    auto fragment = _fragment_context.lock();
641
459k
    if (!fragment) {
642
0
        return;
643
0
    }
644
459k
    SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(fragment->get_query_ctx()->query_mem_tracker());
645
459k
    if (auto sink = _sink) {
646
459k
        if (sink->is_finished(_state)) {
647
3
            set_wake_up_early();
648
3
            terminate();
649
3
        }
650
459k
    }
651
459k
}
652
653
3
Status PipelineTask::finalize() {
654
3
    auto fragment = _fragment_context.lock();
655
3
    if (!fragment) {
656
0
        return Status::OK();
657
0
    }
658
3
    SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(fragment->get_query_ctx()->query_mem_tracker());
659
3
    std::unique_lock<std::mutex> lc(_dependency_lock);
660
3
    RETURN_IF_ERROR(_state_transition(State::FINALIZED));
661
3
    _sink_shared_state.reset();
662
3
    _op_shared_states.clear();
663
3
    _shared_state_map.clear();
664
3
    _block.reset();
665
3
    _operators.clear();
666
3
    _sink.reset();
667
3
    _pipeline.reset();
668
3
    return Status::OK();
669
3
}
670
671
45
Status PipelineTask::close(Status exec_status, bool close_sink) {
672
45
    int64_t close_ns = 0;
673
45
    Status s;
674
45
    {
675
45
        SCOPED_RAW_TIMER(&close_ns);
676
45
        if (close_sink) {
677
18
            s = _sink->close(_state, exec_status);
678
18
        }
679
57
        for (auto& op : _operators) {
680
57
            auto tem = op->close(_state);
681
57
            if (!tem.ok() && s.ok()) {
682
0
                s = tem;
683
0
            }
684
57
        }
685
45
    }
686
45
    if (_opened) {
687
45
        COUNTER_UPDATE(_close_timer, close_ns);
688
45
        COUNTER_UPDATE(_task_profile->total_time_counter(), close_ns);
689
45
    }
690
691
45
    if (close_sink && _opened) {
692
18
        _task_profile->add_info_string("WakeUpEarly", std::to_string(_wake_up_early.load()));
693
18
        _fresh_profile_counter();
694
18
    }
695
696
45
    if (_task_queue) {
697
45
        _task_queue->update_statistics(this, close_ns);
698
45
    }
699
45
    if (close_sink) {
700
18
        RETURN_IF_ERROR(_state_transition(State::FINISHED));
701
18
    }
702
45
    return s;
703
45
}
704
705
59.3k
std::string PipelineTask::debug_string() {
706
59.3k
    fmt::memory_buffer debug_string_buffer;
707
708
59.3k
    fmt::format_to(debug_string_buffer, "QueryId: {}\n", print_id(_query_id));
709
59.3k
    fmt::format_to(debug_string_buffer, "InstanceId: {}\n",
710
59.3k
                   print_id(_state->fragment_instance_id()));
711
712
59.3k
    fmt::format_to(debug_string_buffer,
713
59.3k
                   "PipelineTask[id = {}, open = {}, eos = {}, state = {}, dry run = "
714
59.3k
                   "{}, _wake_up_early = {}, time elapsed since last state changing = {}s, spilling"
715
59.3k
                   " = {}, is running = {}]",
716
59.3k
                   _index, _opened, _eos, _to_string(_exec_state), _dry_run, _wake_up_early.load(),
717
59.3k
                   _state_change_watcher.elapsed_time() / NANOS_PER_SEC, _spilling, is_running());
718
59.3k
    std::unique_lock<std::mutex> lc(_dependency_lock);
719
59.3k
    auto* cur_blocked_dep = _blocked_dep;
720
59.3k
    auto fragment = _fragment_context.lock();
721
59.3k
    if (is_finalized() || !fragment) {
722
15
        fmt::format_to(debug_string_buffer, " pipeline name = {}", _pipeline_name);
723
15
        return fmt::to_string(debug_string_buffer);
724
15
    }
725
59.3k
    auto elapsed = fragment->elapsed_time() / NANOS_PER_SEC;
726
59.3k
    fmt::format_to(debug_string_buffer,
727
59.3k
                   " elapse time = {}s, block dependency = [{}]\noperators: ", elapsed,
728
59.3k
                   cur_blocked_dep && !is_finalized() ? cur_blocked_dep->debug_string() : "NULL");
729
730
118k
    for (size_t i = 0; i < _operators.size(); i++) {
731
59.3k
        fmt::format_to(debug_string_buffer, "\n{}",
732
59.3k
                       _opened && !is_finalized()
733
59.3k
                               ? _operators[i]->debug_string(_state, cast_set<int>(i))
734
59.3k
                               : _operators[i]->debug_string(cast_set<int>(i)));
735
59.3k
    }
736
59.3k
    fmt::format_to(debug_string_buffer, "\n{}\n",
737
59.3k
                   _opened && !is_finalized()
738
59.3k
                           ? _sink->debug_string(_state, cast_set<int>(_operators.size()))
739
59.3k
                           : _sink->debug_string(cast_set<int>(_operators.size())));
740
741
59.3k
    fmt::format_to(debug_string_buffer, "\nRead Dependency Information: \n");
742
743
59.3k
    size_t i = 0;
744
118k
    for (; i < _read_dependencies.size(); i++) {
745
118k
        for (size_t j = 0; j < _read_dependencies[i].size(); j++) {
746
59.3k
            fmt::format_to(debug_string_buffer, "{}. {}\n", i,
747
59.3k
                           _read_dependencies[i][j]->debug_string(cast_set<int>(i) + 1));
748
59.3k
        }
749
59.3k
    }
750
751
59.3k
    fmt::format_to(debug_string_buffer, "{}. {}\n", i,
752
59.3k
                   _memory_sufficient_dependency->debug_string(cast_set<int>(i++)));
753
754
59.3k
    fmt::format_to(debug_string_buffer, "\nWrite Dependency Information: \n");
755
118k
    for (size_t j = 0; j < _write_dependencies.size(); j++, i++) {
756
59.3k
        fmt::format_to(debug_string_buffer, "{}. {}\n", i,
757
59.3k
                       _write_dependencies[j]->debug_string(cast_set<int>(j) + 1));
758
59.3k
    }
759
760
59.3k
    fmt::format_to(debug_string_buffer, "\nExecution Dependency Information: \n");
761
178k
    for (size_t j = 0; j < _execution_dependencies.size(); j++, i++) {
762
118k
        fmt::format_to(debug_string_buffer, "{}. {}\n", i,
763
118k
                       _execution_dependencies[j]->debug_string(cast_set<int>(i) + 1));
764
118k
    }
765
766
59.3k
    fmt::format_to(debug_string_buffer, "\nSpill Dependency Information: \n");
767
178k
    for (size_t j = 0; j < _spill_dependencies.size(); j++, i++) {
768
118k
        fmt::format_to(debug_string_buffer, "{}. {}\n", i,
769
118k
                       _spill_dependencies[j]->debug_string(cast_set<int>(i) + 1));
770
118k
    }
771
772
59.3k
    fmt::format_to(debug_string_buffer, "Finish Dependency Information: \n");
773
178k
    for (size_t j = 0; j < _finish_dependencies.size(); j++, i++) {
774
118k
        fmt::format_to(debug_string_buffer, "{}. {}\n", i,
775
118k
                       _finish_dependencies[j]->debug_string(cast_set<int>(i) + 1));
776
118k
    }
777
59.3k
    return fmt::to_string(debug_string_buffer);
778
59.3k
}
779
780
0
size_t PipelineTask::get_revocable_size() const {
781
0
    if (is_finalized() || _running || (_eos && !_spilling)) {
782
0
        return 0;
783
0
    }
784
785
0
    return _sink->revocable_mem_size(_state);
786
0
}
787
788
0
Status PipelineTask::revoke_memory(const std::shared_ptr<SpillContext>& spill_context) {
789
0
    if (is_finalized()) {
790
0
        if (spill_context) {
791
0
            spill_context->on_task_finished();
792
0
            VLOG_DEBUG << "Query: " << print_id(_state->query_id()) << ", task: " << ((void*)this)
793
0
                       << " finalized";
794
0
        }
795
0
        return Status::OK();
796
0
    }
797
798
0
    const auto revocable_size = _sink->revocable_mem_size(_state);
799
0
    if (revocable_size >= vectorized::SpillStream::MIN_SPILL_WRITE_BATCH_MEM) {
800
0
        RETURN_IF_ERROR(_sink->revoke_memory(_state, spill_context));
801
0
    } else if (spill_context) {
802
0
        spill_context->on_task_finished();
803
0
        LOG(INFO) << "Query: " << print_id(_state->query_id()) << ", task: " << ((void*)this)
804
0
                  << " has not enough data to revoke: " << revocable_size;
805
0
    }
806
0
    return Status::OK();
807
0
}
808
809
75
Status PipelineTask::wake_up(Dependency* dep) {
810
    // Called by a dependency.
811
75
    DCHECK_EQ(_blocked_dep, dep) << "dep : " << dep->debug_string(0) << "task: " << debug_string();
812
75
    _blocked_dep = nullptr;
813
75
    auto holder = std::dynamic_pointer_cast<PipelineTask>(shared_from_this());
814
75
    RETURN_IF_ERROR(_state_transition(PipelineTask::State::RUNNABLE));
815
75
    RETURN_IF_ERROR(get_task_queue()->push_back(holder));
816
75
    return Status::OK();
817
75
}
818
819
288
Status PipelineTask::_state_transition(State new_state) {
820
288
    if (_exec_state != new_state) {
821
273
        _state_change_watcher.reset();
822
273
        _state_change_watcher.start();
823
273
    }
824
288
    _task_profile->add_info_string("TaskState", _to_string(new_state));
825
288
    _task_profile->add_info_string("BlockedByDependency", _blocked_dep ? _blocked_dep->name() : "");
826
288
    if (!LEGAL_STATE_TRANSITION[(int)new_state].contains(_exec_state)) {
827
51
        return Status::InternalError(
828
51
                "Task state transition from {} to {} is not allowed! Task info: {}",
829
51
                _to_string(_exec_state), _to_string(new_state), debug_string());
830
51
    }
831
237
    _exec_state = new_state;
832
237
    return Status::OK();
833
288
}
834
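`_state_transition` validates each transition against `LEGAL_STATE_TRANSITION`, indexed by the target state. A hypothetical sketch of such a table follows; the states and edges are illustrative and do not reproduce the exact contents of the real table.

#include <array>
#include <set>

enum class State { INITED, RUNNABLE, BLOCKED, FINISHED, FINALIZED };

// For each target state, the set of states it may legally be entered from.
const std::array<std::set<State>, 5> kLegalFrom = {{
        /* INITED    */ {},
        /* RUNNABLE  */ {State::INITED, State::BLOCKED, State::RUNNABLE},
        /* BLOCKED   */ {State::RUNNABLE},
        /* FINISHED  */ {State::RUNNABLE},
        /* FINALIZED */ {State::FINISHED},
}};

bool transition_allowed(State from, State to) {
    return kLegalFrom[static_cast<int>(to)].contains(from);
}

int main() {
    return transition_allowed(State::RUNNABLE, State::FINISHED) ? 0 : 1;
}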
835
#include "common/compile_check_end.h"
836
} // namespace doris::pipeline