Coverage Report

Created: 2025-07-24 22:14

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/be/src/runtime/query_context.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "runtime/query_context.h"
19
20
#include <fmt/core.h>
21
#include <gen_cpp/FrontendService_types.h>
22
#include <gen_cpp/RuntimeProfile_types.h>
23
#include <gen_cpp/Types_types.h>
24
#include <glog/logging.h>
25
26
#include <algorithm>
27
#include <exception>
28
#include <memory>
29
#include <mutex>
30
#include <utility>
31
#include <vector>
32
33
#include "common/logging.h"
34
#include "common/status.h"
35
#include "olap/olap_common.h"
36
#include "pipeline/dependency.h"
37
#include "pipeline/pipeline_fragment_context.h"
38
#include "runtime/exec_env.h"
39
#include "runtime/fragment_mgr.h"
40
#include "runtime/memory/heap_profiler.h"
41
#include "runtime/runtime_query_statistics_mgr.h"
42
#include "runtime/runtime_state.h"
43
#include "runtime/thread_context.h"
44
#include "runtime/workload_group/workload_group_manager.h"
45
#include "runtime/workload_management/query_task_controller.h"
46
#include "runtime_filter/runtime_filter_definitions.h"
47
#include "util/mem_info.h"
48
#include "util/uid_util.h"
49
#include "vec/spill/spill_stream_manager.h"
50
51
namespace doris {
52
53
class DelayReleaseToken : public Runnable {
54
    ENABLE_FACTORY_CREATOR(DelayReleaseToken);
55
56
public:
57
0
    DelayReleaseToken(std::unique_ptr<ThreadPoolToken>&& token) { token_ = std::move(token); }
58
    ~DelayReleaseToken() override = default;
59
0
    void run() override {}
60
    std::unique_ptr<ThreadPoolToken> token_;
61
};
62
63
0
// Returns the textual name of a QuerySource value.
//
// Every enumerator maps to a string spelled exactly like the enumerator
// itself; any value not listed here yields "UNKNOWN".
const std::string toString(QuerySource queryType) {
    switch (queryType) {
    case QuerySource::INTERNAL_FRONTEND:
        return "INTERNAL_FRONTEND";
    case QuerySource::STREAM_LOAD:
        return "STREAM_LOAD";
    case QuerySource::GROUP_COMMIT_LOAD:
        // Previously returned "EXTERNAL_QUERY", which matched no enumerator.
        return "GROUP_COMMIT_LOAD";
    case QuerySource::ROUTINE_LOAD:
        return "ROUTINE_LOAD";
    case QuerySource::EXTERNAL_CONNECTOR:
        return "EXTERNAL_CONNECTOR";
    default:
        return "UNKNOWN";
    }
}
79
80
std::shared_ptr<QueryContext> QueryContext::create(TUniqueId query_id, ExecEnv* exec_env,
81
                                                   const TQueryOptions& query_options,
82
                                                   TNetworkAddress coord_addr, bool is_nereids,
83
                                                   TNetworkAddress current_connect_fe,
84
101
                                                   QuerySource query_type) {
85
101
    auto ctx = QueryContext::create_shared(query_id, exec_env, query_options, coord_addr,
86
101
                                           is_nereids, current_connect_fe, query_type);
87
101
    ctx->init_query_task_controller();
88
101
    return ctx;
89
101
}
90
91
// Constructs the per-query context: sets up the resource context and memory
// tracker, starts the query watcher, creates the execution / memory-sufficient
// dependencies and the runtime filter manager, records the coordinator and FE
// addresses, stamps the arrival time and bumps the query_ctx_cnt metric.
QueryContext::QueryContext(TUniqueId query_id, ExecEnv* exec_env,
                           const TQueryOptions& query_options, TNetworkAddress coord_addr,
                           bool is_nereids, TNetworkAddress current_connect_fe,
                           QuerySource query_source)
        : _timeout_second(-1),
          _query_id(std::move(query_id)),
          _exec_env(exec_env),
          _is_nereids(is_nereids),
          _query_options(query_options),
          _query_source(query_source) {
    // The resource context owns the memory tracker, so it has to exist before
    // the thread's tracker is switched to it.
    _init_resource_context();
    SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(query_mem_tracker());
    _query_watcher.start();

    _execution_dependency =
            pipeline::Dependency::create_unique(-1, -1, "ExecutionDependency", false);
    _memory_sufficient_dependency =
            pipeline::Dependency::create_unique(-1, -1, "MemorySufficientDependency", true);
    _runtime_filter_mgr = std::make_unique<RuntimeFilterMgr>(true);

    _timeout_second = query_options.execution_timeout;

    // Only SELECT, LOAD and EXTERNAL queries are expected here.
    const auto query_kind = query_options.query_type;
    bool is_query_type_valid = query_kind == TQueryType::SELECT ||
                               query_kind == TQueryType::LOAD ||
                               query_kind == TQueryType::EXTERNAL;
    DCHECK_EQ(is_query_type_valid, true);

    this->coord_addr = coord_addr;
    // current_connect_fe is used for report query statistics
    this->current_connect_fe = current_connect_fe;

    // external query has no current_connect_fe
    if (query_kind != TQueryType::EXTERNAL) {
        bool is_report_fe_addr_valid =
                !this->current_connect_fe.hostname.empty() && this->current_connect_fe.port != 0;
        DCHECK_EQ(is_report_fe_addr_valid, true);
    }

    clock_gettime(CLOCK_MONOTONIC, &this->_query_arrival_timestamp);
    DorisMetrics::instance()->query_ctx_cnt->increment(1);
}
130
131
338k
void QueryContext::_init_query_mem_tracker() {
132
338k
    bool has_query_mem_limit = _query_options.__isset.mem_limit && (_query_options.mem_limit > 0);
133
338k
    int64_t bytes_limit = has_query_mem_limit ? _query_options.mem_limit : -1;
134
338k
    if (bytes_limit > MemInfo::mem_limit() || bytes_limit == -1) {
135
0
        VLOG_NOTICE << "Query memory limit " << PrettyPrinter::print(bytes_limit, TUnit::BYTES)
136
0
                    << " exceeds process memory limit of "
137
0
                    << PrettyPrinter::print(MemInfo::mem_limit(), TUnit::BYTES)
138
0
                    << " OR is -1. Using process memory limit instead.";
139
0
        bytes_limit = MemInfo::mem_limit();
140
0
    }
141
    // If the query is a pure load task(streamload, routine load, group commit), then it should not use
142
    // memlimit per query to limit their memory usage.
143
338k
    if (is_pure_load_task()) {
144
338k
        bytes_limit = MemInfo::mem_limit();
145
338k
    }
146
338k
    std::shared_ptr<MemTrackerLimiter> query_mem_tracker;
147
338k
    if (_query_options.query_type == TQueryType::SELECT) {
148
188
        query_mem_tracker = MemTrackerLimiter::create_shared(
149
188
                MemTrackerLimiter::Type::QUERY, fmt::format("Query#Id={}", print_id(_query_id)),
150
188
                bytes_limit);
151
338k
    } else if (_query_options.query_type == TQueryType::LOAD) {
152
3
        query_mem_tracker = MemTrackerLimiter::create_shared(
153
3
                MemTrackerLimiter::Type::LOAD, fmt::format("Load#Id={}", print_id(_query_id)),
154
3
                bytes_limit);
155
338k
    } else if (_query_options.query_type == TQueryType::EXTERNAL) { // spark/flink/etc..
156
338k
        query_mem_tracker = MemTrackerLimiter::create_shared(
157
338k
                MemTrackerLimiter::Type::QUERY, fmt::format("External#Id={}", print_id(_query_id)),
158
338k
                bytes_limit);
159
338k
    } else {
160
0
        LOG(FATAL) << "__builtin_unreachable";
161
0
        __builtin_unreachable();
162
0
    }
163
338k
    if (_query_options.__isset.is_report_success && _query_options.is_report_success) {
164
0
        query_mem_tracker->enable_print_log_usage();
165
0
    }
166
167
    // If enable reserve memory, not enable check limit, because reserve memory will check it.
168
    // If reserve enabled, even if the reserved memory size is smaller than the actual requested memory,
169
    // and the query memory consumption is larger than the limit, we do not expect the query to fail
170
    // after `check_limit` returns an error, but to run as long as possible,
171
    // and will enter the paused state and try to spill when the query reserves next time.
172
    // If the workload group or process runs out of memory, it will be forced to cancel.
173
338k
    query_mem_tracker->set_enable_check_limit(!(_query_options.__isset.enable_reserve_memory &&
174
338k
                                                _query_options.enable_reserve_memory));
175
338k
    _resource_ctx->memory_context()->set_mem_tracker(query_mem_tracker);
176
338k
}
177
178
338k
void QueryContext::_init_resource_context() {
179
338k
    _resource_ctx = ResourceContext::create_shared();
180
338k
    _init_query_mem_tracker();
181
338k
}
182
183
338k
void QueryContext::init_query_task_controller() {
184
338k
    _resource_ctx->set_task_controller(QueryTaskController::create(this));
185
338k
    _resource_ctx->task_controller()->set_task_id(_query_id);
186
338k
    _resource_ctx->task_controller()->set_fe_addr(current_connect_fe);
187
338k
    _resource_ctx->task_controller()->set_query_type(_query_options.query_type);
188
#ifndef BE_TEST
189
    _exec_env->runtime_query_statistics_mgr()->register_resource_context(print_id(_query_id),
190
                                                                         _resource_ctx);
191
#endif
192
338k
}
193
194
338k
// Tears down the query context.
//
// In order: builds the memory-tracker summary message, remembers the workload
// group id, finishes the task controller, reports the query profile when
// profiling is enabled, ends pipeline tracing (non-test builds), releases the
// owned members, schedules spill cleanup (non-test builds), decrements the
// query_ctx_cnt metric and logs the single end-of-query message.
QueryContext::~QueryContext() {
    SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(query_mem_tracker());
    // query mem tracker consumption is equal to 0, it means that after QueryContext is created,
    // it is found that query already exists in _query_ctx_map, and query mem tracker is not used.
    // query mem tracker consumption is not equal to 0 after use, because there is memory consumed
    // on query mem tracker, released on other trackers.
    std::string mem_tracker_msg;
    if (query_mem_tracker()->peak_consumption() != 0) {
        mem_tracker_msg = fmt::format(
                "deregister query/load memory tracker, queryId={}, Limit={}, CurrUsed={}, "
                "PeakUsed={}",
                print_id(_query_id), PrettyPrinter::print_bytes(query_mem_tracker()->limit()),
                PrettyPrinter::print_bytes(query_mem_tracker()->consumption()),
                PrettyPrinter::print_bytes(query_mem_tracker()->peak_consumption()));
    }

    // Only consumed by the tracer call below, which is compiled out under BE_TEST.
    [[maybe_unused]] uint64_t group_id = 0;
    if (workload_group()) {
        group_id = workload_group()->id(); // before remove
    }

    _resource_ctx->task_controller()->finish();

    if (enable_profile()) {
        _report_query_profile();
    }

#ifndef BE_TEST
    if (ExecEnv::GetInstance()->pipeline_tracer_context()->enabled()) [[unlikely]] {
        // A destructor must not let an exception escape; a tracing failure is only logged.
        try {
            ExecEnv::GetInstance()->pipeline_tracer_context()->end_query(_query_id, group_id);
        } catch (const std::exception& e) {
            LOG(WARNING) << "Dump trace log failed because " << e.what();
        }
    }
#endif

    // Explicit release of owned state, in this fixed order.
    _runtime_filter_mgr.reset();
    _execution_dependency.reset();
    _runtime_predicates.clear();
    file_scan_range_params_map.clear();
    obj_pool.clear();
    _merge_controller_handler.reset();

#ifndef BE_TEST
    _exec_env->spill_stream_mgr()->async_cleanup_query(_query_id);
#endif
    DorisMetrics::instance()->query_ctx_cnt->increment(-1);
    // the only one msg shows query's end. any other msg should append to it if need.
    LOG_INFO("Query {} deconstructed, mem_tracker: {}", print_id(this->_query_id), mem_tracker_msg);
}
243
244
18
// Marks the execution dependency as ready, then records `reason` in the
// query's execution status.
void QueryContext::set_ready_to_execute(Status reason) {
    // Readiness is signalled first; the status update follows.
    set_execution_dependency_ready();
    _exec_status.update(reason);
}
248
249
0
void QueryContext::set_ready_to_execute_only() {
250
0
    set_execution_dependency_ready();
251
0
}
252
253
18
void QueryContext::set_execution_dependency_ready() {
254
18
    _execution_dependency->set_ready();
255
18
}
256
257
18
void QueryContext::set_memory_sufficient(bool sufficient) {
258
18
    if (sufficient) {
259
8
        {
260
8
            _memory_sufficient_dependency->set_ready();
261
8
            _resource_ctx->task_controller()->reset_paused_reason();
262
8
        }
263
10
    } else {
264
10
        _memory_sufficient_dependency->block();
265
10
        _resource_ctx->task_controller()->add_paused_count();
266
10
    }
267
18
}
268
269
18
// Cancels the query with `new_status`.
//
// Does nothing if the execution status refuses the update. Otherwise both
// dependencies are made permanently ready, an optional memory / heap-profile
// dump is logged for memory-related failures, and every pipeline fragment
// context except `fragment_id` is cancelled.
void QueryContext::cancel(Status new_status, int fragment_id) {
    if (!_exec_status.update(new_status)) {
        return;
    }

    // Tasks should be always runnable.
    _execution_dependency->set_always_ready();
    _memory_sufficient_dependency->set_always_ready();

    const bool cancelled_for_memory = new_status.is<ErrorCode::MEM_LIMIT_EXCEEDED>() ||
                                      new_status.is<ErrorCode::MEM_ALLOC_FAILED>();
    const bool heap_dump_requested =
            _query_options.__isset.dump_heap_profile_when_mem_limit_exceeded &&
            _query_options.dump_heap_profile_when_mem_limit_exceeded;

    if (cancelled_for_memory && heap_dump_requested) {
        // if query is cancelled because of query mem limit exceeded, dump heap profile
        // at the time of cancellation can get the most accurate memory usage for problem analysis
        auto wg = workload_group();
        auto memory_detail_log = fmt::format(
                "Query {} canceled because of memory limit exceeded, dumping memory "
                "detail profiles. wg: {}. {}",
                print_id(_query_id), wg ? wg->debug_string() : "null",
                doris::ProcessProfile::instance()->memory_profile()->process_memory_detail_str());
        LOG_LONG_STRING(INFO, memory_detail_log);

        std::string dot = HeapProfiler::instance()->dump_heap_profile_to_dot();
        if (!dot.empty()) {
            dot += "\n-------------------------------------------------------\n";
            dot += "Copy the text after `digraph` in the above output to "
                   "http://www.webgraphviz.com to generate a dot graph.\n"
                   "after start heap profiler, if there is no operation, will print `No nodes "
                   "to print`."
                   "If there are many errors: `addr2line: Dwarf Error`,"
                   "or other FAQ, reference doc: "
                   "https://doris.apache.org/community/developer-guide/debug-tool/#4-qa\n";
            auto heap_profile_log =
                    fmt::format("Query {}, dump heap profile to dot: {}", print_id(_query_id), dot);
            LOG_LONG_STRING(INFO, heap_profile_log);
        }
    }

    set_ready_to_execute(new_status);
    cancel_all_pipeline_context(new_status, fragment_id);
}
309
310
0
void QueryContext::set_load_error_url(std::string error_url) {
311
0
    std::lock_guard<std::mutex> lock(_error_url_lock);
312
0
    _load_error_url = error_url;
313
0
}
314
315
0
std::string QueryContext::get_load_error_url() {
316
0
    std::lock_guard<std::mutex> lock(_error_url_lock);
317
0
    return _load_error_url;
318
0
}
319
320
18
void QueryContext::cancel_all_pipeline_context(const Status& reason, int fragment_id) {
321
18
    std::vector<std::weak_ptr<pipeline::PipelineFragmentContext>> ctx_to_cancel;
322
18
    {
323
18
        std::lock_guard<std::mutex> lock(_pipeline_map_write_lock);
324
18
        for (auto& [f_id, f_context] : _fragment_id_to_pipeline_ctx) {
325
0
            if (fragment_id == f_id) {
326
0
                continue;
327
0
            }
328
0
            ctx_to_cancel.push_back(f_context);
329
0
        }
330
18
    }
331
18
    for (auto& f_context : ctx_to_cancel) {
332
0
        if (auto pipeline_ctx = f_context.lock()) {
333
0
            pipeline_ctx->cancel(reason);
334
0
        }
335
0
    }
336
18
}
337
338
0
std::string QueryContext::print_all_pipeline_context() {
339
0
    std::vector<std::weak_ptr<pipeline::PipelineFragmentContext>> ctx_to_print;
340
0
    fmt::memory_buffer debug_string_buffer;
341
0
    size_t i = 0;
342
0
    {
343
0
        fmt::format_to(debug_string_buffer, "{} pipeline fragment contexts in query {}. \n",
344
0
                       _fragment_id_to_pipeline_ctx.size(), print_id(_query_id));
345
346
0
        {
347
0
            std::lock_guard<std::mutex> lock(_pipeline_map_write_lock);
348
0
            for (auto& [f_id, f_context] : _fragment_id_to_pipeline_ctx) {
349
0
                ctx_to_print.push_back(f_context);
350
0
            }
351
0
        }
352
0
        for (auto& f_context : ctx_to_print) {
353
0
            if (auto pipeline_ctx = f_context.lock()) {
354
0
                auto elapsed = pipeline_ctx->elapsed_time() / 1000000000.0;
355
0
                fmt::format_to(debug_string_buffer,
356
0
                               "No.{} (elapse_second={}s, fragment_id={}) : {}\n", i, elapsed,
357
0
                               pipeline_ctx->get_fragment_id(), pipeline_ctx->debug_string());
358
0
                i++;
359
0
            }
360
0
        }
361
0
    }
362
0
    return fmt::to_string(debug_string_buffer);
363
0
}
364
365
void QueryContext::set_pipeline_context(
366
0
        const int fragment_id, std::shared_ptr<pipeline::PipelineFragmentContext> pip_ctx) {
367
0
    std::lock_guard<std::mutex> lock(_pipeline_map_write_lock);
368
0
    _fragment_id_to_pipeline_ctx.insert({fragment_id, pip_ctx});
369
0
}
370
371
0
// Returns the pipeline task scheduler assigned to this query.
// Throws doris::Exception wrapping an InternalError status when no scheduler
// has been set.
doris::pipeline::TaskScheduler* QueryContext::get_pipe_exec_scheduler() {
    if (_task_scheduler) {
        return _task_scheduler;
    }
    throw Exception(Status::InternalError("task_scheduler is null"));
}
377
378
22
// Binds this query to workload group `wg`.
//
// Records the group on the resource context, registers the resource context
// with the group (returning early with that error on failure), and then
// fetches the group's task / scan / remote-scan schedulers.
Status QueryContext::set_workload_group(WorkloadGroupPtr& wg) {
    _resource_ctx->set_workload_group(wg);

    // Should add query first, the workload group will not be deleted,
    // then visit workload group's resource
    // see task_group_manager::delete_workload_group_by_ids
    RETURN_IF_ERROR(workload_group()->add_resource_ctx(_query_id, _resource_ctx));

    workload_group()->get_query_scheduler(&_task_scheduler, &_scan_task_scheduler,
                                          &_remote_scan_task_scheduler);

    return Status::OK();
}
389
390
void QueryContext::add_fragment_profile(
391
        int fragment_id, const std::vector<std::shared_ptr<TRuntimeProfileTree>>& pipeline_profiles,
392
0
        std::shared_ptr<TRuntimeProfileTree> load_channel_profile) {
393
0
    if (pipeline_profiles.empty()) {
394
0
        std::string msg = fmt::format("Add pipeline profile failed, query {}, fragment {}",
395
0
                                      print_id(this->_query_id), fragment_id);
396
0
        LOG_ERROR(msg);
397
0
        DCHECK(false) << msg;
398
0
        return;
399
0
    }
400
401
0
#ifndef NDEBUG
402
0
    for (const auto& p : pipeline_profiles) {
403
0
        DCHECK(p != nullptr) << fmt::format("Add pipeline profile failed, query {}, fragment {}",
404
0
                                            print_id(this->_query_id), fragment_id);
405
0
    }
406
0
#endif
407
408
0
    std::lock_guard<std::mutex> l(_profile_mutex);
409
0
    VLOG_ROW << fmt::format(
410
0
            "Query add fragment profile, query {}, fragment {}, pipeline profile count {} ",
411
0
            print_id(this->_query_id), fragment_id, pipeline_profiles.size());
412
413
0
    _profile_map.insert(std::make_pair(fragment_id, pipeline_profiles));
414
415
0
    if (load_channel_profile != nullptr) {
416
0
        _load_channel_profile_map.insert(std::make_pair(fragment_id, load_channel_profile));
417
0
    }
418
0
}
419
420
0
void QueryContext::_report_query_profile() {
421
0
    std::lock_guard<std::mutex> lg(_profile_mutex);
422
423
0
    for (auto& [fragment_id, fragment_profile] : _profile_map) {
424
0
        std::shared_ptr<TRuntimeProfileTree> load_channel_profile = nullptr;
425
426
0
        if (_load_channel_profile_map.contains(fragment_id)) {
427
0
            load_channel_profile = _load_channel_profile_map[fragment_id];
428
0
        }
429
430
0
        ExecEnv::GetInstance()->runtime_query_statistics_mgr()->register_fragment_profile(
431
0
                _query_id, this->coord_addr, fragment_id, fragment_profile, load_channel_profile);
432
0
    }
433
434
0
    ExecEnv::GetInstance()->runtime_query_statistics_mgr()->trigger_profile_reporting();
435
0
}
436
437
std::unordered_map<int, std::vector<std::shared_ptr<TRuntimeProfileTree>>>
438
0
QueryContext::_collect_realtime_query_profile() {
439
0
    std::unordered_map<int, std::vector<std::shared_ptr<TRuntimeProfileTree>>> res;
440
0
    std::lock_guard<std::mutex> lock(_pipeline_map_write_lock);
441
0
    for (const auto& [fragment_id, fragment_ctx_wptr] : _fragment_id_to_pipeline_ctx) {
442
0
        if (auto fragment_ctx = fragment_ctx_wptr.lock()) {
443
0
            if (fragment_ctx == nullptr) {
444
0
                std::string msg =
445
0
                        fmt::format("PipelineFragmentContext is nullptr, query {} fragment_id: {}",
446
0
                                    print_id(_query_id), fragment_id);
447
0
                LOG_ERROR(msg);
448
0
                DCHECK(false) << msg;
449
0
                continue;
450
0
            }
451
452
0
            auto profile = fragment_ctx->collect_realtime_profile();
453
454
0
            if (profile.empty()) {
455
0
                std::string err_msg = fmt::format(
456
0
                        "Get nothing when collecting profile, query {}, fragment_id: {}",
457
0
                        print_id(_query_id), fragment_id);
458
0
                LOG_ERROR(err_msg);
459
0
                DCHECK(false) << err_msg;
460
0
                continue;
461
0
            }
462
463
0
            res.insert(std::make_pair(fragment_id, profile));
464
0
        }
465
0
    }
466
467
0
    return res;
468
0
}
469
470
0
// Builds a not-yet-done (is_done = false) exec status report from the realtime
// pipeline profiles and all non-null load channel profiles of this query.
TReportExecStatusParams QueryContext::get_realtime_exec_status() {
    TReportExecStatusParams exec_status;

    auto realtime_query_profile = _collect_realtime_query_profile();
    std::vector<std::shared_ptr<TRuntimeProfileTree>> load_channel_profiles;

    {
        // _load_channel_profile_map is written under _profile_mutex in
        // add_fragment_profile(), so it is read under the same mutex here.
        // The lock is limited to this scope and is not nested with any other.
        std::lock_guard<std::mutex> lock(_profile_mutex);
        for (const auto& load_channel_profile : _load_channel_profile_map) {
            if (load_channel_profile.second != nullptr) {
                load_channel_profiles.push_back(load_channel_profile.second);
            }
        }
    }

    exec_status = RuntimeQueryStatisticsMgr::create_report_exec_status_params(
            this->_query_id, std::move(realtime_query_profile), std::move(load_channel_profiles),
            /*is_done=*/false);

    return exec_status;
}
488
489
} // namespace doris