/root/doris/be/src/runtime/query_context.cpp

Source (jump to first uncovered line)
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

#include "runtime/query_context.h"

#include <fmt/core.h>
#include <gen_cpp/FrontendService_types.h>
#include <gen_cpp/RuntimeProfile_types.h>
#include <gen_cpp/Types_types.h>
#include <glog/logging.h>

#include <algorithm>
#include <exception>
#include <memory>
#include <mutex>
#include <utility>
#include <vector>

#include "common/logging.h"
#include "common/status.h"
#include "olap/olap_common.h"
#include "pipeline/dependency.h"
#include "pipeline/pipeline_fragment_context.h"
#include "runtime/exec_env.h"
#include "runtime/fragment_mgr.h"
#include "runtime/memory/heap_profiler.h"
#include "runtime/runtime_query_statistics_mgr.h"
#include "runtime/runtime_state.h"
#include "runtime/thread_context.h"
#include "runtime/workload_group/workload_group_manager.h"
#include "runtime/workload_management/query_task_controller.h"
#include "runtime_filter/runtime_filter_definitions.h"
#include "util/mem_info.h"
#include "util/uid_util.h"
#include "vec/spill/spill_stream_manager.h"

namespace doris {

// Runnable that merely owns a ThreadPoolToken. run() does nothing; the token is
// destroyed together with this object, so handing an instance to another executor
// moves the token's destruction off the calling thread.
class DelayReleaseToken : public Runnable {
    ENABLE_FACTORY_CREATOR(DelayReleaseToken);

public:
    // Takes ownership of `token`.
    DelayReleaseToken(std::unique_ptr<ThreadPoolToken>&& token) : token_(std::move(token)) {}
    ~DelayReleaseToken() override = default;

    // Intentionally empty: this object exists only to hold `token_`.
    void run() override {}

    std::unique_ptr<ThreadPoolToken> token_;
};

// Returns the human-readable name of a QuerySource value.
// Each enumerator maps to its own identifier; any value not listed yields "UNKNOWN".
const std::string toString(QuerySource queryType) {
    switch (queryType) {
    case QuerySource::INTERNAL_FRONTEND:
        return "INTERNAL_FRONTEND";
    case QuerySource::STREAM_LOAD:
        return "STREAM_LOAD";
    case QuerySource::GROUP_COMMIT_LOAD:
        // Previously reported as "EXTERNAL_QUERY", which mislabeled group commit loads.
        return "GROUP_COMMIT_LOAD";
    case QuerySource::ROUTINE_LOAD:
        return "ROUTINE_LOAD";
    case QuerySource::EXTERNAL_CONNECTOR:
        return "EXTERNAL_CONNECTOR";
    default:
        return "UNKNOWN";
    }
}

// Factory for QueryContext: builds the shared instance and then runs
// init_query_task_controller() on the fully constructed object before returning it.
std::shared_ptr<QueryContext> QueryContext::create(TUniqueId query_id, ExecEnv* exec_env,
                                                   const TQueryOptions& query_options,
                                                   TNetworkAddress coord_addr, bool is_nereids,
                                                   TNetworkAddress current_connect_fe,
                                                   QuerySource query_type) {
    std::shared_ptr<QueryContext> query_ctx =
            QueryContext::create_shared(query_id, exec_env, query_options, coord_addr, is_nereids,
                                        current_connect_fe, query_type);
    // Second construction phase: the task controller is attached after the constructor ran.
    query_ctx->init_query_task_controller();
    return query_ctx;
}

// Constructs the per-query state: resource context and memory tracker, the two
// pipeline dependencies, the runtime filter manager, the timeout and the FE addresses.
// The task controller is NOT set up here; see init_query_task_controller().
QueryContext::QueryContext(TUniqueId query_id, ExecEnv* exec_env,
                           const TQueryOptions& query_options, TNetworkAddress coord_addr,
                           bool is_nereids, TNetworkAddress current_connect_fe,
                           QuerySource query_source)
        : _timeout_second(-1),
          _query_id(std::move(query_id)),
          _exec_env(exec_env),
          _is_nereids(is_nereids),
          _query_options(query_options),
          _query_source(query_source) {
    // Creates _resource_ctx and installs the query memory tracker on it. This runs first
    // because the scoped switch below uses query_mem_tracker().
    _init_resource_context();
    SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(query_mem_tracker());
    _query_watcher.start();
    // NOTE(review): the last argument presumably is the dependency's initial ready state
    // (execution starts not-ready, memory-sufficient starts ready) — confirm against
    // pipeline::Dependency.
    _execution_dependency =
            pipeline::Dependency::create_unique(-1, -1, "ExecutionDependency", false);
    _memory_sufficient_dependency =
            pipeline::Dependency::create_unique(-1, -1, "MemorySufficientDependency", true);

    _runtime_filter_mgr = std::make_unique<RuntimeFilterMgr>(true);

    // Replaces the -1 placeholder from the member initializer list.
    _timeout_second = query_options.execution_timeout;

    // Only SELECT, LOAD and EXTERNAL are accepted; checked in debug builds only.
    bool is_query_type_valid = query_options.query_type == TQueryType::SELECT ||
                               query_options.query_type == TQueryType::LOAD ||
                               query_options.query_type == TQueryType::EXTERNAL;
    DCHECK_EQ(is_query_type_valid, true);

    this->coord_addr = coord_addr;
    // current_connect_fe is used for reporting query statistics.
    this->current_connect_fe = current_connect_fe;
    // An external query has no current_connect_fe, so the address is validated only for
    // the other query types (debug builds only).
    if (query_options.query_type != TQueryType::EXTERNAL) {
        bool is_report_fe_addr_valid =
                !this->current_connect_fe.hostname.empty() && this->current_connect_fe.port != 0;
        DCHECK_EQ(is_report_fe_addr_valid, true);
    }
    // Record the arrival time on the monotonic clock.
    clock_gettime(CLOCK_MONOTONIC, &this->_query_arrival_timestamp);
    // Balanced by the matching decrement in the destructor.
    DorisMetrics::instance()->query_ctx_cnt->increment(1);
}

// Creates the MemTrackerLimiter for this query and installs it on the resource
// context's memory context.
//
// Limit selection:
//   - the query option `mem_limit` when it is set and positive;
//   - otherwise, or when it exceeds the process limit, MemInfo::mem_limit();
//   - pure load tasks always use MemInfo::mem_limit().
void QueryContext::_init_query_mem_tracker() {
    int64_t bytes_limit = -1;
    if (_query_options.__isset.mem_limit && _query_options.mem_limit > 0) {
        bytes_limit = _query_options.mem_limit;
    }
    if (bytes_limit > MemInfo::mem_limit() || bytes_limit == -1) {
        VLOG_NOTICE << "Query memory limit " << PrettyPrinter::print(bytes_limit, TUnit::BYTES)
                    << " exceeds process memory limit of "
                    << PrettyPrinter::print(MemInfo::mem_limit(), TUnit::BYTES)
                    << " OR is -1. Using process memory limit instead.";
        bytes_limit = MemInfo::mem_limit();
    }
    // A pure load task (stream load, routine load, group commit) must not be restricted
    // by the per-query memory limit.
    if (is_pure_load_task()) {
        bytes_limit = MemInfo::mem_limit();
    }

    // Tracker type and label depend on the query type.
    std::shared_ptr<MemTrackerLimiter> tracker;
    switch (_query_options.query_type) {
    case TQueryType::SELECT:
        tracker = MemTrackerLimiter::create_shared(
                MemTrackerLimiter::Type::QUERY, fmt::format("Query#Id={}", print_id(_query_id)),
                bytes_limit);
        break;
    case TQueryType::LOAD:
        tracker = MemTrackerLimiter::create_shared(
                MemTrackerLimiter::Type::LOAD, fmt::format("Load#Id={}", print_id(_query_id)),
                bytes_limit);
        break;
    case TQueryType::EXTERNAL: // spark/flink/etc..
        tracker = MemTrackerLimiter::create_shared(
                MemTrackerLimiter::Type::QUERY, fmt::format("External#Id={}", print_id(_query_id)),
                bytes_limit);
        break;
    default:
        LOG(FATAL) << "__builtin_unreachable";
        __builtin_unreachable();
    }

    if (_query_options.__isset.is_report_success && _query_options.is_report_success) {
        tracker->enable_print_log_usage();
    }

    // When memory reservation is enabled the tracker's own limit check is turned off,
    // because the reservation path performs the check. With reservation on, a query whose
    // consumption exceeds the limit is not expected to fail on `check_limit`; it should
    // keep running, get paused and try to spill on its next reservation. If the workload
    // group or the process runs out of memory it is cancelled forcibly.
    const bool reserve_memory_enabled =
            _query_options.__isset.enable_reserve_memory && _query_options.enable_reserve_memory;
    tracker->set_enable_check_limit(!reserve_memory_enabled);
    _resource_ctx->memory_context()->set_mem_tracker(tracker);
}

// Creates the ResourceContext for this query and then installs the query memory
// tracker on it. The order matters: _init_query_mem_tracker() dereferences _resource_ctx.
void QueryContext::_init_resource_context() {
    _resource_ctx = ResourceContext::create_shared();
    _init_query_mem_tracker();
}

// Attaches a QueryTaskController to the resource context and fills in the task id,
// the FE address and the query type. Outside of BE_TEST builds the resource context
// is additionally registered with the runtime query statistics manager.
void QueryContext::init_query_task_controller() {
    _resource_ctx->set_task_controller(QueryTaskController::create(this));

    // Fetch the freshly installed controller once and configure it.
    auto&& controller = _resource_ctx->task_controller();
    controller->set_task_id(_query_id);
    controller->set_fe_addr(current_connect_fe);
    controller->set_query_type(_query_options.query_type);
#ifndef BE_TEST
    _exec_env->runtime_query_statistics_mgr()->register_resource_context(print_id(_query_id),
                                                                         _resource_ctx);
#endif
}

// Tears the query down: finishes the task controller, optionally reports the profile,
// hands the thread token to the lazy-release pool, resets the owned members, schedules
// spill cleanup and emits the single end-of-query log line.
QueryContext::~QueryContext() {
    SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(query_mem_tracker());
    // A peak consumption of 0 means the query mem tracker was never used: after this
    // QueryContext was created, the query turned out to already exist in _query_ctx_map.
    // After use, the current consumption may be non-zero, because memory consumed on the
    // query mem tracker can be released on other trackers.
    std::string mem_tracker_msg;
    if (query_mem_tracker()->peak_consumption() != 0) {
        mem_tracker_msg = fmt::format(
                "deregister query/load memory tracker, queryId={}, Limit={}, CurrUsed={}, "
                "PeakUsed={}",
                print_id(_query_id), MemCounter::print_bytes(query_mem_tracker()->limit()),
                MemCounter::print_bytes(query_mem_tracker()->consumption()),
                MemCounter::print_bytes(query_mem_tracker()->peak_consumption()));
    }
    // Captured up front for the tracer call further down; only used outside BE_TEST builds.
    [[maybe_unused]] uint64_t group_id = 0;
    if (workload_group()) {
        group_id = workload_group()->id(); // before remove
    }

    _resource_ctx->task_controller()->finish();

    if (enable_profile()) {
        _report_query_profile();
    }

    // Do not release the thread token inside the query context's destructor: the query
    // context may be destroyed on a thread that belongs to the token itself, which is very
    // dangerous and may crash. Shutting the token down can also take a while and would
    // stall the releasing thread, which may be a pipeline task scheduler thread.
    // Ownership is therefore moved into a DelayReleaseToken submitted to the lazy-release pool.
    if (_thread_token) {
        Status submit_st = ExecEnv::GetInstance()->lazy_release_obj_pool()->submit(
                DelayReleaseToken::create_shared(std::move(_thread_token)));
        if (!submit_st.ok()) {
            LOG(WARNING) << "Failed to release query context thread token, query_id "
                         << print_id(_query_id) << ", error status " << submit_st;
        }
    }
#ifndef BE_TEST
    // Tell the pipeline tracer that the query ended; a failure is logged and swallowed so
    // that no exception leaves the destructor.
    if (ExecEnv::GetInstance()->pipeline_tracer_context()->enabled()) [[unlikely]] {
        try {
            ExecEnv::GetInstance()->pipeline_tracer_context()->end_query(_query_id, group_id);
        } catch (std::exception& e) {
            LOG(WARNING) << "Dump trace log failed bacause " << e.what();
        }
    }
#endif
    // Explicitly release owned members in this order.
    _runtime_filter_mgr.reset();
    _execution_dependency.reset();
    _runtime_predicates.clear();
    file_scan_range_params_map.clear();
    obj_pool.clear();
    _merge_controller_handler.reset();

#ifndef BE_TEST
    _exec_env->spill_stream_mgr()->async_cleanup_query(_query_id);
#endif
    // Balances the increment done in the constructor.
    DorisMetrics::instance()->query_ctx_cnt->increment(-1);
    // This is the only message that marks the end of the query; any other end-of-query
    // information should be appended to it.
    LOG_INFO("Query {} deconstructed, mem_tracker: {}", print_id(this->_query_id), mem_tracker_msg);
}

// Marks the execution dependency as ready and then records `reason` in the
// query's execution status.
void QueryContext::set_ready_to_execute(Status reason) {
    _execution_dependency->set_ready();
    _exec_status.update(reason);
}

void QueryContext::set_ready_to_execute_only() {
    set_execution_dependency_ready();
}

// Marks the "ExecutionDependency" created in the constructor as ready.
void QueryContext::set_execution_dependency_ready() {
    _execution_dependency->set_ready();
}

// Updates the memory-sufficient dependency and the task controller's pause bookkeeping.
//   sufficient == false: block the dependency and bump the paused count.
//   sufficient == true : mark the dependency ready and reset the paused reason.
void QueryContext::set_memory_sufficient(bool sufficient) {
    if (!sufficient) {
        _memory_sufficient_dependency->block();
        _resource_ctx->task_controller()->add_paused_count();
        return;
    }
    _memory_sufficient_dependency->set_ready();
    _resource_ctx->task_controller()->reset_paused_reason();
}

// Cancels the query with `new_status`.
//
// Does nothing when `_exec_status.update(new_status)` reports false. Otherwise both
// dependencies are switched to always-ready, optional memory diagnostics are logged,
// and every pipeline fragment context except `fragment_id` is cancelled.
void QueryContext::cancel(Status new_status, int fragment_id) {
    if (!_exec_status.update(new_status)) {
        return;
    }
    // Tasks should be always runnable.
    _execution_dependency->set_always_ready();
    _memory_sufficient_dependency->set_always_ready();

    const bool is_memory_failure = new_status.is<ErrorCode::MEM_LIMIT_EXCEEDED>() ||
                                   new_status.is<ErrorCode::MEM_ALLOC_FAILED>();
    const bool heap_dump_requested =
            _query_options.__isset.dump_heap_profile_when_mem_limit_exceeded &&
            _query_options.dump_heap_profile_when_mem_limit_exceeded;
    if (is_memory_failure && heap_dump_requested) {
        // Dumping the heap profile right at cancellation gives the most accurate memory
        // usage for analysing a memory-limit-exceeded failure.
        auto group = workload_group();
        auto memory_detail_log = fmt::format(
                "Query {} canceled because of memory limit exceeded, dumping memory "
                "detail profiles. wg: {}. {}",
                print_id(_query_id), group ? group->debug_string() : "null",
                doris::ProcessProfile::instance()->memory_profile()->process_memory_detail_str());
        LOG_LONG_STRING(INFO, memory_detail_log);

        std::string dot = HeapProfiler::instance()->dump_heap_profile_to_dot();
        if (!dot.empty()) {
            dot += "\n-------------------------------------------------------\n";
            dot += "Copy the text after `digraph` in the above output to "
                   "http://www.webgraphviz.com to generate a dot graph.\n"
                   "after start heap profiler, if there is no operation, will print `No nodes "
                   "to "
                   "print`."
                   "If there are many errors: `addr2line: Dwarf Error`,"
                   "or other FAQ, reference doc: "
                   "https://doris.apache.org/community/developer-guide/debug-tool/#4-qa\n";
            auto heap_profile_log =
                    fmt::format("Query {}, dump heap profile to dot: {}", print_id(_query_id), dot);
            LOG_LONG_STRING(INFO, heap_profile_log);
        }
    }

    set_ready_to_execute(new_status);
    cancel_all_pipeline_context(new_status, fragment_id);
}

// Stores the load error URL under `_error_url_lock`.
// `error_url` is a by-value sink parameter, so it is moved into the member instead of
// being copied a second time.
void QueryContext::set_load_error_url(std::string error_url) {
    std::lock_guard<std::mutex> lock(_error_url_lock);
    _load_error_url = std::move(error_url);
}

// Returns a copy of the stored load error URL; the copy is made while holding
// `_error_url_lock`.
std::string QueryContext::get_load_error_url() {
    std::unique_lock<std::mutex> guard(_error_url_lock);
    return _load_error_url;
}

void QueryContext::cancel_all_pipeline_context(const Status& reason, int fragment_id) {
    std::vector<std::weak_ptr<pipeline::PipelineFragmentContext>> ctx_to_cancel;
    {
        std::lock_guard<std::mutex> lock(_pipeline_map_write_lock);
        for (auto& [f_id, f_context] : _fragment_id_to_pipeline_ctx) {
            if (fragment_id == f_id) {
                continue;
            }
            ctx_to_cancel.push_back(f_context);
        }
    }
    for (auto& f_context : ctx_to_cancel) {
        if (auto pipeline_ctx = f_context.lock()) {
            pipeline_ctx->cancel(reason);
        }
    }
}

// Builds a debug string describing all pipeline fragment contexts of this query.
//
// The first line reports how many contexts are registered; each context that is still
// alive then contributes one numbered line with its elapsed time (elapsed_time() divided
// by 1e9), fragment id and debug string.
//
// The registered count and the weak pointers are both captured while holding
// `_pipeline_map_write_lock`. Previously the size was read before the lock was taken,
// racing with set_pipeline_context(). The formatting work runs outside the lock.
std::string QueryContext::print_all_pipeline_context() {
    std::vector<std::weak_ptr<pipeline::PipelineFragmentContext>> ctx_to_print;
    size_t registered_count = 0;
    {
        std::lock_guard<std::mutex> lock(_pipeline_map_write_lock);
        registered_count = _fragment_id_to_pipeline_ctx.size();
        ctx_to_print.reserve(registered_count);
        for (const auto& entry : _fragment_id_to_pipeline_ctx) {
            ctx_to_print.push_back(entry.second);
        }
    }

    fmt::memory_buffer debug_string_buffer;
    fmt::format_to(debug_string_buffer, "{} pipeline fragment contexts in query {}. \n",
                   registered_count, print_id(_query_id));

    // `i` numbers only the contexts that are still alive.
    size_t i = 0;
    for (auto& f_context : ctx_to_print) {
        if (auto pipeline_ctx = f_context.lock()) {
            auto elapsed = pipeline_ctx->elapsed_time() / 1000000000.0;
            fmt::format_to(debug_string_buffer,
                           "No.{} (elapse_second={}s, fragment_id={}) : {}\n", i, elapsed,
                           pipeline_ctx->get_fragment_id(), pipeline_ctx->debug_string());
            i++;
        }
    }
    return fmt::to_string(debug_string_buffer);
}

// Registers `pip_ctx` under `fragment_id` while holding `_pipeline_map_write_lock`.
// An entry that already exists for `fragment_id` is left unchanged.
void QueryContext::set_pipeline_context(
        const int fragment_id, std::shared_ptr<pipeline::PipelineFragmentContext> pip_ctx) {
    std::lock_guard<std::mutex> guard(_pipeline_map_write_lock);
    _fragment_id_to_pipeline_ctx.emplace(fragment_id, pip_ctx);
}

// Returns the task scheduler for this query: `_task_scheduler` when a workload group is
// attached and that scheduler is set, otherwise the ExecEnv-wide pipeline task scheduler.
doris::pipeline::TaskScheduler* QueryContext::get_pipe_exec_scheduler() {
    if (workload_group() && _task_scheduler) {
        return _task_scheduler;
    }
    return _exec_env->pipeline_task_scheduler();
}

// Attaches the workload group to the resource context and then fetches the group's
// schedulers into `_task_scheduler`, `_scan_task_scheduler` and
// `_remote_scan_task_scheduler`.
void QueryContext::set_workload_group(WorkloadGroupPtr& wg) {
    _resource_ctx->set_workload_group(wg);
    // Should add query first, then the workload group will not be deleted.
    // see task_group_manager::delete_workload_group_by_ids
    auto group = workload_group();
    group->get_query_scheduler(&_task_scheduler, &_scan_task_scheduler,
                               &_remote_scan_task_scheduler);
}

// Stores the pipeline profiles of one fragment, plus its load channel profile when one
// is given, under `_profile_mutex`.
//
// An empty `pipeline_profiles` is logged as an error (and trips a DCHECK in debug builds)
// and nothing is stored. Existing entries for `fragment_id` are not overwritten.
void QueryContext::add_fragment_profile(
        int fragment_id, const std::vector<std::shared_ptr<TRuntimeProfileTree>>& pipeline_profiles,
        std::shared_ptr<TRuntimeProfileTree> load_channel_profile) {
    if (pipeline_profiles.empty()) {
        std::string msg = fmt::format("Add pipeline profile failed, query {}, fragment {}",
                                      print_id(this->_query_id), fragment_id);
        LOG_ERROR(msg);
        DCHECK(false) << msg;
        return;
    }

#ifndef NDEBUG
    // Debug-only sanity check: no profile pointer may be null.
    for (const auto& profile : pipeline_profiles) {
        DCHECK(profile != nullptr)
                << fmt::format("Add pipeline profile failed, query {}, fragment {}",
                               print_id(this->_query_id), fragment_id);
    }
#endif

    std::lock_guard<std::mutex> guard(_profile_mutex);
    VLOG_ROW << fmt::format(
            "Query add fragment profile, query {}, fragment {}, pipeline profile count {} ",
            print_id(this->_query_id), fragment_id, pipeline_profiles.size());

    _profile_map.emplace(fragment_id, pipeline_profiles);

    if (load_channel_profile) {
        _load_channel_profile_map.emplace(fragment_id, load_channel_profile);
    }
}

// Registers every stored fragment profile (with its load channel profile, or nullptr
// when none was stored) with the runtime query statistics manager and then triggers a
// profile report. Runs entirely under `_profile_mutex`.
void QueryContext::_report_query_profile() {
    std::lock_guard<std::mutex> guard(_profile_mutex);

    for (auto& [fragment_id, fragment_profile] : _profile_map) {
        std::shared_ptr<TRuntimeProfileTree> load_channel_profile;
        if (auto found = _load_channel_profile_map.find(fragment_id);
            found != _load_channel_profile_map.end()) {
            load_channel_profile = found->second;
        }

        ExecEnv::GetInstance()->runtime_query_statistics_mgr()->register_fragment_profile(
                _query_id, this->coord_addr, fragment_id, fragment_profile, load_channel_profile);
    }

    ExecEnv::GetInstance()->runtime_query_statistics_mgr()->trigger_report_profile();
}

std::unordered_map<int, std::vector<std::shared_ptr<TRuntimeProfileTree>>>
QueryContext::_collect_realtime_query_profile() {
    std::unordered_map<int, std::vector<std::shared_ptr<TRuntimeProfileTree>>> res;
    std::lock_guard<std::mutex> lock(_pipeline_map_write_lock);
    for (const auto& [fragment_id, fragment_ctx_wptr] : _fragment_id_to_pipeline_ctx) {
        if (auto fragment_ctx = fragment_ctx_wptr.lock()) {
            if (fragment_ctx == nullptr) {
                std::string msg =
                        fmt::format("PipelineFragmentContext is nullptr, query {} fragment_id: {}",
                                    print_id(_query_id), fragment_id);
                LOG_ERROR(msg);
                DCHECK(false) << msg;
                continue;
            }

            auto profile = fragment_ctx->collect_realtime_profile();

            if (profile.empty()) {
                std::string err_msg = fmt::format(
                        "Get nothing when collecting profile, query {}, fragment_id: {}",
                        print_id(_query_id), fragment_id);
                LOG_ERROR(err_msg);
                DCHECK(false) << err_msg;
                continue;
            }

            res.insert(std::make_pair(fragment_id, profile));
        }
    }

    return res;
}

// Builds a not-yet-done (`is_done=false`) exec status report from the realtime pipeline
// profiles and all non-null load channel profiles stored so far.
TReportExecStatusParams QueryContext::get_realtime_exec_status() {
    // Takes and releases _pipeline_map_write_lock internally.
    auto realtime_query_profile = _collect_realtime_query_profile();

    std::vector<std::shared_ptr<TRuntimeProfileTree>> load_channel_profiles;
    {
        // _load_channel_profile_map is written under _profile_mutex by
        // add_fragment_profile(), so it must be read under the same mutex.
        std::lock_guard<std::mutex> lock(_profile_mutex);
        for (const auto& [fragment_id, load_channel_profile] : _load_channel_profile_map) {
            if (load_channel_profile != nullptr) {
                load_channel_profiles.push_back(load_channel_profile);
            }
        }
    }

    return RuntimeQueryStatisticsMgr::create_report_exec_status_params(
            this->_query_id, std::move(realtime_query_profile), std::move(load_channel_profiles),
            /*is_done=*/false);
}

} // namespace doris

Coverage Report

Created: 2025-04-28 02:04

Line	Count	Source (jump to first uncovered line)
1		// Licensed to the Apache Software Foundation (ASF) under one
2		// or more contributor license agreements. See the NOTICE file
3		// distributed with this work for additional information
4		// regarding copyright ownership. The ASF licenses this file
5		// to you under the Apache License, Version 2.0 (the
6		// "License"); you may not use this file except in compliance
7		// with the License. You may obtain a copy of the License at
8		//
9		// http://www.apache.org/licenses/LICENSE-2.0
10		//
11		// Unless required by applicable law or agreed to in writing,
12		// software distributed under the License is distributed on an
13		// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14		// KIND, either express or implied. See the License for the
15		// specific language governing permissions and limitations
16		// under the License.
17
18		#include "runtime/query_context.h"
19
20		#include <fmt/core.h>
21		#include <gen_cpp/FrontendService_types.h>
22		#include <gen_cpp/RuntimeProfile_types.h>
23		#include <gen_cpp/Types_types.h>
24		#include <glog/logging.h>
25
26		#include <algorithm>
27		#include <exception>
28		#include <memory>
29		#include <mutex>
30		#include <utility>
31		#include <vector>
32
33		#include "common/logging.h"
34		#include "common/status.h"
35		#include "olap/olap_common.h"
36		#include "pipeline/dependency.h"
37		#include "pipeline/pipeline_fragment_context.h"
38		#include "runtime/exec_env.h"
39		#include "runtime/fragment_mgr.h"
40		#include "runtime/memory/heap_profiler.h"
41		#include "runtime/runtime_query_statistics_mgr.h"
42		#include "runtime/runtime_state.h"
43		#include "runtime/thread_context.h"
44		#include "runtime/workload_group/workload_group_manager.h"
45		#include "runtime/workload_management/query_task_controller.h"
46		#include "runtime_filter/runtime_filter_definitions.h"
47		#include "util/mem_info.h"
48		#include "util/uid_util.h"
49		#include "vec/spill/spill_stream_manager.h"
50
51		namespace doris {
52
53		class DelayReleaseToken : public Runnable {
54		ENABLE_FACTORY_CREATOR(DelayReleaseToken);
55
56		public:
57	0	DelayReleaseToken(std::unique_ptr<ThreadPoolToken>&& token) { token_ = std::move(token); }
58	0	~DelayReleaseToken() override = default;
59	0	void run() override {}
60		std::unique_ptr<ThreadPoolToken> token_;
61		};
62
63	0	const std::string toString(QuerySource queryType) {
64	0	switch (queryType) {
65	0	case QuerySource::INTERNAL_FRONTEND:
66	0	return "INTERNAL_FRONTEND";
67	0	case QuerySource::STREAM_LOAD:
68	0	return "STREAM_LOAD";
69	0	case QuerySource::GROUP_COMMIT_LOAD:
70	0	return "EXTERNAL_QUERY";
71	0	case QuerySource::ROUTINE_LOAD:
72	0	return "ROUTINE_LOAD";
73	0	case QuerySource::EXTERNAL_CONNECTOR:
74	0	return "EXTERNAL_CONNECTOR";
75	0	default:
76	0	return "UNKNOWN";
77	0	}
78	0	}
79
80		std::shared_ptr<QueryContext> QueryContext::create(TUniqueId query_id, ExecEnv* exec_env,
81		const TQueryOptions& query_options,
82		TNetworkAddress coord_addr, bool is_nereids,
83		TNetworkAddress current_connect_fe,
84	100	QuerySource query_type) {
85	100	auto ctx = QueryContext::create_shared(query_id, exec_env, query_options, coord_addr,
86	100	is_nereids, current_connect_fe, query_type);
87	100	ctx->init_query_task_controller();
88	100	return ctx;
89	100	}
90
91		QueryContext::QueryContext(TUniqueId query_id, ExecEnv* exec_env,
92		const TQueryOptions& query_options, TNetworkAddress coord_addr,
93		bool is_nereids, TNetworkAddress current_connect_fe,
94		QuerySource query_source)
95		: _timeout_second(-1),
96		_query_id(std::move(query_id)),
97		_exec_env(exec_env),
98		_is_nereids(is_nereids),
99		_query_options(query_options),
100	78.6k	_query_source(query_source) {
101	78.6k	_init_resource_context();
102	78.6k	SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(query_mem_tracker());
103	78.6k	_query_watcher.start();
104	78.6k	_execution_dependency =
105	78.6k	pipeline::Dependency::create_unique(-1, -1, "ExecutionDependency", false);
106	78.6k	_memory_sufficient_dependency =
107	78.6k	pipeline::Dependency::create_unique(-1, -1, "MemorySufficientDependency", true);
108
109	78.6k	_runtime_filter_mgr = std::make_unique<RuntimeFilterMgr>(true);
110
111	78.6k	_timeout_second = query_options.execution_timeout;
112
113	78.6k	bool is_query_type_valid = query_options.query_type == TQueryType::SELECT \|\|
114	78.6k	query_options.query_type == TQueryType::LOAD \|\|
115	78.6k	query_options.query_type == TQueryType::EXTERNAL;
116	78.6k	DCHECK_EQ(is_query_type_valid, true);
117
118	78.6k	this->coord_addr = coord_addr;
119		// current_connect_fe is used for report query statistics
120	78.6k	this->current_connect_fe = current_connect_fe;
121		// external query has no current_connect_fe
122	78.6k	if (query_options.query_type != TQueryType::EXTERNAL) {
123	189	bool is_report_fe_addr_valid =
124	189	!this->current_connect_fe.hostname.empty() && this->current_connect_fe.port != 0;
125	189	DCHECK_EQ(is_report_fe_addr_valid, true);
126	189	}
127	78.6k	clock_gettime(CLOCK_MONOTONIC, &this->_query_arrival_timestamp);
128	78.6k	DorisMetrics::instance()->query_ctx_cnt->increment(1);
129	78.6k	}
130
131	78.6k	void QueryContext::_init_query_mem_tracker() {
132	78.6k	bool has_query_mem_limit = _query_options.__isset.mem_limit && (_query_options.mem_limit > 0);
133	78.6k	int64_t bytes_limit = has_query_mem_limit ? _query_options.mem_limit : -1;
134	78.6k	if (bytes_limit > MemInfo::mem_limit() \|\| bytes_limit == -1) {
135	0	VLOG_NOTICE << "Query memory limit " << PrettyPrinter::print(bytes_limit, TUnit::BYTES)
136	0	<< " exceeds process memory limit of "
137	0	<< PrettyPrinter::print(MemInfo::mem_limit(), TUnit::BYTES)
138	0	<< " OR is -1. Using process memory limit instead.";
139	0	bytes_limit = MemInfo::mem_limit();
140	0	}
141		// If the query is a pure load task(streamload, routine load, group commit), then it should not use
142		// memlimit per query to limit their memory usage.
143	78.6k	if (is_pure_load_task()) {
144	78.4k	bytes_limit = MemInfo::mem_limit();
145	78.4k	}
146	78.6k	std::shared_ptr<MemTrackerLimiter> query_mem_tracker;
147	78.6k	if (_query_options.query_type == TQueryType::SELECT) {
148	186	query_mem_tracker = MemTrackerLimiter::create_shared(
149	186	MemTrackerLimiter::Type::QUERY, fmt::format("Query#Id={}", print_id(_query_id)),
150	186	bytes_limit);
151	78.4k	} else if (_query_options.query_type == TQueryType::LOAD) {
152	3	query_mem_tracker = MemTrackerLimiter::create_shared(
153	3	MemTrackerLimiter::Type::LOAD, fmt::format("Load#Id={}", print_id(_query_id)),
154	3	bytes_limit);
155	78.4k	} else if (_query_options.query_type == TQueryType::EXTERNAL) { // spark/flink/etc..
156	78.4k	query_mem_tracker = MemTrackerLimiter::create_shared(
157	78.4k	MemTrackerLimiter::Type::QUERY, fmt::format("External#Id={}", print_id(_query_id)),
158	78.4k	bytes_limit);
159	78.4k	} else {
160	0	LOG(FATAL) << "__builtin_unreachable";
161	0	__builtin_unreachable();
162	0	}
163	78.6k	if (_query_options.__isset.is_report_success && _query_options.is_report_success) {
164	0	query_mem_tracker->enable_print_log_usage();
165	0	}
166
167		// If enable reserve memory, not enable check limit, because reserve memory will check it.
168		// If reserve enabled, even if the reserved memory size is smaller than the actual requested memory,
169		// and the query memory consumption is larger than the limit, we do not expect the query to fail
170		// after `check_limit` returns an error, but to run as long as possible,
171		// and will enter the paused state and try to spill when the query reserves next time.
172		// If the workload group or process runs out of memory, it will be forced to cancel.
173	78.6k	query_mem_tracker->set_enable_check_limit(!(_query_options.__isset.enable_reserve_memory &&
174	78.6k	_query_options.enable_reserve_memory));
175	78.6k	_resource_ctx->memory_context()->set_mem_tracker(query_mem_tracker);
176	78.6k	}
177
178	78.6k	void QueryContext::_init_resource_context() {
179	78.6k	_resource_ctx = ResourceContext::create_shared();
180	78.6k	_init_query_mem_tracker();
181	78.6k	}
182
183	78.5k	void QueryContext::init_query_task_controller() {
184	78.5k	_resource_ctx->set_task_controller(QueryTaskController::create(this));
185	78.5k	_resource_ctx->task_controller()->set_task_id(_query_id);
186	78.5k	_resource_ctx->task_controller()->set_fe_addr(current_connect_fe);
187	78.5k	_resource_ctx->task_controller()->set_query_type(_query_options.query_type);
188		#ifndef BE_TEST
189		_exec_env->runtime_query_statistics_mgr()->register_resource_context(print_id(_query_id),
190		_resource_ctx);
191		#endif
192	78.5k	}
193
194	78.6k	QueryContext::~QueryContext() {
195	78.6k	SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(query_mem_tracker());
196		// query mem tracker consumption is equal to 0, it means that after QueryContext is created,
197		// it is found that query already exists in _query_ctx_map, and query mem tracker is not used.
198		// query mem tracker consumption is not equal to 0 after use, because there is memory consumed
199		// on query mem tracker, released on other trackers.
200	78.6k	std::string mem_tracker_msg;
201	78.6k	if (query_mem_tracker()->peak_consumption() != 0) {
202	30	mem_tracker_msg = fmt::format(
203	30	"deregister query/load memory tracker, queryId={}, Limit={}, CurrUsed={}, "
204	30	"PeakUsed={}",
205	30	print_id(_query_id), MemCounter::print_bytes(query_mem_tracker()->limit()),
206	30	MemCounter::print_bytes(query_mem_tracker()->consumption()),
207	30	MemCounter::print_bytes(query_mem_tracker()->peak_consumption()));
208	30	}
209	78.6k	[[maybe_unused]] uint64_t group_id = 0;
210	78.6k	if (workload_group()) {
211	22	group_id = workload_group()->id(); // before remove
212	22	}
213
214	78.6k	_resource_ctx->task_controller()->finish();
215
216	78.6k	if (enable_profile()) {
217	0	_report_query_profile();
218	0	}
219
220		// Not release the the thread token in query context's dector method, because the query
221		// conext may be dectored in the thread token it self. It is very dangerous and may core.
222		// And also thread token need shutdown, it may take some time, may cause the thread that
223		// release the token hang, the thread maybe a pipeline task scheduler thread.
224	78.6k	if (_thread_token) {
225	0	Status submit_st = ExecEnv::GetInstance()->lazy_release_obj_pool()->submit(
226	0	DelayReleaseToken::create_shared(std::move(_thread_token)));
227	0	if (!submit_st.ok()) {
228	0	LOG(WARNING) << "Failed to release query context thread token, query_id "
229	0	<< print_id(_query_id) << ", error status " << submit_st;
230	0	}
231	0	}
232		#ifndef BE_TEST
233		if (ExecEnv::GetInstance()->pipeline_tracer_context()->enabled()) [[unlikely]] {
234		try {
235		ExecEnv::GetInstance()->pipeline_tracer_context()->end_query(_query_id, group_id);
236		} catch (std::exception& e) {
237		LOG(WARNING) << "Dump trace log failed bacause " << e.what();
238		}
239		}
240		#endif
241	78.6k	_runtime_filter_mgr.reset();
242	78.6k	_execution_dependency.reset();
243	78.6k	_runtime_predicates.clear();
244	78.6k	file_scan_range_params_map.clear();
245	78.6k	obj_pool.clear();
246	78.6k	_merge_controller_handler.reset();
247
248		#ifndef BE_TEST
249		_exec_env->spill_stream_mgr()->async_cleanup_query(_query_id);
250		#endif
251	78.6k	DorisMetrics::instance()->query_ctx_cnt->increment(-1);
252		// the only one msg shows query's end. any other msg should append to it if need.
253	78.6k	LOG_INFO("Query {} deconstructed, mem_tracker: {}", print_id(this->_query_id), mem_tracker_msg);
254	78.6k	}
255
256	18	void QueryContext::set_ready_to_execute(Status reason) {
257	18	set_execution_dependency_ready();
258	18	_exec_status.update(reason);
259	18	}
260
261	0	void QueryContext::set_ready_to_execute_only() {
262	0	set_execution_dependency_ready();
263	0	}
264
265	18	void QueryContext::set_execution_dependency_ready() {
266	18	_execution_dependency->set_ready();
267	18	}
268
269	18	void QueryContext::set_memory_sufficient(bool sufficient) {
270	18	if (sufficient) {
271	8	{
272	8	_memory_sufficient_dependency->set_ready();
273	8	_resource_ctx->task_controller()->reset_paused_reason();
274	8	}
275	10	} else {
276	10	_memory_sufficient_dependency->block();
277	10	_resource_ctx->task_controller()->add_paused_count();
278	10	}
279	18	}
280
281	18	void QueryContext::cancel(Status new_status, int fragment_id) {
282	18	if (!_exec_status.update(new_status)) {
283	0	return;
284	0	}
285		// Tasks should be always runnable.
286	18	_execution_dependency->set_always_ready();
287	18	_memory_sufficient_dependency->set_always_ready();
288	18	if ((new_status.is<ErrorCode::MEM_LIMIT_EXCEEDED>() \|\|
289	18	new_status.is<ErrorCode::MEM_ALLOC_FAILED>()) &&
290	18	_query_options.__isset.dump_heap_profile_when_mem_limit_exceeded &&
291	18	_query_options.dump_heap_profile_when_mem_limit_exceeded) {
292		// if query is cancelled because of query mem limit exceeded, dump heap profile
293		// at the time of cancellation can get the most accurate memory usage for problem analysis
294	0	auto wg = workload_group();
295	0	auto log_str = fmt::format(
296	0	"Query {} canceled because of memory limit exceeded, dumping memory "
297	0	"detail profiles. wg: {}. {}",
298	0	print_id(_query_id), wg ? wg->debug_string() : "null",
299	0	doris::ProcessProfile::instance()->memory_profile()->process_memory_detail_str());
300	0	LOG_LONG_STRING(INFO, log_str);
301	0	std::string dot = HeapProfiler::instance()->dump_heap_profile_to_dot();
302	0	if (!dot.empty()) {
303	0	dot += "\n-------------------------------------------------------\n";
304	0	dot += "Copy the text after `digraph` in the above output to "
305	0	"http://www.webgraphviz.com to generate a dot graph.\n"
306	0	"after start heap profiler, if there is no operation, will print `No nodes "
307	0	"to "
308	0	"print`."
309	0	"If there are many errors: `addr2line: Dwarf Error`,"
310	0	"or other FAQ, reference doc: "
311	0	"https://doris.apache.org/community/developer-guide/debug-tool/#4-qa\n";
312	0	auto log_str =
313	0	fmt::format("Query {}, dump heap profile to dot: {}", print_id(_query_id), dot);
314	0	LOG_LONG_STRING(INFO, log_str);
315	0	}
316	0	}
317
318	18	set_ready_to_execute(new_status);
319	18	cancel_all_pipeline_context(new_status, fragment_id);
320	18	}
321
322	0	void QueryContext::set_load_error_url(std::string error_url) {
323	0	std::lock_guard<std::mutex> lock(_error_url_lock);
324	0	_load_error_url = error_url;
325	0	}
326
327	0	std::string QueryContext::get_load_error_url() {
328	0	std::lock_guard<std::mutex> lock(_error_url_lock);
329	0	return _load_error_url;
330	0	}
331
332	18	void QueryContext::cancel_all_pipeline_context(const Status& reason, int fragment_id) {
333	18	std::vector<std::weak_ptr<pipeline::PipelineFragmentContext>> ctx_to_cancel;
334	18	{
335	18	std::lock_guard<std::mutex> lock(_pipeline_map_write_lock);
336	18	for (auto& [f_id, f_context] : _fragment_id_to_pipeline_ctx) {
337	0	if (fragment_id == f_id) {
338	0	continue;
339	0	}
340	0	ctx_to_cancel.push_back(f_context);
341	0	}
342	18	}
343	18	for (auto& f_context : ctx_to_cancel) {
344	0	if (auto pipeline_ctx = f_context.lock()) {
345	0	pipeline_ctx->cancel(reason);
346	0	}
347	0	}
348	18	}
349
350	0	std::string QueryContext::print_all_pipeline_context() {
351	0	std::vector<std::weak_ptr<pipeline::PipelineFragmentContext>> ctx_to_print;
352	0	fmt::memory_buffer debug_string_buffer;
353	0	size_t i = 0;
354	0	{
355	0	fmt::format_to(debug_string_buffer, "{} pipeline fragment contexts in query {}. \n",
356	0	_fragment_id_to_pipeline_ctx.size(), print_id(_query_id));
357
358	0	{
359	0	std::lock_guard<std::mutex> lock(_pipeline_map_write_lock);
360	0	for (auto& [f_id, f_context] : _fragment_id_to_pipeline_ctx) {
361	0	ctx_to_print.push_back(f_context);
362	0	}
363	0	}
364	0	for (auto& f_context : ctx_to_print) {
365	0	if (auto pipeline_ctx = f_context.lock()) {
366	0	auto elapsed = pipeline_ctx->elapsed_time() / 1000000000.0;
367	0	fmt::format_to(debug_string_buffer,
368	0	"No.{} (elapse_second={}s, fragment_id={}) : {}\n", i, elapsed,
369	0	pipeline_ctx->get_fragment_id(), pipeline_ctx->debug_string());
370	0	i++;
371	0	}
372	0	}
373	0	}
374	0	return fmt::to_string(debug_string_buffer);
375	0	}
376
377		void QueryContext::set_pipeline_context(
378	0	const int fragment_id, std::shared_ptr<pipeline::PipelineFragmentContext> pip_ctx) {
379	0	std::lock_guard<std::mutex> lock(_pipeline_map_write_lock);
380	0	_fragment_id_to_pipeline_ctx.insert({fragment_id, pip_ctx});
381	0	}
382
383	0	doris::pipeline::TaskScheduler* QueryContext::get_pipe_exec_scheduler() {
384	0	if (workload_group()) {
385	0	if (_task_scheduler) {
386	0	return _task_scheduler;
387	0	}
388	0	}
389	0	return _exec_env->pipeline_task_scheduler();
390	0	}
391
392	22	void QueryContext::set_workload_group(WorkloadGroupPtr& wg) {
393	22	_resource_ctx->set_workload_group(wg);
394		// Should add query first, then the workload group will not be deleted.
395		// see task_group_manager::delete_workload_group_by_ids
396	22	workload_group()->get_query_scheduler(&_task_scheduler, &_scan_task_scheduler,
397	22	&_remote_scan_task_scheduler);
398	22	}
399
400		void QueryContext::add_fragment_profile(
401		int fragment_id, const std::vector<std::shared_ptr<TRuntimeProfileTree>>& pipeline_profiles,
402	0	std::shared_ptr<TRuntimeProfileTree> load_channel_profile) {
403	0	if (pipeline_profiles.empty()) {
404	0	std::string msg = fmt::format("Add pipeline profile failed, query {}, fragment {}",
405	0	print_id(this->_query_id), fragment_id);
406	0	LOG_ERROR(msg);
407	0	DCHECK(false) << msg;
408	0	return;
409	0	}
410
411	0	#ifndef NDEBUG
412	0	for (const auto& p : pipeline_profiles) {
413	0	DCHECK(p != nullptr) << fmt::format("Add pipeline profile failed, query {}, fragment {}",
414	0	print_id(this->_query_id), fragment_id);
415	0	}
416	0	#endif
417
418	0	std::lock_guard<std::mutex> l(_profile_mutex);
419	0	VLOG_ROW << fmt::format(
420	0	"Query add fragment profile, query {}, fragment {}, pipeline profile count {} ",
421	0	print_id(this->_query_id), fragment_id, pipeline_profiles.size());
422
423	0	_profile_map.insert(std::make_pair(fragment_id, pipeline_profiles));
424
425	0	if (load_channel_profile != nullptr) {
426	0	_load_channel_profile_map.insert(std::make_pair(fragment_id, load_channel_profile));
427	0	}
428	0	}
429
430	0	void QueryContext::_report_query_profile() {
431	0	std::lock_guard<std::mutex> lg(_profile_mutex);
432
433	0	for (auto& [fragment_id, fragment_profile] : _profile_map) {
434	0	std::shared_ptr<TRuntimeProfileTree> load_channel_profile = nullptr;
435
436	0	if (_load_channel_profile_map.contains(fragment_id)) {
437	0	load_channel_profile = _load_channel_profile_map[fragment_id];
438	0	}
439
440	0	ExecEnv::GetInstance()->runtime_query_statistics_mgr()->register_fragment_profile(
441	0	_query_id, this->coord_addr, fragment_id, fragment_profile, load_channel_profile);
442	0	}
443
444	0	ExecEnv::GetInstance()->runtime_query_statistics_mgr()->trigger_report_profile();
445	0	}
446
447		std::unordered_map<int, std::vector<std::shared_ptr<TRuntimeProfileTree>>>
448	0	QueryContext::_collect_realtime_query_profile() {
449	0	std::unordered_map<int, std::vector<std::shared_ptr<TRuntimeProfileTree>>> res;
450	0	std::lock_guard<std::mutex> lock(_pipeline_map_write_lock);
451	0	for (const auto& [fragment_id, fragment_ctx_wptr] : _fragment_id_to_pipeline_ctx) {
452	0	if (auto fragment_ctx = fragment_ctx_wptr.lock()) {
453	0	if (fragment_ctx == nullptr) {
454	0	std::string msg =
455	0	fmt::format("PipelineFragmentContext is nullptr, query {} fragment_id: {}",
456	0	print_id(_query_id), fragment_id);
457	0	LOG_ERROR(msg);
458	0	DCHECK(false) << msg;
459	0	continue;
460	0	}
461
462	0	auto profile = fragment_ctx->collect_realtime_profile();
463
464	0	if (profile.empty()) {
465	0	std::string err_msg = fmt::format(
466	0	"Get nothing when collecting profile, query {}, fragment_id: {}",
467	0	print_id(_query_id), fragment_id);
468	0	LOG_ERROR(err_msg);
469	0	DCHECK(false) << err_msg;
470	0	continue;
471	0	}
472
473	0	res.insert(std::make_pair(fragment_id, profile));
474	0	}
475	0	}
476
477	0	return res;
478	0	}
479
480	0	TReportExecStatusParams QueryContext::get_realtime_exec_status() {
481	0	TReportExecStatusParams exec_status;
482
483	0	auto realtime_query_profile = _collect_realtime_query_profile();
484	0	std::vector<std::shared_ptr<TRuntimeProfileTree>> load_channel_profiles;
485
486	0	for (auto load_channel_profile : _load_channel_profile_map) {
487	0	if (load_channel_profile.second != nullptr) {
488	0	load_channel_profiles.push_back(load_channel_profile.second);
489	0	}
490	0	}
491
492	0	exec_status = RuntimeQueryStatisticsMgr::create_report_exec_status_params(
493	0	this->_query_id, std::move(realtime_query_profile), std::move(load_channel_profiles),
494	0	/is_done=/false);
495
496	0	return exec_status;
497	0	}
498
499		} // namespace doris