/root/doris/be/src/runtime/load_channel.cpp

Source (jump to first uncovered line)
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

#include "runtime/load_channel.h"

#include <gen_cpp/internal_service.pb.h>
#include <glog/logging.h>

#include "bvar/bvar.h"
#include "olap/storage_engine.h"
#include "runtime/exec_env.h"
#include "runtime/fragment_mgr.h"
#include "runtime/memory/mem_tracker.h"
#include "runtime/tablets_channel.h"
#include "runtime/thread_context.h"
#include "runtime/workload_group/workload_group_manager.h"

namespace doris {

// Counter of LoadChannel objects currently alive in this process: the LoadChannel
// constructor adds 1 and the destructor adds -1. Created with the bvar name
// "loadchannel_cnt".
bvar::Adder<int64_t> g_loadchannel_cnt("loadchannel_cnt");

// Constructs a load channel for `load_id`.
//
// The thread context used by later calls is bound to the QueryContext registered in
// the fragment manager under this load id when one exists; otherwise a dedicated
// LOAD-type memory tracker is created for the channel. The constructor also bumps the
// global channel counter, stamps the last-updated time and builds the runtime profile.
LoadChannel::LoadChannel(const UniqueId& load_id, int64_t timeout_s, bool is_high_priority,
                         std::string sender_ip, int64_t backend_id, bool enable_profile)
        : _load_id(load_id),
          _timeout_s(timeout_s),
          _is_high_priority(is_high_priority),
          _sender_ip(std::move(sender_ip)),
          _backend_id(backend_id),
          _enable_profile(enable_profile) {
    std::shared_ptr<QueryContext> ctx =
            ExecEnv::GetInstance()->fragment_mgr()->get_query_context(_load_id.to_thrift());
    if (ctx == nullptr) {
        // No query context is registered for this load: track memory with a
        // tracker owned by this channel.
        _query_thread_context = {
                _load_id.to_thrift(),
                MemTrackerLimiter::create_shared(
                        MemTrackerLimiter::Type::LOAD,
                        fmt::format("(FromLoadChannel)Load#Id={}", _load_id.to_string()))};
    } else {
        // Reuse the memory tracker and workload group of the registered query context.
        _query_thread_context = {_load_id.to_thrift(), ctx->query_mem_tracker,
                                 ctx->workload_group()};
    }

    g_loadchannel_cnt << 1;

    // The timestamp must be valid before this object is inserted into
    // _load_channels in load_channel_mgr; otherwise the gc thread may erase
    // the channel immediately.
    _last_updated_time.store(time(nullptr));

    _init_profile();
}

// Decrements the global channel counter and logs a removal line that includes, for
// every tablets channel recorded in _tablets_channels_rows, its received and
// filtered row counts.
LoadChannel::~LoadChannel() {
    g_loadchannel_cnt << -1;

    std::stringstream per_index_rows;
    for (const auto& [index_id, row_counts] : _tablets_channels_rows) {
        // row_counts is (total received rows, filtered rows), as stored by _handle_eos.
        per_index_rows << ", index id: " << index_id
                       << ", total_received_rows: " << row_counts.first
                       << ", num_rows_filtered: " << row_counts.second;
    }

    LOG(INFO) << "load channel removed"
              << " load_id=" << _load_id << ", is high priority=" << _is_high_priority
              << ", sender_ip=" << _sender_ip << per_index_rows.str();
}

// Creates the profile tree for this channel: a root "LoadChannels" profile holding
// the manager-level timers, and one child profile (named after the load id, sender
// host and backend id) holding the per-channel counters and timers.
void LoadChannel::_init_profile() {
    // Root profile and its timers.
    _profile = std::make_unique<RuntimeProfile>("LoadChannels");
    _mgr_add_batch_timer = ADD_TIMER(_profile, "LoadChannelMgrAddBatchTime");
    _handle_mem_limit_timer = ADD_TIMER(_profile, "HandleMemLimitTime");

    // Child profile describing this particular channel.
    const auto self_profile_name =
            fmt::format("LoadChannel load_id={} (host={}, backend_id={})", _load_id.to_string(),
                        _sender_ip, _backend_id);
    _self_profile = _profile->create_child(self_profile_name, true, true);

    _add_batch_number_counter = ADD_COUNTER(_self_profile, "NumberBatchAdded", TUnit::UNIT);
    _peak_memory_usage_counter = ADD_COUNTER(_self_profile, "PeakMemoryUsage", TUnit::BYTES);
    _add_batch_timer = ADD_TIMER(_self_profile, "AddBatchTime");
    // "HandleEosTime" is registered as a child of "AddBatchTime", so it is added after it.
    _handle_eos_timer = ADD_CHILD_TIMER(_self_profile, "HandleEosTime", "AddBatchTime");
    _add_batch_times = ADD_COUNTER(_self_profile, "AddBatchTimes", TUnit::UNIT);
}

// Opens (or re-opens) the tablets channel for the index named in `params`.
//
// The tablets channel for the index is looked up, or created and registered if it
// does not exist yet, and then opened either incrementally or normally depending on
// params.is_incremental(). Returns the error of the underlying open call if it
// fails; on success marks the load channel as opened and refreshes the
// last-updated time.
Status LoadChannel::open(const PTabletWriterOpenRequest& params) {
    SCOPED_ATTACH_TASK(_query_thread_context);
    const int64_t index_id = params.index_id();

    std::shared_ptr<BaseTabletsChannel> tablets_channel;
    {
        std::lock_guard<std::mutex> channel_guard(_lock);
        if (auto found = _tablets_channels.find(index_id); found != _tablets_channels.end()) {
            tablets_channel = found->second;
        } else {
            // just for VLOG
            if (_txn_id == 0) [[unlikely]] {
                _txn_id = params.txn_id();
            }
            // No channel for this index yet: create one and register it.
            TabletsChannelKey key(params.id(), index_id);
            // TODO(plat1ko): CloudTabletsChannel
            tablets_channel = std::make_shared<TabletsChannel>(
                    *StorageEngine::instance(), key, _load_id, _is_high_priority, _self_profile);

            // The map is also read under _tablets_channels_lock (see _report_profile),
            // so the insertion takes that lock in addition to _lock.
            std::lock_guard<std::mutex> map_guard(_tablets_channels_lock);
            _tablets_channels.insert({index_id, tablets_channel});
        }
    }

    if (!params.is_incremental()) {
        RETURN_IF_ERROR(tablets_channel->open(params));
    } else {
        // incremental open would ensure not to open tablet repeatedly
        RETURN_IF_ERROR(tablets_channel->incremental_open(params));
    }

    _opened = true;
    _last_updated_time.store(time(nullptr));
    return Status::OK();
}

// Looks up the tablets channel registered for `index_id` under _lock.
//
// - Found: stores it in `channel`, sets `is_finished` to false and returns OK.
// - Not found but the index is recorded as finished: sets `is_finished` to true and
//   returns OK; `channel` is left untouched.
// - Otherwise: returns InternalError; neither output is modified.
Status LoadChannel::_get_tablets_channel(std::shared_ptr<BaseTabletsChannel>& channel,
                                         bool& is_finished, const int64_t index_id) {
    std::lock_guard<std::mutex> guard(_lock);

    const auto found = _tablets_channels.find(index_id);
    if (found != _tablets_channels.end()) {
        channel = found->second;
        is_finished = false;
        return Status::OK();
    }

    if (_finished_channel_ids.find(index_id) != _finished_channel_ids.end()) {
        // this channel is already finished, just return OK
        is_finished = true;
        return Status::OK();
    }

    std::stringstream err_msg;
    err_msg << "load channel " << _load_id << " add batch with unknown index id: " << index_id;
    return Status::InternalError(err_msg.str());
}

// Handles one add-block request for this load.
//
// Steps: resolve the tablets channel for request.index_id(); forward the block (if
// the request carries one) to it; on eos run the close handling and attach the
// profile to `response`; otherwise attach the profile only when the added-batch
// counter modulo 100 equals 1. The last-updated time is refreshed on every path
// that reaches the end of the function.
//
// Returns OK without doing anything when the index is already finished, and
// propagates any error from the lookup, the block write or the eos handling.
Status LoadChannel::add_batch(const PTabletWriterAddBlockRequest& request,
                              PTabletWriterAddBlockResult* response) {
    SCOPED_TIMER(_add_batch_timer);
    COUNTER_UPDATE(_add_batch_times, 1);
    SCOPED_ATTACH_TASK(_query_thread_context);

    // 1. get tablets channel
    std::shared_ptr<BaseTabletsChannel> tablets_channel;
    bool already_finished = false;
    Status status = _get_tablets_channel(tablets_channel, already_finished, request.index_id());
    if (already_finished || !status.ok()) {
        return status;
    }

    // 2. add block to tablets channel
    if (request.has_block()) {
        RETURN_IF_ERROR(tablets_channel->add_batch(request, response));
        _add_batch_number_counter->update(1);
    }

    // 3. handle eos
    // if channel is incremental, maybe hang on close until all close request arrived.
    const bool is_eos = request.has_eos() && request.eos();
    if (is_eos) {
        status = _handle_eos(tablets_channel.get(), request, response);
        // The profile is reported even when the eos handling failed.
        _report_profile(response);
        if (!status.ok()) {
            return status;
        }
    } else if (_add_batch_number_counter->value() % 100 == 1) {
        _report_profile(response);
    }

    _last_updated_time.store(time(nullptr));
    return status;
}

// Handles an end-of-stream request for `channel`.
//
// Closes the tablets channel on behalf of the sender. If the request asks to hang
// wait, blocks (polling roughly once per millisecond) until the tablets channel
// reports finished or about `_timeout_s` seconds worth of polls have elapsed. When
// the close call reports that the channel is finished, its row statistics are
// recorded, it is removed from the active map and its index id is remembered as
// finished.
//
// Returns the error from channel->close() if it fails, InternalError if the hang
// wait reached the timeout, and OK otherwise.
Status LoadChannel::_handle_eos(BaseTabletsChannel* channel,
                                const PTabletWriterAddBlockRequest& request,
                                PTabletWriterAddBlockResult* response) {
    _self_profile->add_info_string("EosHost", fmt::format("{}", request.backend_id()));
    bool finished = false;
    const auto index_id = request.index_id();

    RETURN_IF_ERROR(channel->close(this, request, response, &finished));

    // for init node, we close waiting(hang on) all close request and let them return together.
    if (request.has_hang_wait() && request.hang_wait()) {
        DCHECK(!channel->is_incremental_channel());
        VLOG_DEBUG << fmt::format("txn {}: reciever index {} close waiting by sender {}", _txn_id,
                                  request.index_id(), request.sender_id());
        // Each poll sleeps 1000us, so 1000 polls approximate one second.
        // maybe config::streaming_load_rpc_max_alive_time_sec
        const int64_t max_wait_rounds = 1000 * _timeout_s;
        // 64-bit counter: it is compared against an int64_t limit and must not wrap.
        int64_t waited_rounds = 0;
        // Stop polling once the limit is reached instead of spinning until the channel
        // finishes: the limit check below would report the same error anyway, and an
        // unbounded loop never returns if the channel is never finished or cancelled.
        while (waited_rounds < max_wait_rounds && !channel->is_finished()) {
            bthread_usleep(1000);
            waited_rounds++;
        }
        // now maybe finished, cancelled or timed out.
        VLOG_TRACE << "reciever close wait finished!" << request.sender_id();
        if (waited_rounds >= max_wait_rounds) {
            return Status::InternalError("Tablets channel didn't wait all close");
        }
    }

    if (finished) {
        std::lock_guard<std::mutex> channel_guard(_lock);
        {
            // Same lock order as open(): _lock first, then _tablets_channels_lock.
            std::lock_guard<std::mutex> map_guard(_tablets_channels_lock);
            // Keep the row statistics so the destructor can log them after the
            // tablets channel itself has been dropped from the map.
            _tablets_channels_rows.insert(std::make_pair(
                    index_id,
                    std::make_pair(channel->total_received_rows(), channel->num_rows_filtered())));
            _tablets_channels.erase(index_id);
        }
        LOG(INFO) << "txn " << _txn_id << " closed tablets_channel " << index_id;
        _finished_channel_ids.emplace(index_id);
    }
    return Status::OK();
}

// Serializes the channel's runtime profile into `response`.
//
// Does nothing when profiling is disabled. Otherwise stamps the self profile with
// the last-updated time, asks every active tablets channel to refresh its profile,
// converts the whole profile tree to thrift and stores the serialized bytes in the
// response. A serialization failure is logged and leaves the response unchanged.
void LoadChannel::_report_profile(PTabletWriterAddBlockResult* response) {
    if (!_enable_profile) {
        return;
    }

    // TabletSink and LoadChannel in BE have an M:N relationship.
    // Every once in a while a LoadChannel returns its own runtime profile to one of the
    // TabletSinks, so usually every TabletSink ends up holding profiles of all LoadChannels,
    // but the copies of the same LoadChannel profile held by different TabletSinks differ in
    // freshness. Each TabletSink periodically reports to FE all LoadChannel profiles it holds,
    // and the timestamp set here is what lets the latest copy of a profile win.
    _self_profile->set_timestamp(_last_updated_time);

    {
        std::lock_guard<std::mutex> map_guard(_tablets_channels_lock);
        for (auto& entry : _tablets_channels) {
            entry.second->refresh_profile();
        }
    }

    TRuntimeProfileTree thrift_tree;
    ThriftSerializer serializer(false, 4096);
    uint8_t* bytes = nullptr;
    uint32_t num_bytes = 0;

    // Held for the tree conversion, the serialization and the write to the response.
    std::lock_guard<SpinLock> serialize_guard(_profile_serialize_lock);
    _profile->to_thrift(&thrift_tree);
    auto st = serializer.serialize(&thrift_tree, &num_bytes, &bytes);
    if (!st.ok()) {
        LOG(WARNING) << "load channel TRuntimeProfileTree serialize failed, errmsg=" << st;
        return;
    }
    response->set_load_channel_profile(
            std::string(reinterpret_cast<const char*>(bytes), num_bytes));
}

// Returns true only if the channel has been opened and no tablets channel remains
// registered. A channel that was never opened is reported as not finished.
bool LoadChannel::is_finished() {
    if (_opened) {
        std::lock_guard<std::mutex> guard(_lock);
        return _tablets_channels.empty();
    }
    return false;
}

// Cancels every tablets channel currently registered, under _lock.
// Individual cancel results are deliberately discarded; always returns OK.
Status LoadChannel::cancel() {
    std::lock_guard<std::mutex> guard(_lock);
    for (auto& entry : _tablets_channels) {
        static_cast<void>(entry.second->cancel());
    }
    return Status::OK();
}

} // namespace doris

Coverage Report

Created: 2024-11-20 15:53

Line	Count	Source (jump to first uncovered line)
1		// Licensed to the Apache Software Foundation (ASF) under one
2		// or more contributor license agreements. See the NOTICE file
3		// distributed with this work for additional information
4		// regarding copyright ownership. The ASF licenses this file
5		// to you under the Apache License, Version 2.0 (the
6		// "License"); you may not use this file except in compliance
7		// with the License. You may obtain a copy of the License at
8		//
9		// http://www.apache.org/licenses/LICENSE-2.0
10		//
11		// Unless required by applicable law or agreed to in writing,
12		// software distributed under the License is distributed on an
13		// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14		// KIND, either express or implied. See the License for the
15		// specific language governing permissions and limitations
16		// under the License.
17
18		#include "runtime/load_channel.h"
19
20		#include <gen_cpp/internal_service.pb.h>
21		#include <glog/logging.h>
22
23		#include "bvar/bvar.h"
24		#include "olap/storage_engine.h"
25		#include "runtime/exec_env.h"
26		#include "runtime/fragment_mgr.h"
27		#include "runtime/memory/mem_tracker.h"
28		#include "runtime/tablets_channel.h"
29		#include "runtime/thread_context.h"
30		#include "runtime/workload_group/workload_group_manager.h"
31
32		namespace doris {
33
34		bvar::Adder<int64_t> g_loadchannel_cnt("loadchannel_cnt");
35
36		LoadChannel::LoadChannel(const UniqueId& load_id, int64_t timeout_s, bool is_high_priority,
37		std::string sender_ip, int64_t backend_id, bool enable_profile)
38		: _load_id(load_id),
39		_timeout_s(timeout_s),
40		_is_high_priority(is_high_priority),
41		_sender_ip(std::move(sender_ip)),
42		_backend_id(backend_id),
43	0	_enable_profile(enable_profile) {
44	0	std::shared_ptr<QueryContext> query_context =
45	0	ExecEnv::GetInstance()->fragment_mgr()->get_query_context(_load_id.to_thrift());
46	0	if (query_context != nullptr) {
47	0	_query_thread_context = {_load_id.to_thrift(), query_context->query_mem_tracker,
48	0	query_context->workload_group()};
49	0	} else {
50	0	_query_thread_context = {
51	0	_load_id.to_thrift(),
52	0	MemTrackerLimiter::create_shared(
53	0	MemTrackerLimiter::Type::LOAD,
54	0	fmt::format("(FromLoadChannel)Load#Id={}", _load_id.to_string()))};
55	0	}
56	0	g_loadchannel_cnt << 1;
57		// _last_updated_time should be set before being inserted to
58		// _load_channels in load_channel_mgr, or it may be erased
59		// immediately by gc thread.
60	0	_last_updated_time.store(time(nullptr));
61	0	_init_profile();
62	0	}
63
64	0	LoadChannel::~LoadChannel() {
65	0	g_loadchannel_cnt << -1;
66	0	std::stringstream rows_str;
67	0	for (const auto& entry : _tablets_channels_rows) {
68	0	rows_str << ", index id: " << entry.first << ", total_received_rows: " << entry.second.first
69	0	<< ", num_rows_filtered: " << entry.second.second;
70	0	}
71	0	LOG(INFO) << "load channel removed"
72	0	<< " load_id=" << _load_id << ", is high priority=" << _is_high_priority
73	0	<< ", sender_ip=" << _sender_ip << rows_str.str();
74	0	}
75
76	0	void LoadChannel::_init_profile() {
77	0	_profile = std::make_unique<RuntimeProfile>("LoadChannels");
78	0	_mgr_add_batch_timer = ADD_TIMER(_profile, "LoadChannelMgrAddBatchTime");
79	0	_handle_mem_limit_timer = ADD_TIMER(_profile, "HandleMemLimitTime");
80	0	_self_profile =
81	0	_profile->create_child(fmt::format("LoadChannel load_id={} (host={}, backend_id={})",
82	0	_load_id.to_string(), _sender_ip, _backend_id),
83	0	true, true);
84	0	_add_batch_number_counter = ADD_COUNTER(_self_profile, "NumberBatchAdded", TUnit::UNIT);
85	0	_peak_memory_usage_counter = ADD_COUNTER(_self_profile, "PeakMemoryUsage", TUnit::BYTES);
86	0	_add_batch_timer = ADD_TIMER(_self_profile, "AddBatchTime");
87	0	_handle_eos_timer = ADD_CHILD_TIMER(_self_profile, "HandleEosTime", "AddBatchTime");
88	0	_add_batch_times = ADD_COUNTER(_self_profile, "AddBatchTimes", TUnit::UNIT);
89	0	}
90
91	0	Status LoadChannel::open(const PTabletWriterOpenRequest& params) {
92	0	SCOPED_ATTACH_TASK(_query_thread_context);
93	0	int64_t index_id = params.index_id();
94	0	std::shared_ptr<BaseTabletsChannel> channel;
95	0	{
96	0	std::lock_guard<std::mutex> l(_lock);
97	0	auto it = _tablets_channels.find(index_id);
98	0	if (it != _tablets_channels.end()) {
99	0	channel = it->second;
100	0	} else {
101		// just for VLOG
102	0	if (_txn_id == 0) [[unlikely]] {
103	0	_txn_id = params.txn_id();
104	0	}
105		// create a new tablets channel
106	0	TabletsChannelKey key(params.id(), index_id);
107		// TODO(plat1ko): CloudTabletsChannel
108	0	channel = std::make_shared<TabletsChannel>(*StorageEngine::instance(), key, _load_id,
109	0	_is_high_priority, _self_profile);
110	0	{
111	0	std::lock_guard<std::mutex> l(_tablets_channels_lock);
112	0	_tablets_channels.insert({index_id, channel});
113	0	}
114	0	}
115	0	}
116
117	0	if (params.is_incremental()) {
118		// incremental open would ensure not to open tablet repeatedly
119	0	RETURN_IF_ERROR(channel->incremental_open(params));
120	0	} else {
121	0	RETURN_IF_ERROR(channel->open(params));
122	0	}
123
124	0	_opened = true;
125	0	_last_updated_time.store(time(nullptr));
126	0	return Status::OK();
127	0	}
128
129		Status LoadChannel::_get_tablets_channel(std::shared_ptr<BaseTabletsChannel>& channel,
130	0	bool& is_finished, const int64_t index_id) {
131	0	std::lock_guard<std::mutex> l(_lock);
132	0	auto it = _tablets_channels.find(index_id);
133	0	if (it == _tablets_channels.end()) {
134	0	if (_finished_channel_ids.find(index_id) != _finished_channel_ids.end()) {
135		// this channel is already finished, just return OK
136	0	is_finished = true;
137	0	return Status::OK();
138	0	}
139	0	std::stringstream ss;
140	0	ss << "load channel " << _load_id << " add batch with unknown index id: " << index_id;
141	0	return Status::InternalError(ss.str());
142	0	}
143
144	0	is_finished = false;
145	0	channel = it->second;
146	0	return Status::OK();
147	0	}
148
149		Status LoadChannel::add_batch(const PTabletWriterAddBlockRequest& request,
150	0	PTabletWriterAddBlockResult* response) {
151	0	SCOPED_TIMER(_add_batch_timer);
152	0	COUNTER_UPDATE(_add_batch_times, 1);
153	0	SCOPED_ATTACH_TASK(_query_thread_context);
154	0	int64_t index_id = request.index_id();
155		// 1. get tablets channel
156	0	std::shared_ptr<BaseTabletsChannel> channel;
157	0	bool is_finished = false;
158	0	Status st = _get_tablets_channel(channel, is_finished, index_id);
159	0	if (!st.ok() || is_finished) {
160	0	return st;
161	0	}
162
163		// 2. add block to tablets channel
164	0	if (request.has_block()) {
165	0	RETURN_IF_ERROR(channel->add_batch(request, response));
166	0	_add_batch_number_counter->update(1);
167	0	}
168
169		// 3. handle eos
170		// if channel is incremental, maybe hang on close until all close request arrived.
171	0	if (request.has_eos() && request.eos()) {
172	0	st = _handle_eos(channel.get(), request, response);
173	0	_report_profile(response);
174	0	if (!st.ok()) {
175	0	return st;
176	0	}
177	0	} else if (_add_batch_number_counter->value() % 100 == 1) {
178	0	_report_profile(response);
179	0	}
180	0	_last_updated_time.store(time(nullptr));
181	0	return st;
182	0	}
183
184		Status LoadChannel::_handle_eos(BaseTabletsChannel* channel,
185		const PTabletWriterAddBlockRequest& request,
186	0	PTabletWriterAddBlockResult* response) {
187	0	_self_profile->add_info_string("EosHost", fmt::format("{}", request.backend_id()));
188	0	bool finished = false;
189	0	auto index_id = request.index_id();
190
191	0	RETURN_IF_ERROR(channel->close(this, request, response, &finished));
192
193		// for init node, we close waiting(hang on) all close request and let them return together.
194	0	if (request.has_hang_wait() && request.hang_wait()) {
195	0	DCHECK(!channel->is_incremental_channel());
196	0	VLOG_DEBUG << fmt::format("txn {}: reciever index {} close waiting by sender {}", _txn_id,
197	0	request.index_id(), request.sender_id());
198	0	int count = 0;
199	0	while (!channel->is_finished()) {
200	0	bthread_usleep(1000);
201	0	count++;
202	0	}
203		// now maybe finished or cancelled.
204	0	VLOG_TRACE << "reciever close wait finished!" << request.sender_id();
205	0	if (count >= 1000 * _timeout_s) { // maybe config::streaming_load_rpc_max_alive_time_sec
206	0	return Status::InternalError("Tablets channel didn't wait all close");
207	0	}
208	0	}
209
210	0	if (finished) {
211	0	std::lock_guard<std::mutex> l(_lock);
212	0	{
213	0	std::lock_guard<std::mutex> l(_tablets_channels_lock);
214	0	_tablets_channels_rows.insert(std::make_pair(
215	0	index_id,
216	0	std::make_pair(channel->total_received_rows(), channel->num_rows_filtered())));
217	0	_tablets_channels.erase(index_id);
218	0	}
219	0	LOG(INFO) << "txn " << _txn_id << " closed tablets_channel " << index_id;
220	0	_finished_channel_ids.emplace(index_id);
221	0	}
222	0	return Status::OK();
223	0	}
224
225	0	void LoadChannel::_report_profile(PTabletWriterAddBlockResult* response) {
226	0	if (!_enable_profile) {
227	0	return;
228	0	}
229
230		// TabletSink and LoadChannel in BE are M: N relationship,
231		// Every once in a while LoadChannel will randomly return its own runtime profile to a TabletSink,
232		// so usually all LoadChannel runtime profiles are saved on each TabletSink,
233		// and the timeliness of the same LoadChannel profile saved on different TabletSinks is different,
234		// and each TabletSink will periodically send fe reports all the LoadChannel profiles saved by itself,
235		// and ensures to update the latest LoadChannel profile according to the timestamp.
236	0	_self_profile->set_timestamp(_last_updated_time);
237
238	0	{
239	0	std::lock_guard<std::mutex> l(_tablets_channels_lock);
240	0	for (auto& it : _tablets_channels) {
241	0	it.second->refresh_profile();
242	0	}
243	0	}
244
245	0	TRuntimeProfileTree tprofile;
246	0	ThriftSerializer ser(false, 4096);
247	0	uint8_t* buf = nullptr;
248	0	uint32_t len = 0;
249	0	std::lock_guard<SpinLock> l(_profile_serialize_lock);
250	0	_profile->to_thrift(&tprofile);
251	0	auto st = ser.serialize(&tprofile, &len, &buf);
252	0	if (st.ok()) {
253	0	response->set_load_channel_profile(std::string((const char*)buf, len));
254	0	} else {
255	0	LOG(WARNING) << "load channel TRuntimeProfileTree serialize failed, errmsg=" << st;
256	0	}
257	0	}
258
259	0	bool LoadChannel::is_finished() {
260	0	if (!_opened) {
261	0	return false;
262	0	}
263	0	std::lock_guard<std::mutex> l(_lock);
264	0	return _tablets_channels.empty();
265	0	}
266
267	0	Status LoadChannel::cancel() {
268	0	std::lock_guard<std::mutex> l(_lock);
269	0	for (auto& it : _tablets_channels) {
270	0	static_cast<void>(it.second->cancel());
271	0	}
272	0	return Status::OK();
273	0	}
274
275		} // namespace doris