Coverage Report

Created: 2026-05-25 15:28

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/service/internal_service.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "service/internal_service.h"
19
20
#include <assert.h>
21
#include <brpc/closure_guard.h>
22
#include <brpc/controller.h>
23
#include <bthread/bthread.h>
24
#include <bthread/types.h>
25
#include <butil/errno.h>
26
#include <butil/iobuf.h>
27
#include <fcntl.h>
28
#include <fmt/core.h>
29
#include <gen_cpp/DataSinks_types.h>
30
#include <gen_cpp/MasterService_types.h>
31
#include <gen_cpp/PaloInternalService_types.h>
32
#include <gen_cpp/PlanNodes_types.h>
33
#include <gen_cpp/Status_types.h>
34
#include <gen_cpp/Types_types.h>
35
#include <gen_cpp/internal_service.pb.h>
36
#include <gen_cpp/olap_file.pb.h>
37
#include <gen_cpp/segment_v2.pb.h>
38
#include <gen_cpp/types.pb.h>
39
#include <google/protobuf/stubs/callback.h>
40
#include <stddef.h>
41
#include <stdint.h>
42
#include <sys/stat.h>
43
44
#include <algorithm>
45
#include <exception>
46
#include <filesystem>
47
#include <memory>
48
#include <set>
49
#include <sstream>
50
#include <string>
51
#include <utility>
52
#include <vector>
53
54
#include "cloud/cloud_storage_engine.h"
55
#include "cloud/cloud_tablet_mgr.h"
56
#include "cloud/config.h"
57
#include "common/config.h"
58
#include "common/exception.h"
59
#include "common/logging.h"
60
#include "common/metrics/doris_metrics.h"
61
#include "common/metrics/metrics.h"
62
#include "common/signal_handler.h"
63
#include "common/status.h"
64
#include "core/block/block.h"
65
#include "core/data_type/data_type.h"
66
#include "exec/common/variant_util.h"
67
#include "exec/exchange/vdata_stream_mgr.h"
68
#include "exec/rowid_fetcher.h"
69
#include "exec/sink/writer/varrow_flight_result_writer.h"
70
#include "exec/sink/writer/vmysql_result_writer.h"
71
#include "exprs/function/dictionary_factory.h"
72
#include "format/arrow/arrow_row_batch.h"
73
#include "format/csv/csv_reader.h"
74
#include "format/generic_reader.h"
75
#include "format/jni/jni_reader.h"
76
#include "format/json/new_json_reader.h"
77
#include "format/native/native_reader.h"
78
#include "format/orc/vorc_reader.h"
79
#include "format/parquet/vparquet_reader.h"
80
#include "format/text/text_reader.h"
81
#ifdef BUILD_RUST_READERS
82
#include "format/lance/lance_rust_reader.h"
83
#endif
84
#include "io/fs/local_file_system.h"
85
#include "io/fs/stream_load_pipe.h"
86
#include "io/io_common.h"
87
#include "load/channel/load_channel_mgr.h"
88
#include "load/channel/load_stream_mgr.h"
89
#include "load/delta_writer/delta_writer.h"
90
#include "load/group_commit/wal/wal_manager.h"
91
#include "load/routine_load/routine_load_task_executor.h"
92
#include "load/stream_load/new_load_stream_mgr.h"
93
#include "load/stream_load/stream_load_context.h"
94
#include "runtime/cache/result_cache.h"
95
#include "runtime/cdc_client_mgr.h"
96
#include "runtime/descriptors.h"
97
#include "runtime/exec_env.h"
98
#include "runtime/fold_constant_executor.h"
99
#include "runtime/fragment_mgr.h"
100
#include "runtime/result_block_buffer.h"
101
#include "runtime/result_buffer_mgr.h"
102
#include "runtime/runtime_profile.h"
103
#include "runtime/thread_context.h"
104
#include "runtime/workload_group/workload_group.h"
105
#include "runtime/workload_group/workload_group_manager.h"
106
#include "service/backend_options.h"
107
#include "service/http/http_client.h"
108
#include "service/point_query_executor.h"
109
#include "storage/data_dir.h"
110
#include "storage/index/inverted/inverted_index_desc.h"
111
#include "storage/olap_common.h"
112
#include "storage/olap_define.h"
113
#include "storage/rowset/beta_rowset.h"
114
#include "storage/rowset/rowset.h"
115
#include "storage/rowset/rowset_factory.h"
116
#include "storage/rowset/rowset_meta.h"
117
#include "storage/segment/column_reader.h"
118
#include "storage/storage_engine.h"
119
#include "storage/tablet/tablet_fwd.h"
120
#include "storage/tablet/tablet_manager.h"
121
#include "storage/tablet/tablet_schema.h"
122
#include "storage/txn/txn_manager.h"
123
#include "util/async_io.h"
124
#include "util/brpc_client_cache.h"
125
#include "util/brpc_closure.h"
126
#include "util/jdbc_utils.h"
127
#include "util/jsonb/serialize.h"
128
#include "util/md5.h"
129
#include "util/network_util.h"
130
#include "util/proto_util.h"
131
#include "util/stopwatch.hpp"
132
#include "util/string_util.h"
133
#include "util/thrift_util.h"
134
#include "util/time.h"
135
#include "util/uid_util.h"
136
137
namespace google {
138
namespace protobuf {
139
class RpcController;
140
} // namespace protobuf
141
} // namespace google
142
143
namespace doris {
144
#include "common/compile_check_avoid_begin.h"
145
using namespace ErrorCode;
146
147
const uint32_t DOWNLOAD_FILE_MAX_RETRY = 3;
148
149
DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(heavy_work_pool_queue_size, MetricUnit::NOUNIT);
150
DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(light_work_pool_queue_size, MetricUnit::NOUNIT);
151
DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(heavy_work_active_threads, MetricUnit::NOUNIT);
152
DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(light_work_active_threads, MetricUnit::NOUNIT);
153
154
DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(heavy_work_pool_max_queue_size, MetricUnit::NOUNIT);
155
DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(light_work_pool_max_queue_size, MetricUnit::NOUNIT);
156
DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(heavy_work_max_threads, MetricUnit::NOUNIT);
157
DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(light_work_max_threads, MetricUnit::NOUNIT);
158
159
DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(arrow_flight_work_pool_queue_size, MetricUnit::NOUNIT);
160
DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(arrow_flight_work_active_threads, MetricUnit::NOUNIT);
161
DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(arrow_flight_work_pool_max_queue_size, MetricUnit::NOUNIT);
162
DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(arrow_flight_work_max_threads, MetricUnit::NOUNIT);
163
164
static bvar::LatencyRecorder g_process_remote_fetch_rowsets_latency("process_remote_fetch_rowsets");
165
166
bthread_key_t btls_key;
167
168
1.73M
static void thread_context_deleter(void* d) {
169
1.73M
    delete static_cast<ThreadContext*>(d);
170
1.73M
}
171
172
template <typename T>
173
concept CanCancel = requires(T* response) { response->mutable_status(); };
174
175
template <typename T>
176
0
void offer_failed(T* response, google::protobuf::Closure* done, const FifoThreadPool& pool) {
177
0
    brpc::ClosureGuard closure_guard(done);
178
0
    LOG(WARNING) << "fail to offer request to the work pool, pool=" << pool.get_info();
179
0
}
Unexecuted instantiation: _ZN5doris12offer_failedINS_25PTabletWriterCancelResultEEEvPT_PN6google8protobuf7ClosureERKNS_14WorkThreadPoolILb0EEE
Unexecuted instantiation: _ZN5doris12offer_failedINS_14PCacheResponseEEEvPT_PN6google8protobuf7ClosureERKNS_14WorkThreadPoolILb0EEE
Unexecuted instantiation: _ZN5doris12offer_failedINS_17PFetchCacheResultEEEvPT_PN6google8protobuf7ClosureERKNS_14WorkThreadPoolILb0EEE
180
181
template <CanCancel T>
182
0
void offer_failed(T* response, google::protobuf::Closure* done, const FifoThreadPool& pool) {
183
0
    brpc::ClosureGuard closure_guard(done);
184
    // Should use status to generate the protobuf message, because it will encode Backend Info
185
    // into the error message, so that we can tell which backend's pool is full.
186
0
    Status st = Status::Error<TStatusCode::CANCELLED>(
187
0
            "fail to offer request to the work pool, pool={}", pool.get_info());
188
0
    st.to_protobuf(response->mutable_status());
189
0
    LOG(WARNING) << "cancelled due to fail to offer request to the work pool, pool="
190
0
                 << pool.get_info();
191
0
}
Unexecuted instantiation: _ZN5doris12offer_failedITkNS_9CanCancelENS_23PTabletWriterOpenResultEEEvPT_PN6google8protobuf7ClosureERKNS_14WorkThreadPoolILb0EEE
Unexecuted instantiation: _ZN5doris12offer_failedITkNS_9CanCancelENS_23PExecPlanFragmentResultEEEvPT_PN6google8protobuf7ClosureERKNS_14WorkThreadPoolILb0EEE
Unexecuted instantiation: _ZN5doris12offer_failedITkNS_9CanCancelENS_23POpenLoadStreamResponseEEEvPT_PN6google8protobuf7ClosureERKNS_14WorkThreadPoolILb0EEE
Unexecuted instantiation: _ZN5doris12offer_failedITkNS_9CanCancelENS_27PTabletWriterAddBlockResultEEEvPT_PN6google8protobuf7ClosureERKNS_14WorkThreadPoolILb0EEE
Unexecuted instantiation: _ZN5doris12offer_failedITkNS_9CanCancelENS_25PCancelPlanFragmentResultEEEvPT_PN6google8protobuf7ClosureERKNS_14WorkThreadPoolILb0EEE
Unexecuted instantiation: _ZN5doris12offer_failedITkNS_9CanCancelENS_21PFetchArrowDataResultEEEvPT_PN6google8protobuf7ClosureERKNS_14WorkThreadPoolILb0EEE
Unexecuted instantiation: _ZN5doris12offer_failedITkNS_9CanCancelENS_26POutfileWriteSuccessResultEEEvPT_PN6google8protobuf7ClosureERKNS_14WorkThreadPoolILb0EEE
Unexecuted instantiation: _ZN5doris12offer_failedITkNS_9CanCancelENS_23PFetchTableSchemaResultEEEvPT_PN6google8protobuf7ClosureERKNS_14WorkThreadPoolILb0EEE
Unexecuted instantiation: _ZN5doris12offer_failedITkNS_9CanCancelENS_29PFetchArrowFlightSchemaResultEEEvPT_PN6google8protobuf7ClosureERKNS_14WorkThreadPoolILb0EEE
Unexecuted instantiation: _ZN5doris12offer_failedITkNS_9CanCancelENS_24PTabletKeyLookupResponseEEEvPT_PN6google8protobuf7ClosureERKNS_14WorkThreadPoolILb0EEE
Unexecuted instantiation: _ZN5doris12offer_failedITkNS_9CanCancelENS_25PJdbcTestConnectionResultEEEvPT_PN6google8protobuf7ClosureERKNS_14WorkThreadPoolILb0EEE
Unexecuted instantiation: _ZN5doris12offer_failedITkNS_9CanCancelENS_20PFetchColIdsResponseEEEvPT_PN6google8protobuf7ClosureERKNS_14WorkThreadPoolILb0EEE
Unexecuted instantiation: _ZN5doris12offer_failedITkNS_9CanCancelENS_26PFetchRemoteSchemaResponseEEEvPT_PN6google8protobuf7ClosureERKNS_14WorkThreadPoolILb0EEE
Unexecuted instantiation: _ZN5doris12offer_failedITkNS_9CanCancelENS_12PProxyResultEEEvPT_PN6google8protobuf7ClosureERKNS_14WorkThreadPoolILb0EEE
Unexecuted instantiation: _ZN5doris12offer_failedITkNS_9CanCancelENS_20PMergeFilterResponseEEEvPT_PN6google8protobuf7ClosureERKNS_14WorkThreadPoolILb0EEE
Unexecuted instantiation: _ZN5doris12offer_failedITkNS_9CanCancelENS_23PSendFilterSizeResponseEEEvPT_PN6google8protobuf7ClosureERKNS_14WorkThreadPoolILb0EEE
Unexecuted instantiation: _ZN5doris12offer_failedITkNS_9CanCancelENS_23PSyncFilterSizeResponseEEEvPT_PN6google8protobuf7ClosureERKNS_14WorkThreadPoolILb0EEE
Unexecuted instantiation: _ZN5doris12offer_failedITkNS_9CanCancelENS_22PPublishFilterResponseEEEvPT_PN6google8protobuf7ClosureERKNS_14WorkThreadPoolILb0EEE
Unexecuted instantiation: _ZN5doris12offer_failedITkNS_9CanCancelENS_15PSendDataResultEEEvPT_PN6google8protobuf7ClosureERKNS_14WorkThreadPoolILb0EEE
Unexecuted instantiation: _ZN5doris12offer_failedITkNS_9CanCancelENS_13PCommitResultEEEvPT_PN6google8protobuf7ClosureERKNS_14WorkThreadPoolILb0EEE
Unexecuted instantiation: _ZN5doris12offer_failedITkNS_9CanCancelENS_15PRollbackResultEEEvPT_PN6google8protobuf7ClosureERKNS_14WorkThreadPoolILb0EEE
Unexecuted instantiation: _ZN5doris12offer_failedITkNS_9CanCancelENS_19PConstantExprResultEEEvPT_PN6google8protobuf7ClosureERKNS_14WorkThreadPoolILb0EEE
Unexecuted instantiation: _ZN5doris12offer_failedITkNS_9CanCancelENS_26PTransmitRecCTEBlockResultEEEvPT_PN6google8protobuf7ClosureERKNS_14WorkThreadPoolILb0EEE
Unexecuted instantiation: _ZN5doris12offer_failedITkNS_9CanCancelENS_20PRerunFragmentResultEEEvPT_PN6google8protobuf7ClosureERKNS_14WorkThreadPoolILb0EEE
Unexecuted instantiation: _ZN5doris12offer_failedITkNS_9CanCancelENS_20PResetGlobalRfResultEEEvPT_PN6google8protobuf7ClosureERKNS_14WorkThreadPoolILb0EEE
Unexecuted instantiation: _ZN5doris12offer_failedITkNS_9CanCancelENS_19PTransmitDataResultEEEvPT_PN6google8protobuf7ClosureERKNS_14WorkThreadPoolILb0EEE
Unexecuted instantiation: _ZN5doris12offer_failedITkNS_9CanCancelENS_24PCheckRPCChannelResponseEEEvPT_PN6google8protobuf7ClosureERKNS_14WorkThreadPoolILb0EEE
Unexecuted instantiation: _ZN5doris12offer_failedITkNS_9CanCancelENS_24PResetRPCChannelResponseEEEvPT_PN6google8protobuf7ClosureERKNS_14WorkThreadPoolILb0EEE
Unexecuted instantiation: _ZN5doris12offer_failedITkNS_9CanCancelENS_23PTabletWriteSlaveResultEEEvPT_PN6google8protobuf7ClosureERKNS_14WorkThreadPoolILb0EEE
Unexecuted instantiation: _ZN5doris12offer_failedITkNS_9CanCancelENS_27PTabletWriteSlaveDoneResultEEEvPT_PN6google8protobuf7ClosureERKNS_14WorkThreadPoolILb0EEE
Unexecuted instantiation: _ZN5doris12offer_failedITkNS_9CanCancelENS_17PMultiGetResponseEEEvPT_PN6google8protobuf7ClosureERKNS_14WorkThreadPoolILb0EEE
Unexecuted instantiation: _ZN5doris12offer_failedITkNS_9CanCancelENS_13PGlobResponseEEEvPT_PN6google8protobuf7ClosureERKNS_14WorkThreadPoolILb0EEE
Unexecuted instantiation: _ZN5doris12offer_failedITkNS_9CanCancelENS_26PGroupCommitInsertResponseEEEvPT_PN6google8protobuf7ClosureERKNS_14WorkThreadPoolILb0EEE
Unexecuted instantiation: _ZN5doris12offer_failedITkNS_9CanCancelENS_24PGetWalQueueSizeResponseEEEvPT_PN6google8protobuf7ClosureERKNS_14WorkThreadPoolILb0EEE
Unexecuted instantiation: _ZN5doris12offer_failedITkNS_9CanCancelENS_22PGetBeResourceResponseEEEvPT_PN6google8protobuf7ClosureERKNS_14WorkThreadPoolILb0EEE
Unexecuted instantiation: _ZN5doris12offer_failedITkNS_9CanCancelENS_23PRequestCdcClientResultEEEvPT_PN6google8protobuf7ClosureERKNS_14WorkThreadPoolILb0EEE
192
193
template <typename T>
194
class NewHttpClosure : public ::google::protobuf::Closure {
195
public:
196
    NewHttpClosure(google::protobuf::Closure* done) : _done(done) {}
197
0
    NewHttpClosure(T* request, google::protobuf::Closure* done) : _request(request), _done(done) {}
Unexecuted instantiation: _ZN5doris14NewHttpClosureINS_28PTabletWriterAddBlockRequestEEC2EPS1_PN6google8protobuf7ClosureE
Unexecuted instantiation: _ZN5doris14NewHttpClosureINS_19PTransmitDataParamsEEC2EPS1_PN6google8protobuf7ClosureE
198
199
0
    void Run() override {
200
0
        if (_request != nullptr) {
201
0
            delete _request;
202
0
            _request = nullptr;
203
0
        }
204
0
        if (_done != nullptr) {
205
0
            _done->Run();
206
0
        }
207
0
        delete this;
208
0
    }
Unexecuted instantiation: _ZN5doris14NewHttpClosureINS_28PTabletWriterAddBlockRequestEE3RunEv
Unexecuted instantiation: _ZN5doris14NewHttpClosureINS_19PTransmitDataParamsEE3RunEv
209
210
private:
211
    T* _request = nullptr;
212
    google::protobuf::Closure* _done = nullptr;
213
};
214
215
PInternalService::PInternalService(ExecEnv* exec_env)
216
7
        : _exec_env(exec_env),
217
          // The heavy thread pool is used for load processing and any other work that reads disk or accesses the network.
218
7
          _heavy_work_pool(config::brpc_heavy_work_pool_threads != -1
219
7
                                   ? config::brpc_heavy_work_pool_threads
220
7
                                   : std::max(128, CpuInfo::num_cores() * 4),
221
7
                           config::brpc_heavy_work_pool_max_queue_size != -1
222
7
                                   ? config::brpc_heavy_work_pool_max_queue_size
223
7
                                   : std::max(10240, CpuInfo::num_cores() * 320),
224
7
                           "brpc_heavy"),
225
226
          // The light thread pool should only be used in query processing logic. All handlers should be very light: they must not take locks or access disk.
227
7
          _light_work_pool(config::brpc_light_work_pool_threads != -1
228
7
                                   ? config::brpc_light_work_pool_threads
229
7
                                   : std::max(128, CpuInfo::num_cores() * 4),
230
7
                           config::brpc_light_work_pool_max_queue_size != -1
231
7
                                   ? config::brpc_light_work_pool_max_queue_size
232
7
                                   : std::max(10240, CpuInfo::num_cores() * 320),
233
7
                           "brpc_light"),
234
7
          _arrow_flight_work_pool(config::brpc_arrow_flight_work_pool_threads != -1
235
7
                                          ? config::brpc_arrow_flight_work_pool_threads
236
7
                                          : std::max(512, CpuInfo::num_cores() * 2),
237
7
                                  config::brpc_arrow_flight_work_pool_max_queue_size != -1
238
7
                                          ? config::brpc_arrow_flight_work_pool_max_queue_size
239
7
                                          : std::max(20480, CpuInfo::num_cores() * 640),
240
7
                                  "brpc_arrow_flight") {
241
7
    REGISTER_HOOK_METRIC(heavy_work_pool_queue_size,
242
7
                         [this]() { return _heavy_work_pool.get_queue_size(); });
243
7
    REGISTER_HOOK_METRIC(light_work_pool_queue_size,
244
7
                         [this]() { return _light_work_pool.get_queue_size(); });
245
7
    REGISTER_HOOK_METRIC(heavy_work_active_threads,
246
7
                         [this]() { return _heavy_work_pool.get_active_threads(); });
247
7
    REGISTER_HOOK_METRIC(light_work_active_threads,
248
7
                         [this]() { return _light_work_pool.get_active_threads(); });
249
250
7
    REGISTER_HOOK_METRIC(heavy_work_pool_max_queue_size,
251
7
                         []() { return config::brpc_heavy_work_pool_max_queue_size; });
252
7
    REGISTER_HOOK_METRIC(light_work_pool_max_queue_size,
253
7
                         []() { return config::brpc_light_work_pool_max_queue_size; });
254
7
    REGISTER_HOOK_METRIC(heavy_work_max_threads,
255
7
                         []() { return config::brpc_heavy_work_pool_threads; });
256
7
    REGISTER_HOOK_METRIC(light_work_max_threads,
257
7
                         []() { return config::brpc_light_work_pool_threads; });
258
259
7
    REGISTER_HOOK_METRIC(arrow_flight_work_pool_queue_size,
260
7
                         [this]() { return _arrow_flight_work_pool.get_queue_size(); });
261
7
    REGISTER_HOOK_METRIC(arrow_flight_work_active_threads,
262
7
                         [this]() { return _arrow_flight_work_pool.get_active_threads(); });
263
7
    REGISTER_HOOK_METRIC(arrow_flight_work_pool_max_queue_size,
264
7
                         []() { return config::brpc_arrow_flight_work_pool_max_queue_size; });
265
7
    REGISTER_HOOK_METRIC(arrow_flight_work_max_threads,
266
7
                         []() { return config::brpc_arrow_flight_work_pool_threads; });
267
268
7
    _exec_env->load_stream_mgr()->set_heavy_work_pool(&_heavy_work_pool);
269
270
7
    CHECK_EQ(0, bthread_key_create(&btls_key, thread_context_deleter));
271
7
    CHECK_EQ(0, bthread_key_create(&AsyncIO::btls_io_ctx_key, AsyncIO::io_ctx_key_deleter));
272
7
}
273
274
PInternalServiceImpl::PInternalServiceImpl(StorageEngine& engine, ExecEnv* exec_env)
275
6
        : PInternalService(exec_env), _engine(engine) {}
276
277
3
PInternalServiceImpl::~PInternalServiceImpl() = default;
278
279
3
PInternalService::~PInternalService() {
280
3
    DEREGISTER_HOOK_METRIC(heavy_work_pool_queue_size);
281
3
    DEREGISTER_HOOK_METRIC(light_work_pool_queue_size);
282
3
    DEREGISTER_HOOK_METRIC(heavy_work_active_threads);
283
3
    DEREGISTER_HOOK_METRIC(light_work_active_threads);
284
285
3
    DEREGISTER_HOOK_METRIC(heavy_work_pool_max_queue_size);
286
3
    DEREGISTER_HOOK_METRIC(light_work_pool_max_queue_size);
287
3
    DEREGISTER_HOOK_METRIC(heavy_work_max_threads);
288
3
    DEREGISTER_HOOK_METRIC(light_work_max_threads);
289
290
3
    DEREGISTER_HOOK_METRIC(arrow_flight_work_pool_queue_size);
291
3
    DEREGISTER_HOOK_METRIC(arrow_flight_work_active_threads);
292
3
    DEREGISTER_HOOK_METRIC(arrow_flight_work_pool_max_queue_size);
293
3
    DEREGISTER_HOOK_METRIC(arrow_flight_work_max_threads);
294
295
3
    CHECK_EQ(0, bthread_key_delete(btls_key));
296
3
    CHECK_EQ(0, bthread_key_delete(AsyncIO::btls_io_ctx_key));
297
3
}
298
299
void PInternalService::tablet_writer_open(google::protobuf::RpcController* controller,
300
                                          const PTabletWriterOpenRequest* request,
301
                                          PTabletWriterOpenResult* response,
302
44.8k
                                          google::protobuf::Closure* done) {
303
44.9k
    bool ret = _heavy_work_pool.try_offer([this, request, response, done]() {
304
18.4E
        VLOG_RPC << "tablet writer open, id=" << request->id()
305
18.4E
                 << ", index_id=" << request->index_id() << ", txn_id=" << request->txn_id();
306
44.9k
        signal::SignalTaskIdKeeper keeper(request->id());
307
44.9k
        brpc::ClosureGuard closure_guard(done);
308
44.9k
        auto st = _exec_env->load_channel_mgr()->open(*request);
309
44.9k
        if (!st.ok()) {
310
0
            LOG(WARNING) << "load channel open failed, message=" << st << ", id=" << request->id()
311
0
                         << ", index_id=" << request->index_id()
312
0
                         << ", txn_id=" << request->txn_id();
313
0
        }
314
44.9k
        st.to_protobuf(response->mutable_status());
315
44.9k
    });
316
44.8k
    if (!ret) {
317
0
        offer_failed(response, done, _heavy_work_pool);
318
0
        return;
319
0
    }
320
44.8k
}
321
322
void PInternalService::exec_plan_fragment(google::protobuf::RpcController* controller,
323
                                          const PExecPlanFragmentRequest* request,
324
                                          PExecPlanFragmentResult* response,
325
168k
                                          google::protobuf::Closure* done) {
326
168k
    timeval tv {};
327
168k
    gettimeofday(&tv, nullptr);
328
168k
    response->set_received_time(tv.tv_sec * 1000LL + tv.tv_usec / 1000);
329
168k
    bool ret = _light_work_pool.try_offer([this, controller, request, response, done]() {
330
168k
        _exec_plan_fragment_in_pthread(controller, request, response, done);
331
168k
    });
332
168k
    if (!ret) {
333
0
        offer_failed(response, done, _light_work_pool);
334
0
        return;
335
0
    }
336
168k
}
337
338
void PInternalService::_exec_plan_fragment_in_pthread(google::protobuf::RpcController* controller,
339
                                                      const PExecPlanFragmentRequest* request,
340
                                                      PExecPlanFragmentResult* response,
341
288k
                                                      google::protobuf::Closure* done) {
342
288k
    timeval tv1 {};
343
288k
    gettimeofday(&tv1, nullptr);
344
288k
    response->set_execution_time(tv1.tv_sec * 1000LL + tv1.tv_usec / 1000);
345
288k
    brpc::ClosureGuard closure_guard(done);
346
288k
    auto st = Status::OK();
347
18.4E
    bool compact = request->has_compact() ? request->compact() : false;
348
288k
    PFragmentRequestVersion version =
349
18.4E
            request->has_version() ? request->version() : PFragmentRequestVersion::VERSION_1;
350
288k
    try {
351
288k
        st = _exec_plan_fragment_impl(request->request(), version, compact);
352
288k
    } catch (const Exception& e) {
353
0
        st = e.to_status();
354
0
    } catch (const std::exception& e) {
355
0
        st = Status::Error(ErrorCode::INTERNAL_ERROR, e.what());
356
0
    } catch (...) {
357
0
        st = Status::Error(ErrorCode::INTERNAL_ERROR,
358
0
                           "_exec_plan_fragment_impl meet unknown error");
359
0
    }
360
288k
    if (!st.ok()) {
361
1.20k
        LOG(WARNING) << "exec plan fragment failed, errmsg=" << st;
362
1.20k
    }
363
287k
    st.to_protobuf(response->mutable_status());
364
287k
    timeval tv2 {};
365
287k
    gettimeofday(&tv2, nullptr);
366
287k
    response->set_execution_done_time(tv2.tv_sec * 1000LL + tv2.tv_usec / 1000);
367
287k
}
368
369
void PInternalService::exec_plan_fragment_prepare(google::protobuf::RpcController* controller,
370
                                                  const PExecPlanFragmentRequest* request,
371
                                                  PExecPlanFragmentResult* response,
372
119k
                                                  google::protobuf::Closure* done) {
373
119k
    timeval tv {};
374
119k
    gettimeofday(&tv, nullptr);
375
119k
    response->set_received_time(tv.tv_sec * 1000LL + tv.tv_usec / 1000);
376
119k
    bool ret = _light_work_pool.try_offer([this, controller, request, response, done]() {
377
119k
        _exec_plan_fragment_in_pthread(controller, request, response, done);
378
119k
    });
379
119k
    if (!ret) {
380
0
        offer_failed(response, done, _light_work_pool);
381
0
        return;
382
0
    }
383
119k
}
384
385
void PInternalService::exec_plan_fragment_start(google::protobuf::RpcController* /*controller*/,
386
                                                const PExecPlanFragmentStartRequest* request,
387
                                                PExecPlanFragmentResult* result,
388
119k
                                                google::protobuf::Closure* done) {
389
119k
    timeval tv {};
390
119k
    gettimeofday(&tv, nullptr);
391
119k
    result->set_received_time(tv.tv_sec * 1000LL + tv.tv_usec / 1000);
392
119k
    bool ret = _light_work_pool.try_offer([this, request, result, done]() {
393
119k
        timeval tv1 {};
394
119k
        gettimeofday(&tv1, nullptr);
395
119k
        result->set_execution_time(tv1.tv_sec * 1000LL + tv1.tv_usec / 1000);
396
119k
        brpc::ClosureGuard closure_guard(done);
397
119k
        auto st = _exec_env->fragment_mgr()->start_query_execution(request);
398
119k
        st.to_protobuf(result->mutable_status());
399
119k
        timeval tv2 {};
400
119k
        gettimeofday(&tv2, nullptr);
401
119k
        result->set_execution_done_time(tv2.tv_sec * 1000LL + tv2.tv_usec / 1000);
402
119k
    });
403
119k
    if (!ret) {
404
0
        offer_failed(result, done, _light_work_pool);
405
0
        return;
406
0
    }
407
119k
}
408
409
void PInternalService::open_load_stream(google::protobuf::RpcController* controller,
410
                                        const POpenLoadStreamRequest* request,
411
                                        POpenLoadStreamResponse* response,
412
5.16k
                                        google::protobuf::Closure* done) {
413
5.16k
    bool ret = _heavy_work_pool.try_offer([this, controller, request, response, done]() {
414
5.16k
        signal::SignalTaskIdKeeper keeper(request->load_id());
415
5.16k
        brpc::ClosureGuard done_guard(done);
416
5.16k
        brpc::Controller* cntl = static_cast<brpc::Controller*>(controller);
417
5.16k
        brpc::StreamOptions stream_options;
418
419
5.16k
        LOG(INFO) << "open load stream, load_id=" << request->load_id()
420
5.16k
                  << ", src_id=" << request->src_id();
421
422
5.16k
        std::vector<BaseTabletSPtr> tablets;
423
5.16k
        for (const auto& req : request->tablets()) {
424
2.63k
            BaseTabletSPtr tablet;
425
2.63k
            if (auto res = ExecEnv::get_tablet(req.tablet_id()); !res.has_value()) [[unlikely]] {
426
0
                auto st = std::move(res).error();
427
0
                st.to_protobuf(response->mutable_status());
428
0
                cntl->SetFailed(st.to_string());
429
0
                return;
430
2.63k
            } else {
431
2.63k
                tablet = std::move(res).value();
432
2.63k
            }
433
2.63k
            auto resp = response->add_tablet_schemas();
434
2.63k
            resp->set_index_id(req.index_id());
435
2.63k
            resp->set_enable_unique_key_merge_on_write(tablet->enable_unique_key_merge_on_write());
436
2.63k
            tablet->tablet_schema()->to_schema_pb(resp->mutable_tablet_schema());
437
2.63k
            tablets.push_back(tablet);
438
2.63k
        }
439
5.16k
        if (!tablets.empty()) {
440
2.63k
            auto* tablet_load_infos = response->mutable_tablet_load_rowset_num_infos();
441
2.63k
            for (const auto& tablet : tablets) {
442
2.63k
                BaseDeltaWriter::collect_tablet_load_rowset_num_info(tablet.get(),
443
2.63k
                                                                     tablet_load_infos);
444
2.63k
            }
445
2.63k
        }
446
447
5.16k
        LoadStream* load_stream = nullptr;
448
5.16k
        auto st = _exec_env->load_stream_mgr()->open_load_stream(request, load_stream);
449
5.16k
        if (!st.ok()) {
450
0
            st.to_protobuf(response->mutable_status());
451
0
            return;
452
0
        }
453
454
5.16k
        stream_options.handler = load_stream;
455
5.16k
        stream_options.idle_timeout_ms = request->idle_timeout_ms();
456
5.16k
        DBUG_EXECUTE_IF("PInternalServiceImpl.open_load_stream.set_idle_timeout",
457
5.16k
                        { stream_options.idle_timeout_ms = 1; });
458
459
5.16k
        StreamId streamid;
460
5.16k
        if (brpc::StreamAccept(&streamid, *cntl, &stream_options) != 0) {
461
0
            st = Status::Cancelled("Fail to accept stream {}", streamid);
462
0
            st.to_protobuf(response->mutable_status());
463
0
            cntl->SetFailed(st.to_string());
464
0
            return;
465
0
        }
466
467
5.16k
        VLOG_DEBUG << "get streamid =" << streamid;
468
5.16k
        st.to_protobuf(response->mutable_status());
469
5.16k
    });
470
5.16k
    if (!ret) {
471
0
        offer_failed(response, done, _heavy_work_pool);
472
0
    }
473
5.16k
}
474
475
void PInternalService::tablet_writer_add_block_by_http(google::protobuf::RpcController* controller,
476
                                                       const ::doris::PEmptyRequest* request,
477
                                                       PTabletWriterAddBlockResult* response,
478
0
                                                       google::protobuf::Closure* done) {
479
0
    PTabletWriterAddBlockRequest* new_request = new PTabletWriterAddBlockRequest();
480
0
    google::protobuf::Closure* new_done =
481
0
            new NewHttpClosure<PTabletWriterAddBlockRequest>(new_request, done);
482
0
    brpc::Controller* cntl = static_cast<brpc::Controller*>(controller);
483
0
    Status st = attachment_extract_request_contain_block<PTabletWriterAddBlockRequest>(new_request,
484
0
                                                                                       cntl);
485
0
    if (st.ok()) {
486
0
        tablet_writer_add_block(controller, new_request, response, new_done);
487
0
    } else {
488
0
        st.to_protobuf(response->mutable_status());
489
0
    }
490
0
}
491
492
void PInternalService::tablet_writer_add_block(google::protobuf::RpcController* controller,
493
                                               const PTabletWriterAddBlockRequest* request,
494
                                               PTabletWriterAddBlockResult* response,
495
46.9k
                                               google::protobuf::Closure* done) {
496
46.9k
    int64_t submit_task_time_ns = MonotonicNanos();
497
47.0k
    bool ret = _heavy_work_pool.try_offer([request, response, done, submit_task_time_ns, this]() {
498
47.0k
        int64_t wait_execution_time_ns = MonotonicNanos() - submit_task_time_ns;
499
47.0k
        brpc::ClosureGuard closure_guard(done);
500
47.0k
        int64_t execution_time_ns = 0;
501
47.0k
        {
502
47.0k
            SCOPED_RAW_TIMER(&execution_time_ns);
503
47.0k
            signal::SignalTaskIdKeeper keeper(request->id());
504
47.0k
            auto st = _exec_env->load_channel_mgr()->add_batch(*request, response);
505
47.0k
            if (!st.ok()) {
506
48
                LOG(WARNING) << "tablet writer add block failed, message=" << st
507
48
                             << ", id=" << request->id() << ", index_id=" << request->index_id()
508
48
                             << ", sender_id=" << request->sender_id()
509
48
                             << ", backend id=" << request->backend_id();
510
48
            }
511
47.0k
            st.to_protobuf(response->mutable_status());
512
47.0k
        }
513
47.0k
        response->set_execution_time_us(execution_time_ns / NANOS_PER_MICRO);
514
47.0k
        response->set_wait_execution_time_us(wait_execution_time_ns / NANOS_PER_MICRO);
515
47.0k
    });
516
46.9k
    if (!ret) {
517
0
        offer_failed(response, done, _heavy_work_pool);
518
0
        return;
519
0
    }
520
46.9k
}
521
522
void PInternalService::tablet_writer_cancel(google::protobuf::RpcController* controller,
523
                                            const PTabletWriterCancelRequest* request,
524
                                            PTabletWriterCancelResult* response,
525
107
                                            google::protobuf::Closure* done) {
526
107
    bool ret = _heavy_work_pool.try_offer([this, request, done]() {
527
107
        VLOG_RPC << "tablet writer cancel, id=" << request->id()
528
0
                 << ", index_id=" << request->index_id() << ", sender_id=" << request->sender_id();
529
107
        signal::SignalTaskIdKeeper keeper(request->id());
530
107
        brpc::ClosureGuard closure_guard(done);
531
107
        auto st = _exec_env->load_channel_mgr()->cancel(*request);
532
107
        if (!st.ok()) {
533
0
            LOG(WARNING) << "tablet writer cancel failed, id=" << request->id()
534
0
                         << ", index_id=" << request->index_id()
535
0
                         << ", sender_id=" << request->sender_id();
536
0
        }
537
107
    });
538
107
    if (!ret) {
539
0
        offer_failed(response, done, _heavy_work_pool);
540
0
        return;
541
0
    }
542
107
}
543
544
// Deserializes a thrift-encoded TPipelineFragmentParamsList and submits every
// fragment in it to the fragment manager.
//
// @param ser_request  serialized TPipelineFragmentParamsList.
// @param version      request version; only VERSION_3 is accepted (the CHECK
//                     below fires for anything else).
// @param compact      forwarded to deserialize_thrift_msg().
// @param cb           optional callback; when set it is forwarded to
//                     exec_plan_fragment(), otherwise the overload without a
//                     callback is used.
// @return OK once all fragments were submitted; otherwise the first error from
//         deserialization or submission, or InternalError when cluster info is
//         not available yet or the fragment list is empty.
Status PInternalService::_exec_plan_fragment_impl(
        const std::string& ser_request, PFragmentRequestVersion version, bool compact,
        const std::function<void(RuntimeState*, Status*)>& cb) {
    // Sometimes the BE do not receive the first heartbeat message and it receives request from FE
    // If BE execute this fragment, it will core when it wants to get some property from master info.
    if (ExecEnv::GetInstance()->cluster_info() == nullptr) {
        return Status::InternalError(
                "Have not receive the first heartbeat message from master, not ready to provide "
                "service");
    }
    CHECK(version == PFragmentRequestVersion::VERSION_3)
            << "only support version 3, received " << version;
    if (version != PFragmentRequestVersion::VERSION_3) {
        return Status::InternalError("invalid version");
    }

    TPipelineFragmentParamsList t_request;
    {
        const auto* buf = reinterpret_cast<const uint8_t*>(ser_request.data());
        auto len = static_cast<uint32_t>(ser_request.size());
        RETURN_IF_ERROR(deserialize_thrift_msg(buf, &len, compact, &t_request));
    }

    const auto& fragment_list = t_request.params_list;
    if (fragment_list.empty()) {
        return Status::InternalError("Invalid TPipelineFragmentParamsList!");
    }
    MonotonicStopWatch timer;
    timer.start();

    // work for old version frontend
    if (!t_request.__isset.runtime_filter_info) {
        TRuntimeFilterInfo runtime_filter_info;
        // The runtime filter info is rebuilt from the first local param of the
        // first fragment. That list can be empty, so check before reading it;
        // indexing [0] unconditionally would be undefined behaviour.
        const auto& first_local_params = fragment_list.front().local_params;
        if (!first_local_params.empty()) {
            // Bind by reference: the setters below copy only the members they need.
            const auto& local_param = first_local_params.front();
            if (local_param.__isset.runtime_filter_params) {
                runtime_filter_info.__set_runtime_filter_params(local_param.runtime_filter_params);
            }
            if (local_param.__isset.topn_filter_descs) {
                runtime_filter_info.__set_topn_filter_descs(local_param.topn_filter_descs);
            }
        }
        t_request.__set_runtime_filter_info(runtime_filter_info);
    }

    for (const TPipelineFragmentParams& fragment : fragment_list) {
        if (cb) {
            RETURN_IF_ERROR(_exec_env->fragment_mgr()->exec_plan_fragment(
                    fragment, QuerySource::INTERNAL_FRONTEND, cb, t_request));
        } else {
            RETURN_IF_ERROR(_exec_env->fragment_mgr()->exec_plan_fragment(
                    fragment, QuerySource::INTERNAL_FRONTEND, t_request));
        }
    }
    timer.stop();
    // elapsed_time() is divided by 1e9 to get seconds; slow submissions are logged.
    double cost_secs = static_cast<double>(timer.elapsed_time()) / 1000000000ULL;
    if (cost_secs > 5) {
        LOG_WARNING("Prepare {} fragments of query {} costs {} seconds, it costs too much",
                    fragment_list.size(), print_id(fragment_list.front().query_id), cost_secs);
    }

    return Status::OK();
}
605
606
void PInternalService::cancel_plan_fragment(google::protobuf::RpcController* /*controller*/,
607
                                            const PCancelPlanFragmentRequest* request,
608
                                            PCancelPlanFragmentResult* result,
609
170k
                                            google::protobuf::Closure* done) {
610
170k
    bool ret = _light_work_pool.try_offer([this, request, result, done]() {
611
170k
        brpc::ClosureGuard closure_guard(done);
612
170k
        signal::SignalTaskIdKeeper keeper(request->finst_id());
613
170k
        Status st = Status::OK();
614
615
170k
        const bool has_cancel_reason = request->has_cancel_reason();
616
170k
        const bool has_cancel_status = request->has_cancel_status();
617
        // During upgrade only LIMIT_REACH is used, other reason is changed to internal error
618
170k
        Status actual_cancel_status = Status::OK();
619
        // Convert PPlanFragmentCancelReason to Status
620
170k
        if (has_cancel_status) {
621
            // If fe set cancel status, then it is new FE now, should use cancel status.
622
170k
            actual_cancel_status = Status::create<false>(request->cancel_status());
623
170k
        } else if (has_cancel_reason) {
624
            // If fe not set cancel status, but set cancel reason, should convert cancel reason
625
            // to cancel status here.
626
0
            if (request->cancel_reason() == PPlanFragmentCancelReason::LIMIT_REACH) {
627
0
                actual_cancel_status = Status::Error<ErrorCode::LIMIT_REACH>("limit reach");
628
0
            } else {
629
                // Use cancel reason as error message
630
0
                actual_cancel_status = Status::InternalError(
631
0
                        PPlanFragmentCancelReason_Name(request->cancel_reason()));
632
0
            }
633
0
        } else {
634
0
            actual_cancel_status = Status::InternalError("unknown error");
635
0
        }
636
637
170k
        TUniqueId query_id;
638
170k
        query_id.__set_hi(request->query_id().hi());
639
170k
        query_id.__set_lo(request->query_id().lo());
640
170k
        LOG(INFO) << fmt::format("Cancel query {}, reason: {}", print_id(query_id),
641
170k
                                 actual_cancel_status.to_string());
642
170k
        _exec_env->fragment_mgr()->cancel_query(query_id, actual_cancel_status);
643
644
        // TODO: the logic seems useless, cancel only return Status::OK. remove it
645
170k
        st.to_protobuf(result->mutable_status());
646
170k
    });
647
170k
    if (!ret) {
648
0
        offer_failed(result, done, _light_work_pool);
649
0
        return;
650
0
    }
651
170k
}
652
653
void PInternalService::fetch_data(google::protobuf::RpcController* controller,
654
                                  const PFetchDataRequest* request, PFetchDataResult* result,
655
416k
                                  google::protobuf::Closure* done) {
656
    // fetch_data is a light operation which will put a request rather than wait inplace when there's no data ready.
657
    // when there's data ready, use brpc to send. there's queue in brpc service. won't take it too long.
658
416k
    auto ctx = GetResultBatchCtx::create_shared(result, done);
659
416k
    TUniqueId unique_id = UniqueId(request->finst_id()).to_thrift(); // query_id or instance_id
660
416k
    std::shared_ptr<MySQLResultBlockBuffer> buffer;
661
416k
    Status st = ExecEnv::GetInstance()->result_mgr()->find_buffer(unique_id, buffer);
662
416k
    if (!st.ok()) {
663
0
        LOG(WARNING) << "Result buffer not found! finst ID: " << print_id(unique_id);
664
0
        return;
665
0
    }
666
416k
    if (st = buffer->get_batch(ctx); !st.ok()) {
667
6
        LOG(WARNING) << "fetch_data failed: " << st.to_string();
668
6
    }
669
416k
}
670
671
void PInternalService::fetch_arrow_data(google::protobuf::RpcController* controller,
672
                                        const PFetchArrowDataRequest* request,
673
                                        PFetchArrowDataResult* result,
674
0
                                        google::protobuf::Closure* done) {
675
0
    bool ret = _arrow_flight_work_pool.try_offer([request, result, done]() {
676
0
        auto ctx = GetArrowResultBatchCtx::create_shared(result, done);
677
0
        TUniqueId unique_id = UniqueId(request->finst_id()).to_thrift(); // query_id or instance_id
678
0
        std::shared_ptr<ArrowFlightResultBlockBuffer> arrow_buffer;
679
0
        auto st = ExecEnv::GetInstance()->result_mgr()->find_buffer(unique_id, arrow_buffer);
680
0
        if (!st.ok()) {
681
0
            LOG(WARNING) << "Result buffer not found! Query ID: " << print_id(unique_id);
682
0
            return;
683
0
        }
684
0
        if (st = arrow_buffer->get_batch(ctx); !st.ok()) {
685
0
            LOG(WARNING) << "fetch_arrow_data failed: " << st.to_string();
686
0
        }
687
0
    });
688
0
    if (!ret) {
689
0
        offer_failed(result, done, _arrow_flight_work_pool);
690
0
        return;
691
0
    }
692
0
}
693
694
void PInternalService::outfile_write_success(google::protobuf::RpcController* controller,
695
                                             const POutfileWriteSuccessRequest* request,
696
                                             POutfileWriteSuccessResult* result,
697
4
                                             google::protobuf::Closure* done) {
698
4
    bool ret = _heavy_work_pool.try_offer([request, result, done]() {
699
4
        VLOG_RPC << "outfile write success file";
700
4
        brpc::ClosureGuard closure_guard(done);
701
4
        TResultFileSink result_file_sink;
702
4
        Status st = Status::OK();
703
4
        {
704
4
            const uint8_t* buf = (const uint8_t*)(request->result_file_sink().data());
705
4
            uint32_t len = request->result_file_sink().size();
706
4
            st = deserialize_thrift_msg(buf, &len, false, &result_file_sink);
707
4
            if (!st.ok()) {
708
0
                LOG(WARNING) << "outfile write success file failed, errmsg = " << st;
709
0
                st.to_protobuf(result->mutable_status());
710
0
                return;
711
0
            }
712
4
        }
713
714
4
        TResultFileSinkOptions file_options = result_file_sink.file_options;
715
4
        std::stringstream ss;
716
4
        ss << file_options.file_path << file_options.success_file_name;
717
4
        std::string file_name = ss.str();
718
4
        if (result_file_sink.storage_backend_type == TStorageBackendType::LOCAL) {
719
            // For local file writer, the file_path is a local dir.
720
            // Here we do a simple security verification by checking whether the file exists.
721
            // Because the file path is currently arbitrarily specified by the user,
722
            // Doris is not responsible for ensuring the correctness of the path.
723
            // This is just to prevent overwriting the existing file.
724
4
            bool exists = true;
725
4
            st = io::global_local_filesystem()->exists(file_name, &exists);
726
4
            if (!st.ok()) {
727
0
                LOG(WARNING) << "outfile write success filefailed, errmsg = " << st;
728
0
                st.to_protobuf(result->mutable_status());
729
0
                return;
730
0
            }
731
4
            if (exists) {
732
0
                st = Status::InternalError("File already exists: {}", file_name);
733
0
            }
734
4
            if (!st.ok()) {
735
0
                LOG(WARNING) << "outfile write success file failed, errmsg = " << st;
736
0
                st.to_protobuf(result->mutable_status());
737
0
                return;
738
0
            }
739
4
        }
740
741
4
        auto file_type_res =
742
4
                FileFactory::convert_storage_type(result_file_sink.storage_backend_type);
743
4
        if (!file_type_res.has_value()) [[unlikely]] {
744
0
            st = std::move(file_type_res).error();
745
0
            st.to_protobuf(result->mutable_status());
746
0
            LOG(WARNING) << "encounter unkonw type=" << result_file_sink.storage_backend_type
747
0
                         << ", st=" << st;
748
0
            return;
749
0
        }
750
751
4
        auto&& res = FileFactory::create_file_writer(file_type_res.value(), ExecEnv::GetInstance(),
752
4
                                                     file_options.broker_addresses,
753
4
                                                     file_options.broker_properties, file_name,
754
4
                                                     {
755
4
                                                             .write_file_cache = false,
756
4
                                                             .sync_file_data = false,
757
4
                                                     });
758
4
        using T = std::decay_t<decltype(res)>;
759
4
        if (!res.has_value()) [[unlikely]] {
760
0
            st = std::forward<T>(res).error();
761
0
            st.to_protobuf(result->mutable_status());
762
0
            return;
763
0
        }
764
765
4
        std::unique_ptr<doris::io::FileWriter> _file_writer_impl = std::forward<T>(res).value();
766
        // must write somthing because s3 file writer can not writer empty file
767
4
        st = _file_writer_impl->append({"success"});
768
4
        if (!st.ok()) {
769
0
            LOG(WARNING) << "outfile write success filefailed, errmsg=" << st;
770
0
            st.to_protobuf(result->mutable_status());
771
0
            return;
772
0
        }
773
4
        st = _file_writer_impl->close();
774
4
        if (!st.ok()) {
775
0
            LOG(WARNING) << "outfile write success filefailed, errmsg=" << st;
776
0
            st.to_protobuf(result->mutable_status());
777
0
            return;
778
0
        }
779
4
    });
780
4
    if (!ret) {
781
0
        offer_failed(result, done, _heavy_work_pool);
782
0
        return;
783
0
    }
784
4
}
785
786
void PInternalService::fetch_table_schema(google::protobuf::RpcController* controller,
787
                                          const PFetchTableSchemaRequest* request,
788
                                          PFetchTableSchemaResult* result,
789
4.06k
                                          google::protobuf::Closure* done) {
790
4.06k
    bool ret = _heavy_work_pool.try_offer([request, result, done]() {
791
4.06k
        VLOG_RPC << "fetch table schema";
792
4.06k
        brpc::ClosureGuard closure_guard(done);
793
4.06k
        TFileScanRange file_scan_range;
794
4.06k
        Status st = Status::OK();
795
4.06k
        {
796
4.06k
            const uint8_t* buf = (const uint8_t*)(request->file_scan_range().data());
797
4.06k
            uint32_t len = request->file_scan_range().size();
798
4.06k
            st = deserialize_thrift_msg(buf, &len, false, &file_scan_range);
799
4.06k
            if (!st.ok()) {
800
0
                LOG(WARNING) << "fetch table schema failed, errmsg=" << st;
801
0
                st.to_protobuf(result->mutable_status());
802
0
                return;
803
0
            }
804
4.06k
        }
805
4.06k
        if (file_scan_range.__isset.ranges == false) {
806
0
            st = Status::InternalError("can not get TFileRangeDesc.");
807
0
            st.to_protobuf(result->mutable_status());
808
0
            return;
809
0
        }
810
4.06k
        if (file_scan_range.__isset.params == false) {
811
0
            st = Status::InternalError("can not get TFileScanRangeParams.");
812
0
            st.to_protobuf(result->mutable_status());
813
0
            return;
814
0
        }
815
4.06k
        const TFileRangeDesc& range = file_scan_range.ranges.at(0);
816
4.06k
        const TFileScanRangeParams& params = file_scan_range.params;
817
818
4.06k
        std::shared_ptr<MemTrackerLimiter> mem_tracker = MemTrackerLimiter::create_shared(
819
4.06k
                MemTrackerLimiter::Type::OTHER,
820
4.06k
                fmt::format("InternalService::fetch_table_schema:{}#{}", params.format_type,
821
4.06k
                            params.file_type));
822
4.06k
        SCOPED_ATTACH_TASK(mem_tracker);
823
824
        // make sure profile is desctructed after reader cause PrefetchBufferedReader
825
        // might asynchronouslly access the profile
826
4.06k
        std::unique_ptr<RuntimeProfile> profile =
827
4.06k
                std::make_unique<RuntimeProfile>("FetchTableSchema");
828
4.06k
        std::unique_ptr<GenericReader> reader(nullptr);
829
4.06k
        auto io_ctx = std::make_shared<io::IOContext>();
830
4.06k
        auto file_cache_statis = std::make_shared<io::FileCacheStatistics>();
831
4.06k
        auto file_reader_stats = std::make_shared<io::FileReaderStats>();
832
4.06k
        io_ctx->file_cache_stats = file_cache_statis.get();
833
4.06k
        io_ctx->file_reader_stats = file_reader_stats.get();
834
4.06k
        constexpr size_t fetch_schema_batch_size = 4064;
835
        // file_slots is no use, but the lifetime should be longer than reader
836
4.06k
        std::vector<SlotDescriptor*> file_slots;
837
4.06k
        switch (params.format_type) {
838
810
        case TFileFormatType::FORMAT_CSV_PLAIN:
839
810
        case TFileFormatType::FORMAT_CSV_GZ:
840
810
        case TFileFormatType::FORMAT_CSV_BZ2:
841
810
        case TFileFormatType::FORMAT_CSV_LZ4FRAME:
842
810
        case TFileFormatType::FORMAT_CSV_LZ4BLOCK:
843
810
        case TFileFormatType::FORMAT_CSV_SNAPPYBLOCK:
844
810
        case TFileFormatType::FORMAT_CSV_LZOP:
845
810
        case TFileFormatType::FORMAT_CSV_DEFLATE: {
846
810
            reader = CsvReader::create_unique(nullptr, profile.get(), nullptr, params, range,
847
810
                                              file_slots, fetch_schema_batch_size, io_ctx.get(),
848
810
                                              io_ctx);
849
810
            break;
850
810
        }
851
4
        case TFileFormatType::FORMAT_TEXT: {
852
4
            reader = TextReader::create_unique(nullptr, profile.get(), nullptr, params, range,
853
4
                                               file_slots, fetch_schema_batch_size, io_ctx.get());
854
4
            break;
855
810
        }
856
2.44k
        case TFileFormatType::FORMAT_PARQUET: {
857
2.44k
            reader = ParquetReader::create_unique(params, range, io_ctx, nullptr);
858
2.44k
            break;
859
810
        }
860
716
        case TFileFormatType::FORMAT_ORC: {
861
716
            reader = OrcReader::create_unique(params, range, fetch_schema_batch_size, "", io_ctx);
862
716
            break;
863
810
        }
864
2
        case TFileFormatType::FORMAT_NATIVE: {
865
2
            reader = NativeReader::create_unique(profile.get(), params, range, io_ctx.get(),
866
2
                                                 nullptr);
867
2
            break;
868
810
        }
869
71
        case TFileFormatType::FORMAT_JSON: {
870
71
            reader = NewJsonReader::create_unique(profile.get(), params, range, file_slots,
871
71
                                                  fetch_schema_batch_size, io_ctx.get(), io_ctx);
872
71
            break;
873
810
        }
874
0
#ifdef BUILD_RUST_READERS
875
16
        case TFileFormatType::FORMAT_LANCE: {
876
16
            reader = LanceRustReader::create_unique(params, range, io_ctx.get());
877
16
            break;
878
810
        }
879
0
#endif
880
0
        default:
881
0
            st = Status::InternalError("Not supported file format in fetch table schema: {}",
882
0
                                       params.format_type);
883
0
            st.to_protobuf(result->mutable_status());
884
0
            return;
885
4.06k
        }
886
4.06k
        if (!st.ok()) {
887
0
            LOG(WARNING) << "failed to create reader, errmsg=" << st;
888
0
            st.to_protobuf(result->mutable_status());
889
0
            return;
890
0
        }
891
4.06k
        st = reader->init_schema_reader();
892
4.06k
        if (!st.ok()) {
893
19
            LOG(WARNING) << "failed to init reader, errmsg=" << st;
894
19
            st.to_protobuf(result->mutable_status());
895
19
            return;
896
19
        }
897
4.04k
        std::vector<std::string> col_names;
898
4.04k
        std::vector<DataTypePtr> col_types;
899
4.04k
        st = reader->get_parsed_schema(&col_names, &col_types);
900
4.04k
        if (!st.ok()) {
901
10
            LOG(WARNING) << "fetch table schema failed, errmsg=" << st;
902
10
            st.to_protobuf(result->mutable_status());
903
10
            return;
904
10
        }
905
4.03k
        result->set_column_nums(col_names.size());
906
29.3k
        for (size_t idx = 0; idx < col_names.size(); ++idx) {
907
25.3k
            result->add_column_names(col_names[idx]);
908
25.3k
        }
909
29.3k
        for (size_t idx = 0; idx < col_types.size(); ++idx) {
910
25.3k
            PTypeDesc* type_desc = result->add_column_types();
911
25.3k
            col_types[idx]->to_protobuf(type_desc);
912
25.3k
        }
913
4.03k
        st.to_protobuf(result->mutable_status());
914
4.03k
    });
915
4.06k
    if (!ret) {
916
0
        offer_failed(result, done, _heavy_work_pool);
917
0
        return;
918
0
    }
919
4.06k
}
920
921
void PInternalService::fetch_arrow_flight_schema(google::protobuf::RpcController* controller,
922
                                                 const PFetchArrowFlightSchemaRequest* request,
923
                                                 PFetchArrowFlightSchemaResult* result,
924
104
                                                 google::protobuf::Closure* done) {
925
104
    bool ret = _arrow_flight_work_pool.try_offer([request, result, done]() {
926
104
        brpc::ClosureGuard closure_guard(done);
927
104
        std::shared_ptr<arrow::Schema> schema;
928
104
        std::shared_ptr<ArrowFlightResultBlockBuffer> buffer;
929
104
        auto st = ExecEnv::GetInstance()->result_mgr()->find_buffer(
930
104
                UniqueId(request->finst_id()).to_thrift(), buffer);
931
104
        if (!st.ok()) {
932
0
            LOG(WARNING) << "fetch arrow flight schema failed, errmsg=" << st;
933
0
            st.to_protobuf(result->mutable_status());
934
0
            return;
935
0
        }
936
104
        st = buffer->get_schema(&schema);
937
104
        if (!st.ok()) {
938
0
            LOG(WARNING) << "fetch arrow flight schema failed, errmsg=" << st;
939
0
            st.to_protobuf(result->mutable_status());
940
0
            return;
941
0
        }
942
943
104
        std::string schema_str;
944
104
        st = serialize_arrow_schema(&schema, &schema_str);
945
104
        if (st.ok()) {
946
104
            result->set_schema(std::move(schema_str));
947
104
            if (!config::public_host.empty()) {
948
0
                result->set_be_arrow_flight_ip(config::public_host);
949
0
            }
950
104
            if (config::arrow_flight_sql_proxy_port != -1) {
951
0
                result->set_be_arrow_flight_port(config::arrow_flight_sql_proxy_port);
952
0
            }
953
104
        }
954
104
        st.to_protobuf(result->mutable_status());
955
104
    });
956
104
    if (!ret) {
957
0
        offer_failed(result, done, _arrow_flight_work_pool);
958
0
        return;
959
0
    }
960
104
}
961
962
// Runs one point lookup with a PointQueryExecutor.
//
// Returns the first error from init() or lookup_up(). If init() leaves
// need_resend_query_context set and true on `response`, the lookup is skipped
// and OK is returned. The profile is printed only after a successful lookup.
Status PInternalService::_tablet_fetch_data(const PTabletKeyLookupRequest* request,
                                            PTabletKeyLookupResponse* response) {
    PointQueryExecutor point_executor;
    RETURN_IF_ERROR(point_executor.init(request, response));
    const bool must_resend_context =
            response->has_need_resend_query_context() && response->need_resend_query_context();
    if (!must_resend_context) {
        RETURN_IF_ERROR(point_executor.lookup_up());
        point_executor.print_profile();
    }
    return Status::OK();
}
973
974
void PInternalService::tablet_fetch_data(google::protobuf::RpcController* controller,
975
                                         const PTabletKeyLookupRequest* request,
976
                                         PTabletKeyLookupResponse* response,
977
217
                                         google::protobuf::Closure* done) {
978
218
    bool ret = _light_work_pool.try_offer([this, controller, request, response, done]() {
979
218
        [[maybe_unused]] auto* cntl = static_cast<brpc::Controller*>(controller);
980
218
        brpc::ClosureGuard guard(done);
981
218
        Status st = _tablet_fetch_data(request, response);
982
218
        st.to_protobuf(response->mutable_status());
983
218
    });
984
217
    if (!ret) {
985
0
        offer_failed(response, done, _light_work_pool);
986
0
        return;
987
0
    }
988
217
}
989
990
void PInternalService::test_jdbc_connection(google::protobuf::RpcController* controller,
991
                                            const PJdbcTestConnectionRequest* request,
992
                                            PJdbcTestConnectionResult* result,
993
317
                                            google::protobuf::Closure* done) {
994
317
    if (!doris::config::enable_java_support) {
995
0
        doris::Status status = doris::Status::InternalError(
996
0
                "you can change be config enable_java_support to true and restart be.");
997
0
        status.to_protobuf(result->mutable_status());
998
0
        done->Run();
999
0
        return;
1000
0
    }
1001
317
    bool ret = _heavy_work_pool.try_offer([request, result, done]() {
1002
317
        VLOG_RPC << "test jdbc connection";
1003
317
        brpc::ClosureGuard closure_guard(done);
1004
317
        std::shared_ptr<MemTrackerLimiter> mem_tracker = MemTrackerLimiter::create_shared(
1005
317
                MemTrackerLimiter::Type::OTHER,
1006
317
                fmt::format("InternalService::test_jdbc_connection"));
1007
317
        SCOPED_ATTACH_TASK(mem_tracker);
1008
317
        TTableDescriptor table_desc;
1009
317
        Status st = Status::OK();
1010
317
        {
1011
317
            const uint8_t* buf = (const uint8_t*)request->jdbc_table().data();
1012
317
            uint32_t len = request->jdbc_table().size();
1013
317
            st = deserialize_thrift_msg(buf, &len, false, &table_desc);
1014
317
            if (!st.ok()) {
1015
0
                LOG(WARNING) << "test jdbc connection failed, errmsg=" << st;
1016
0
                st.to_protobuf(result->mutable_status());
1017
0
                return;
1018
0
            }
1019
317
        }
1020
317
        TJdbcTable jdbc_table = (table_desc.jdbcTable);
1021
1022
        // Resolve driver URL to absolute file:// path
1023
317
        std::string driver_url;
1024
317
        st = JdbcUtils::resolve_driver_url(jdbc_table.jdbc_driver_url, &driver_url);
1025
317
        if (!st.ok()) {
1026
0
            st.to_protobuf(result->mutable_status());
1027
0
            return;
1028
0
        }
1029
1030
        // Build params for JdbcConnectionTester
1031
317
        std::map<std::string, std::string> params;
1032
317
        params["jdbc_url"] = jdbc_table.jdbc_url;
1033
317
        params["jdbc_user"] = jdbc_table.jdbc_user;
1034
317
        params["jdbc_password"] = jdbc_table.jdbc_password;
1035
317
        params["jdbc_driver_class"] = jdbc_table.jdbc_driver_class;
1036
317
        params["jdbc_driver_url"] = driver_url;
1037
317
        params["query_sql"] = request->query_str();
1038
317
        params["catalog_id"] = std::to_string(jdbc_table.catalog_id);
1039
317
        params["connection_pool_min_size"] = std::to_string(jdbc_table.connection_pool_min_size);
1040
317
        params["connection_pool_max_size"] = std::to_string(jdbc_table.connection_pool_max_size);
1041
317
        params["connection_pool_max_wait_time"] =
1042
317
                std::to_string(jdbc_table.connection_pool_max_wait_time);
1043
317
        params["connection_pool_max_life_time"] =
1044
317
                std::to_string(jdbc_table.connection_pool_max_life_time);
1045
317
        params["connection_pool_keep_alive"] =
1046
317
                jdbc_table.connection_pool_keep_alive ? "true" : "false";
1047
317
        params["clean_datasource"] = "true";
1048
        // Map jdbc_table_type (TOdbcTableType enum value) to string name
1049
        // for JdbcTypeHandlerFactory to select the correct type handler.
1050
        // This ensures the right validation query is used (e.g. Oracle: "SELECT 1 FROM dual").
1051
317
        if (request->has_jdbc_table_type()) {
1052
317
            std::string type_name;
1053
317
            switch (request->jdbc_table_type()) {
1054
215
            case 0:
1055
215
                type_name = "MYSQL";
1056
215
                break;
1057
38
            case 1:
1058
38
                type_name = "ORACLE";
1059
38
                break;
1060
24
            case 2:
1061
24
                type_name = "POSTGRESQL";
1062
24
                break;
1063
16
            case 3:
1064
16
                type_name = "SQLSERVER";
1065
16
                break;
1066
16
            case 6:
1067
16
                type_name = "CLICKHOUSE";
1068
16
                break;
1069
0
            case 7:
1070
0
                type_name = "SAP_HANA";
1071
0
                break;
1072
0
            case 8:
1073
0
                type_name = "TRINO";
1074
0
                break;
1075
0
            case 9:
1076
0
                type_name = "PRESTO";
1077
0
                break;
1078
4
            case 10:
1079
4
                type_name = "OCEANBASE";
1080
4
                break;
1081
0
            case 11:
1082
0
                type_name = "OCEANBASE_ORACLE";
1083
0
                break;
1084
4
            case 13:
1085
4
                type_name = "DB2";
1086
4
                break;
1087
0
            case 14:
1088
0
                type_name = "GBASE";
1089
0
                break;
1090
0
            default:
1091
0
                break;
1092
317
            }
1093
317
            if (!type_name.empty()) {
1094
317
                params["table_type"] = type_name;
1095
317
            }
1096
317
        }
1097
        // required_fields and columns_types are required by JniReader
1098
317
        params["required_fields"] = "result";
1099
317
        params["columns_types"] = "int";
1100
1101
        // Use JniReader to create JdbcConnectionTester, which tests
1102
        // the connection in its open() method.
1103
317
        auto jni_reader =
1104
317
                std::make_unique<JniReader>("org/apache/doris/jdbc/JdbcConnectionTester", params);
1105
317
        st = jni_reader->open(nullptr, nullptr);
1106
317
        st.to_protobuf(result->mutable_status());
1107
1108
317
        Status close_st = jni_reader->close();
1109
317
        if (!close_st.ok()) {
1110
0
            LOG(WARNING) << "Failed to close JDBC connection tester: " << close_st.msg();
1111
0
        }
1112
317
    });
1113
1114
317
    if (!ret) {
1115
0
        offer_failed(result, done, _heavy_work_pool);
1116
0
        return;
1117
0
    }
1118
317
}
1119
1120
void PInternalServiceImpl::get_column_ids_by_tablet_ids(google::protobuf::RpcController* controller,
1121
                                                        const PFetchColIdsRequest* request,
1122
                                                        PFetchColIdsResponse* response,
1123
0
                                                        google::protobuf::Closure* done) {
1124
0
    bool ret = _light_work_pool.try_offer([this, controller, request, response, done]() {
1125
0
        _get_column_ids_by_tablet_ids(controller, request, response, done);
1126
0
    });
1127
0
    if (!ret) {
1128
0
        offer_failed(response, done, _light_work_pool);
1129
0
        return;
1130
0
    }
1131
0
}
1132
1133
void PInternalServiceImpl::_get_column_ids_by_tablet_ids(
1134
        google::protobuf::RpcController* controller, const PFetchColIdsRequest* request,
1135
0
        PFetchColIdsResponse* response, google::protobuf::Closure* done) {
1136
0
    brpc::ClosureGuard guard(done);
1137
0
    [[maybe_unused]] auto* cntl = static_cast<brpc::Controller*>(controller);
1138
0
    TabletManager* tablet_mgr = _engine.tablet_manager();
1139
0
    const auto& params = request->params();
1140
0
    for (const auto& param : params) {
1141
0
        int64_t index_id = param.indexid();
1142
0
        const auto& tablet_ids = param.tablet_ids();
1143
0
        std::set<std::set<int32_t>> filter_set;
1144
0
        std::map<int32_t, const TabletColumn*> id_to_column;
1145
0
        for (const int64_t tablet_id : tablet_ids) {
1146
0
            TabletSharedPtr tablet = tablet_mgr->get_tablet(tablet_id);
1147
0
            if (tablet == nullptr) {
1148
0
                std::stringstream ss;
1149
0
                ss << "cannot get tablet by id:" << tablet_id;
1150
0
                LOG(WARNING) << ss.str();
1151
0
                response->mutable_status()->set_status_code(TStatusCode::ILLEGAL_STATE);
1152
0
                response->mutable_status()->add_error_msgs(ss.str());
1153
0
                return;
1154
0
            }
1155
            // check schema consistency, column ids should be the same
1156
0
            const auto& columns = tablet->tablet_schema()->columns();
1157
1158
0
            std::set<int32_t> column_ids;
1159
0
            for (const auto& col : columns) {
1160
0
                column_ids.insert(col->unique_id());
1161
0
            }
1162
0
            filter_set.insert(std::move(column_ids));
1163
1164
0
            if (id_to_column.empty()) {
1165
0
                for (const auto& col : columns) {
1166
0
                    id_to_column.insert(std::pair {col->unique_id(), col.get()});
1167
0
                }
1168
0
            } else {
1169
0
                for (const auto& col : columns) {
1170
0
                    auto it = id_to_column.find(col->unique_id());
1171
0
                    if (it == id_to_column.end() || *(it->second) != *col) {
1172
0
                        ColumnPB prev_col_pb;
1173
0
                        ColumnPB curr_col_pb;
1174
0
                        if (it != id_to_column.end()) {
1175
0
                            it->second->to_schema_pb(&prev_col_pb);
1176
0
                        }
1177
0
                        col->to_schema_pb(&curr_col_pb);
1178
0
                        std::stringstream ss;
1179
0
                        ss << "consistency check failed: index{ " << index_id << " }"
1180
0
                           << " got inconsistent schema, prev column: " << prev_col_pb.DebugString()
1181
0
                           << " current column: " << curr_col_pb.DebugString();
1182
0
                        LOG(WARNING) << ss.str();
1183
0
                        response->mutable_status()->set_status_code(TStatusCode::ILLEGAL_STATE);
1184
0
                        response->mutable_status()->add_error_msgs(ss.str());
1185
0
                        return;
1186
0
                    }
1187
0
                }
1188
0
            }
1189
0
        }
1190
1191
0
        if (filter_set.size() > 1) {
1192
            // consistecy check failed
1193
0
            std::stringstream ss;
1194
0
            ss << "consistency check failed: index{" << index_id << "}"
1195
0
               << "got inconsistent schema";
1196
0
            LOG(WARNING) << ss.str();
1197
0
            response->mutable_status()->set_status_code(TStatusCode::ILLEGAL_STATE);
1198
0
            response->mutable_status()->add_error_msgs(ss.str());
1199
0
            return;
1200
0
        }
1201
        // consistency check passed, use the first tablet to be the representative
1202
0
        TabletSharedPtr tablet = tablet_mgr->get_tablet(tablet_ids[0]);
1203
0
        const auto& columns = tablet->tablet_schema()->columns();
1204
0
        auto entry = response->add_entries();
1205
0
        entry->set_index_id(index_id);
1206
0
        auto col_name_to_id = entry->mutable_col_name_to_id();
1207
0
        for (const auto& column : columns) {
1208
0
            (*col_name_to_id)[column->name()] = column->unique_id();
1209
0
        }
1210
0
    }
1211
0
    response->mutable_status()->set_status_code(TStatusCode::OK);
1212
0
}
1213
1214
template <class RPCResponse>
1215
struct AsyncRPCContext {
1216
    RPCResponse response;
1217
    brpc::Controller cntl;
1218
    brpc::CallId cid;
1219
};
1220
1221
void PInternalService::fetch_remote_tablet_schema(google::protobuf::RpcController* controller,
1222
                                                  const PFetchRemoteSchemaRequest* request,
1223
                                                  PFetchRemoteSchemaResponse* response,
1224
212
                                                  google::protobuf::Closure* done) {
1225
212
    bool ret = _heavy_work_pool.try_offer([request, response, done]() {
1226
212
        brpc::ClosureGuard closure_guard(done);
1227
212
        Status st = Status::OK();
1228
212
        std::shared_ptr<MemTrackerLimiter> mem_tracker = MemTrackerLimiter::create_shared(
1229
212
                MemTrackerLimiter::Type::OTHER,
1230
212
                fmt::format("InternalService::fetch_remote_tablet_schema"));
1231
212
        SCOPED_ATTACH_TASK(mem_tracker);
1232
212
        if (request->is_coordinator()) {
1233
            // Spawn rpc request to none coordinator nodes, and finally merge them all
1234
106
            PFetchRemoteSchemaRequest remote_request(*request);
1235
            // set it none coordinator to get merged schema
1236
106
            remote_request.set_is_coordinator(false);
1237
106
            using PFetchRemoteTabletSchemaRpcContext = AsyncRPCContext<PFetchRemoteSchemaResponse>;
1238
106
            std::vector<PFetchRemoteTabletSchemaRpcContext> rpc_contexts(
1239
106
                    request->tablet_location_size());
1240
212
            for (int i = 0; i < request->tablet_location_size(); ++i) {
1241
106
                std::string host = request->tablet_location(i).host();
1242
106
                int32_t brpc_port = request->tablet_location(i).brpc_port();
1243
106
                std::shared_ptr<PBackendService_Stub> stub(
1244
106
                        ExecEnv::GetInstance()->brpc_internal_client_cache()->get_client(
1245
106
                                host, brpc_port));
1246
106
                if (stub == nullptr) {
1247
0
                    LOG(WARNING) << "Failed to init rpc to " << host << ":" << brpc_port;
1248
0
                    st = Status::InternalError("Failed to init rpc to {}:{}", host, brpc_port);
1249
0
                    continue;
1250
0
                }
1251
106
                rpc_contexts[i].cid = rpc_contexts[i].cntl.call_id();
1252
106
                rpc_contexts[i].cntl.set_timeout_ms(config::fetch_remote_schema_rpc_timeout_ms);
1253
106
                stub->fetch_remote_tablet_schema(&rpc_contexts[i].cntl, &remote_request,
1254
106
                                                 &rpc_contexts[i].response, brpc::DoNothing());
1255
106
            }
1256
106
            std::vector<TabletSchemaSPtr> schemas;
1257
106
            for (auto& rpc_context : rpc_contexts) {
1258
106
                brpc::Join(rpc_context.cid);
1259
106
                if (!st.ok()) {
1260
                    // make sure all flying rpc request is joined
1261
0
                    continue;
1262
0
                }
1263
106
                if (rpc_context.cntl.Failed()) {
1264
0
                    LOG(WARNING) << "fetch_remote_tablet_schema rpc err:"
1265
0
                                 << rpc_context.cntl.ErrorText();
1266
0
                    ExecEnv::GetInstance()->brpc_internal_client_cache()->erase(
1267
0
                            rpc_context.cntl.remote_side());
1268
0
                    st = Status::InternalError("fetch_remote_tablet_schema rpc err: {}",
1269
0
                                               rpc_context.cntl.ErrorText());
1270
0
                }
1271
106
                if (rpc_context.response.status().status_code() != 0) {
1272
0
                    st = Status::create(rpc_context.response.status());
1273
0
                }
1274
106
                if (rpc_context.response.has_merged_schema()) {
1275
106
                    TabletSchemaSPtr schema = std::make_shared<TabletSchema>();
1276
106
                    schema->init_from_pb(rpc_context.response.merged_schema());
1277
106
                    schemas.push_back(schema);
1278
106
                }
1279
106
            }
1280
106
            if (!schemas.empty() && st.ok()) {
1281
                // merge all
1282
106
                TabletSchemaSPtr merged_schema;
1283
106
                st = variant_util::get_least_common_schema(schemas, nullptr, merged_schema);
1284
106
                if (!st.ok()) {
1285
0
                    LOG(WARNING) << "Failed to get least common schema: " << st.to_string();
1286
0
                    st = Status::InternalError("Failed to get least common schema: {}",
1287
0
                                               st.to_string());
1288
0
                }
1289
106
                VLOG_DEBUG << "dump schema:" << merged_schema->dump_structure();
1290
106
                merged_schema->reserve_extracted_columns();
1291
106
                merged_schema->to_schema_pb(response->mutable_merged_schema());
1292
106
            }
1293
106
            st.to_protobuf(response->mutable_status());
1294
106
            return;
1295
106
        } else {
1296
            // This is not a coordinator, get it's tablet and merge schema
1297
106
            std::vector<int64_t> target_tablets;
1298
106
            for (int i = 0; i < request->tablet_location_size(); ++i) {
1299
106
                const auto& location = request->tablet_location(i);
1300
106
                auto backend = BackendOptions::get_local_backend();
1301
                // If this is the target backend
1302
106
                if (backend.host == location.host() && config::brpc_port == location.brpc_port()) {
1303
106
                    target_tablets.assign(location.tablet_id().begin(), location.tablet_id().end());
1304
106
                    break;
1305
106
                }
1306
106
            }
1307
106
            if (!target_tablets.empty()) {
1308
106
                std::vector<TabletSchemaSPtr> tablet_schemas;
1309
1.41k
                for (int64_t tablet_id : target_tablets) {
1310
1.41k
                    auto res = ExecEnv::get_tablet(tablet_id);
1311
1.41k
                    if (!res.has_value()) {
1312
                        // just ignore
1313
0
                        LOG(WARNING) << "tablet does not exist, tablet id is " << tablet_id;
1314
0
                        continue;
1315
0
                    }
1316
1.41k
                    auto tablet = res.value();
1317
1.41k
                    auto rowsets = tablet->get_snapshot_rowset();
1318
1.41k
                    auto schema =
1319
1.41k
                            variant_util::VariantCompactionUtil::calculate_variant_extended_schema(
1320
1.41k
                                    rowsets, tablet->tablet_schema());
1321
1.41k
                    tablet_schemas.push_back(schema);
1322
1.41k
                }
1323
106
                if (!tablet_schemas.empty()) {
1324
                    // merge all
1325
106
                    TabletSchemaSPtr merged_schema;
1326
106
                    st = variant_util::get_least_common_schema(tablet_schemas, nullptr,
1327
106
                                                               merged_schema);
1328
106
                    if (!st.ok()) {
1329
0
                        LOG(WARNING) << "Failed to get least common schema: " << st.to_string();
1330
0
                        st = Status::InternalError("Failed to get least common schema: {}",
1331
0
                                                   st.to_string());
1332
0
                    }
1333
106
                    merged_schema->to_schema_pb(response->mutable_merged_schema());
1334
106
                    VLOG_DEBUG << "dump schema:" << merged_schema->dump_structure();
1335
106
                }
1336
106
            }
1337
106
            st.to_protobuf(response->mutable_status());
1338
106
        }
1339
212
    });
1340
212
    if (!ret) {
1341
0
        offer_failed(response, done, _heavy_work_pool);
1342
0
    }
1343
212
}
1344
1345
void PInternalService::report_stream_load_status(google::protobuf::RpcController* controller,
1346
                                                 const PReportStreamLoadStatusRequest* request,
1347
                                                 PReportStreamLoadStatusResponse* response,
1348
0
                                                 google::protobuf::Closure* done) {
1349
0
    TUniqueId load_id;
1350
0
    load_id.__set_hi(request->load_id().hi());
1351
0
    load_id.__set_lo(request->load_id().lo());
1352
0
    Status st = Status::OK();
1353
0
    auto stream_load_ctx = _exec_env->new_load_stream_mgr()->get(load_id);
1354
0
    if (!stream_load_ctx) {
1355
0
        st = Status::InternalError("unknown stream load id: {}", UniqueId(load_id).to_string());
1356
0
    }
1357
0
    stream_load_ctx->load_status_promise.set_value(st);
1358
0
    st.to_protobuf(response->mutable_status());
1359
0
}
1360
1361
void PInternalService::get_info(google::protobuf::RpcController* controller,
1362
                                const PProxyRequest* request, PProxyResult* response,
1363
413
                                google::protobuf::Closure* done) {
1364
413
    bool ret = _exec_env->routine_load_task_executor()->get_thread_pool().submit_func([this,
1365
413
                                                                                       request,
1366
413
                                                                                       response,
1367
413
                                                                                       done]() {
1368
413
        brpc::ClosureGuard closure_guard(done);
1369
        // PProxyRequest is defined in gensrc/proto/internal_service.proto
1370
        // Currently it supports 2 kinds of requests:
1371
        // 1. get all kafka partition ids for given topic
1372
        // 2. get all kafka partition offsets for given topic and timestamp.
1373
413
        int timeout_ms = request->has_timeout_secs() ? request->timeout_secs() * 1000 : 60 * 1000;
1374
413
        if (request->has_kafka_meta_request()) {
1375
413
            const PKafkaMetaProxyRequest& kafka_request = request->kafka_meta_request();
1376
413
            if (!kafka_request.offset_flags().empty()) {
1377
65
                std::vector<PIntegerPair> partition_offsets;
1378
65
                Status st = _exec_env->routine_load_task_executor()
1379
65
                                    ->get_kafka_real_offsets_for_partitions(
1380
65
                                            request->kafka_meta_request(), &partition_offsets,
1381
65
                                            timeout_ms);
1382
65
                if (st.ok()) {
1383
65
                    PKafkaPartitionOffsets* part_offsets = response->mutable_partition_offsets();
1384
65
                    for (const auto& entry : partition_offsets) {
1385
65
                        PIntegerPair* res = part_offsets->add_offset_times();
1386
65
                        res->set_key(entry.key());
1387
65
                        res->set_val(entry.val());
1388
65
                    }
1389
65
                }
1390
65
                st.to_protobuf(response->mutable_status());
1391
65
                return;
1392
348
            } else if (!kafka_request.partition_id_for_latest_offsets().empty()) {
1393
                // get latest offsets for specified partition ids
1394
271
                std::vector<PIntegerPair> partition_offsets;
1395
271
                Status st = _exec_env->routine_load_task_executor()
1396
271
                                    ->get_kafka_latest_offsets_for_partitions(
1397
271
                                            request->kafka_meta_request(), &partition_offsets,
1398
271
                                            timeout_ms);
1399
271
                if (st.ok()) {
1400
271
                    PKafkaPartitionOffsets* part_offsets = response->mutable_partition_offsets();
1401
271
                    for (const auto& entry : partition_offsets) {
1402
271
                        PIntegerPair* res = part_offsets->add_offset_times();
1403
271
                        res->set_key(entry.key());
1404
271
                        res->set_val(entry.val());
1405
271
                    }
1406
271
                }
1407
271
                st.to_protobuf(response->mutable_status());
1408
271
                return;
1409
271
            } else if (!kafka_request.offset_times().empty()) {
1410
                // if offset_times() has elements, which means this request is to get offset by timestamp.
1411
1
                std::vector<PIntegerPair> partition_offsets;
1412
1
                Status st = _exec_env->routine_load_task_executor()
1413
1
                                    ->get_kafka_partition_offsets_for_times(
1414
1
                                            request->kafka_meta_request(), &partition_offsets,
1415
1
                                            timeout_ms);
1416
1
                if (st.ok()) {
1417
1
                    PKafkaPartitionOffsets* part_offsets = response->mutable_partition_offsets();
1418
1
                    for (const auto& entry : partition_offsets) {
1419
1
                        PIntegerPair* res = part_offsets->add_offset_times();
1420
1
                        res->set_key(entry.key());
1421
1
                        res->set_val(entry.val());
1422
1
                    }
1423
1
                }
1424
1
                st.to_protobuf(response->mutable_status());
1425
1
                return;
1426
76
            } else {
1427
                // get partition ids of topic
1428
76
                std::vector<int32_t> partition_ids;
1429
76
                Status st = _exec_env->routine_load_task_executor()->get_kafka_partition_meta(
1430
76
                        request->kafka_meta_request(), &partition_ids);
1431
76
                if (st.ok()) {
1432
73
                    PKafkaMetaProxyResult* kafka_result = response->mutable_kafka_meta_result();
1433
73
                    for (int32_t id : partition_ids) {
1434
73
                        kafka_result->add_partition_ids(id);
1435
73
                    }
1436
73
                }
1437
76
                st.to_protobuf(response->mutable_status());
1438
76
                return;
1439
76
            }
1440
413
        }
1441
0
        if (request->has_kinesis_meta_request()) {
1442
0
            std::vector<std::string> shard_ids;
1443
0
            Status st = _exec_env->routine_load_task_executor()->get_kinesis_shard_meta(
1444
0
                    request->kinesis_meta_request(), &shard_ids);
1445
0
            if (st.ok()) {
1446
0
                PKinesisMetaProxyResult* kinesis_result = response->mutable_kinesis_meta_result();
1447
0
                for (const auto& shard_id : shard_ids) {
1448
0
                    kinesis_result->add_shard_ids(shard_id);
1449
0
                }
1450
0
            }
1451
0
            st.to_protobuf(response->mutable_status());
1452
0
            return;
1453
0
        }
1454
0
        Status::OK().to_protobuf(response->mutable_status());
1455
0
    });
1456
413
    if (!ret) {
1457
0
        offer_failed(response, done, _heavy_work_pool);
1458
0
        return;
1459
0
    }
1460
413
}
1461
1462
void PInternalService::update_cache(google::protobuf::RpcController* controller,
1463
                                    const PUpdateCacheRequest* request, PCacheResponse* response,
1464
65.1k
                                    google::protobuf::Closure* done) {
1465
65.1k
    bool ret = _light_work_pool.try_offer([this, request, response, done]() {
1466
65.1k
        brpc::ClosureGuard closure_guard(done);
1467
65.1k
        _exec_env->result_cache()->update(request, response);
1468
65.1k
    });
1469
65.1k
    if (!ret) {
1470
0
        offer_failed(response, done, _light_work_pool);
1471
0
        return;
1472
0
    }
1473
65.1k
}
1474
1475
void PInternalService::fetch_cache(google::protobuf::RpcController* controller,
1476
                                   const PFetchCacheRequest* request, PFetchCacheResult* result,
1477
3.73k
                                   google::protobuf::Closure* done) {
1478
3.73k
    bool ret = _light_work_pool.try_offer([this, request, result, done]() {
1479
3.73k
        brpc::ClosureGuard closure_guard(done);
1480
3.73k
        _exec_env->result_cache()->fetch(request, result);
1481
3.73k
    });
1482
3.73k
    if (!ret) {
1483
0
        offer_failed(result, done, _light_work_pool);
1484
0
        return;
1485
0
    }
1486
3.73k
}
1487
1488
void PInternalService::clear_cache(google::protobuf::RpcController* controller,
1489
                                   const PClearCacheRequest* request, PCacheResponse* response,
1490
0
                                   google::protobuf::Closure* done) {
1491
0
    bool ret = _light_work_pool.try_offer([this, request, response, done]() {
1492
0
        brpc::ClosureGuard closure_guard(done);
1493
0
        _exec_env->result_cache()->clear(request, response);
1494
0
    });
1495
0
    if (!ret) {
1496
0
        offer_failed(response, done, _light_work_pool);
1497
0
        return;
1498
0
    }
1499
0
}
1500
1501
void PInternalService::merge_filter(::google::protobuf::RpcController* controller,
1502
                                    const ::doris::PMergeFilterRequest* request,
1503
                                    ::doris::PMergeFilterResponse* response,
1504
2.92k
                                    ::google::protobuf::Closure* done) {
1505
2.92k
    bool ret = _light_work_pool.try_offer([this, controller, request, response, done]() {
1506
2.92k
        signal::SignalTaskIdKeeper keeper(request->query_id());
1507
2.92k
        brpc::ClosureGuard closure_guard(done);
1508
2.92k
        auto attachment = static_cast<brpc::Controller*>(controller)->request_attachment();
1509
2.92k
        butil::IOBufAsZeroCopyInputStream zero_copy_input_stream(attachment);
1510
2.92k
        Status st;
1511
2.92k
        try {
1512
2.92k
            st = _exec_env->fragment_mgr()->merge_filter(request, &zero_copy_input_stream);
1513
2.92k
        } catch (Exception& e) {
1514
0
            st = e.to_status();
1515
0
        }
1516
2.92k
        st.to_protobuf(response->mutable_status());
1517
2.92k
    });
1518
2.92k
    if (!ret) {
1519
0
        offer_failed(response, done, _light_work_pool);
1520
0
        return;
1521
0
    }
1522
2.92k
}
1523
1524
void PInternalService::send_filter_size(::google::protobuf::RpcController* controller,
1525
                                        const ::doris::PSendFilterSizeRequest* request,
1526
                                        ::doris::PSendFilterSizeResponse* response,
1527
290
                                        ::google::protobuf::Closure* done) {
1528
290
    bool ret = _light_work_pool.try_offer([this, request, response, done]() {
1529
290
        signal::SignalTaskIdKeeper keeper(request->query_id());
1530
290
        brpc::ClosureGuard closure_guard(done);
1531
290
        Status st;
1532
290
        try {
1533
290
            st = _exec_env->fragment_mgr()->send_filter_size(request);
1534
290
        } catch (Exception& e) {
1535
0
            st = e.to_status();
1536
0
        }
1537
290
        st.to_protobuf(response->mutable_status());
1538
290
    });
1539
290
    if (!ret) {
1540
0
        offer_failed(response, done, _light_work_pool);
1541
0
        return;
1542
0
    }
1543
290
}
1544
1545
void PInternalService::sync_filter_size(::google::protobuf::RpcController* controller,
1546
                                        const ::doris::PSyncFilterSizeRequest* request,
1547
                                        ::doris::PSyncFilterSizeResponse* response,
1548
290
                                        ::google::protobuf::Closure* done) {
1549
290
    bool ret = _light_work_pool.try_offer([this, request, response, done]() {
1550
290
        signal::SignalTaskIdKeeper keeper(request->query_id());
1551
290
        brpc::ClosureGuard closure_guard(done);
1552
290
        Status st;
1553
290
        try {
1554
290
            st = _exec_env->fragment_mgr()->sync_filter_size(request);
1555
290
        } catch (Exception& e) {
1556
0
            st = e.to_status();
1557
0
        }
1558
290
        st.to_protobuf(response->mutable_status());
1559
290
    });
1560
290
    if (!ret) {
1561
0
        offer_failed(response, done, _light_work_pool);
1562
0
        return;
1563
0
    }
1564
290
}
1565
1566
void PInternalService::apply_filterv2(::google::protobuf::RpcController* controller,
1567
                                      const ::doris::PPublishFilterRequestV2* request,
1568
                                      ::doris::PPublishFilterResponse* response,
1569
1.76k
                                      ::google::protobuf::Closure* done) {
1570
1.76k
    bool ret = _light_work_pool.try_offer([this, controller, request, response, done]() {
1571
1.76k
        signal::SignalTaskIdKeeper keeper(request->query_id());
1572
1.76k
        brpc::ClosureGuard closure_guard(done);
1573
1.76k
        auto attachment = static_cast<brpc::Controller*>(controller)->request_attachment();
1574
1.76k
        butil::IOBufAsZeroCopyInputStream zero_copy_input_stream(attachment);
1575
1.76k
        VLOG_NOTICE << "rpc apply_filterv2 recv";
1576
1.76k
        Status st;
1577
1.76k
        try {
1578
1.76k
            st = _exec_env->fragment_mgr()->apply_filterv2(request, &zero_copy_input_stream);
1579
1.76k
        } catch (Exception& e) {
1580
0
            st = e.to_status();
1581
0
        }
1582
1.76k
        if (!st.ok()) {
1583
0
            LOG(WARNING) << "apply filter meet error: " << st.to_string();
1584
0
        }
1585
1.75k
        st.to_protobuf(response->mutable_status());
1586
1.75k
    });
1587
1.76k
    if (!ret) {
1588
0
        offer_failed(response, done, _light_work_pool);
1589
0
        return;
1590
0
    }
1591
1.76k
}
1592
1593
void PInternalService::send_data(google::protobuf::RpcController* controller,
1594
                                 const PSendDataRequest* request, PSendDataResult* response,
1595
43
                                 google::protobuf::Closure* done) {
1596
43
    bool ret = _heavy_work_pool.try_offer([this, request, response, done]() {
1597
43
        brpc::ClosureGuard closure_guard(done);
1598
43
        TUniqueId load_id;
1599
43
        load_id.hi = request->load_id().hi();
1600
43
        load_id.lo = request->load_id().lo();
1601
        // On 1.2.3 we add load id to send data request and using load id to get pipe
1602
43
        auto stream_load_ctx = _exec_env->new_load_stream_mgr()->get(load_id);
1603
43
        if (stream_load_ctx == nullptr) {
1604
0
            response->mutable_status()->set_status_code(1);
1605
0
            response->mutable_status()->add_error_msgs("could not find stream load context");
1606
43
        } else {
1607
43
            auto pipe = stream_load_ctx->pipe;
1608
154
            for (int i = 0; i < request->data_size(); ++i) {
1609
111
                std::unique_ptr<PDataRow> row(new PDataRow());
1610
111
                row->CopyFrom(request->data(i));
1611
111
                Status s = pipe->append(std::move(row));
1612
111
                if (!s.ok()) {
1613
0
                    response->mutable_status()->set_status_code(1);
1614
0
                    response->mutable_status()->add_error_msgs(s.to_string());
1615
0
                    return;
1616
0
                }
1617
111
            }
1618
43
            response->mutable_status()->set_status_code(0);
1619
43
        }
1620
43
    });
1621
43
    if (!ret) {
1622
0
        offer_failed(response, done, _heavy_work_pool);
1623
0
        return;
1624
0
    }
1625
43
}
1626
1627
void PInternalService::commit(google::protobuf::RpcController* controller,
1628
                              const PCommitRequest* request, PCommitResult* response,
1629
43
                              google::protobuf::Closure* done) {
1630
43
    bool ret = _heavy_work_pool.try_offer([this, request, response, done]() {
1631
43
        brpc::ClosureGuard closure_guard(done);
1632
43
        TUniqueId load_id;
1633
43
        load_id.hi = request->load_id().hi();
1634
43
        load_id.lo = request->load_id().lo();
1635
1636
43
        auto stream_load_ctx = _exec_env->new_load_stream_mgr()->get(load_id);
1637
43
        if (stream_load_ctx == nullptr) {
1638
0
            response->mutable_status()->set_status_code(1);
1639
0
            response->mutable_status()->add_error_msgs("could not find stream load context");
1640
43
        } else {
1641
43
            static_cast<void>(stream_load_ctx->pipe->finish());
1642
43
            response->mutable_status()->set_status_code(0);
1643
43
        }
1644
43
    });
1645
43
    if (!ret) {
1646
0
        offer_failed(response, done, _heavy_work_pool);
1647
0
        return;
1648
0
    }
1649
43
}
1650
1651
void PInternalService::rollback(google::protobuf::RpcController* controller,
1652
                                const PRollbackRequest* request, PRollbackResult* response,
1653
5
                                google::protobuf::Closure* done) {
1654
5
    bool ret = _heavy_work_pool.try_offer([this, request, response, done]() {
1655
5
        brpc::ClosureGuard closure_guard(done);
1656
5
        TUniqueId load_id;
1657
5
        load_id.hi = request->load_id().hi();
1658
5
        load_id.lo = request->load_id().lo();
1659
5
        auto stream_load_ctx = _exec_env->new_load_stream_mgr()->get(load_id);
1660
5
        if (stream_load_ctx == nullptr) {
1661
0
            response->mutable_status()->set_status_code(1);
1662
0
            response->mutable_status()->add_error_msgs("could not find stream load context");
1663
5
        } else {
1664
5
            stream_load_ctx->pipe->cancel("rollback");
1665
5
            response->mutable_status()->set_status_code(0);
1666
5
        }
1667
5
    });
1668
5
    if (!ret) {
1669
0
        offer_failed(response, done, _heavy_work_pool);
1670
0
        return;
1671
0
    }
1672
5
}
1673
1674
void PInternalService::fold_constant_expr(google::protobuf::RpcController* controller,
1675
                                          const PConstantExprRequest* request,
1676
                                          PConstantExprResult* response,
1677
570
                                          google::protobuf::Closure* done) {
1678
570
    bool ret = _light_work_pool.try_offer([request, response, done]() {
1679
570
        brpc::ClosureGuard closure_guard(done);
1680
570
        TFoldConstantParams t_request;
1681
570
        Status st = Status::OK();
1682
570
        {
1683
570
            const uint8_t* buf = (const uint8_t*)request->request().data();
1684
570
            uint32_t len = request->request().size();
1685
570
            st = deserialize_thrift_msg(buf, &len, false, &t_request);
1686
570
        }
1687
570
        if (!st.ok()) {
1688
0
            LOG(WARNING) << "exec fold constant expr failed, errmsg=" << st
1689
0
                         << " .and query_id_is: " << t_request.query_id;
1690
0
            st.to_protobuf(response->mutable_status());
1691
0
            return;
1692
0
        }
1693
570
        auto fold_func = [&]() -> Status {
1694
570
            std::unique_ptr<FoldConstantExecutor> fold_executor =
1695
570
                    std::make_unique<FoldConstantExecutor>();
1696
570
            RETURN_IF_ERROR_OR_CATCH_EXCEPTION(
1697
570
                    fold_executor->fold_constant_vexpr(t_request, response));
1698
532
            return Status::OK();
1699
570
        };
1700
570
        st = fold_func();
1701
570
        if (!st.ok()) {
1702
38
            LOG(WARNING) << "exec fold constant expr failed, errmsg=" << st
1703
38
                         << " .and query_id_is: " << t_request.query_id;
1704
38
        }
1705
570
        st.to_protobuf(response->mutable_status());
1706
570
    });
1707
570
    if (!ret) {
1708
0
        offer_failed(response, done, _light_work_pool);
1709
0
        return;
1710
0
    }
1711
570
}
1712
1713
void PInternalService::transmit_rec_cte_block(google::protobuf::RpcController* controller,
1714
                                              const PTransmitRecCTEBlockParams* request,
1715
                                              PTransmitRecCTEBlockResult* response,
1716
3.76k
                                              google::protobuf::Closure* done) {
1717
3.76k
    bool ret = _light_work_pool.try_offer([this, request, response, done]() {
1718
3.76k
        brpc::ClosureGuard closure_guard(done);
1719
3.76k
        auto st = _exec_env->fragment_mgr()->transmit_rec_cte_block(
1720
3.76k
                UniqueId(request->query_id()).to_thrift(),
1721
3.76k
                UniqueId(request->fragment_instance_id()).to_thrift(), request->node_id(),
1722
3.76k
                request->blocks(), request->eos());
1723
3.76k
        st.to_protobuf(response->mutable_status());
1724
3.76k
    });
1725
3.76k
    if (!ret) {
1726
0
        offer_failed(response, done, _light_work_pool);
1727
0
        return;
1728
0
    }
1729
3.76k
}
1730
1731
void PInternalService::rerun_fragment(google::protobuf::RpcController* controller,
1732
                                      const PRerunFragmentParams* request,
1733
                                      PRerunFragmentResult* response,
1734
10.0k
                                      google::protobuf::Closure* done) {
1735
10.0k
    bool ret = _light_work_pool.try_offer([this, request, response, done]() {
1736
        // Use shared_ptr<ClosureGuard> so we can transfer ownership to the PFC.
1737
        // For wait_for_destroy/final_close, the guard is stored in the PFC and the RPC
1738
        // response is deferred until the PFC is fully destroyed. For rebuild/submit,
1739
        // the guard fires immediately when this lambda returns.
1740
10.0k
        std::shared_ptr<brpc::ClosureGuard> closure_guard =
1741
10.0k
                std::make_shared<brpc::ClosureGuard>(done);
1742
10.0k
        auto st = _exec_env->fragment_mgr()->rerun_fragment(
1743
10.0k
                closure_guard, UniqueId(request->query_id()).to_thrift(), request->fragment_id(),
1744
10.0k
                request->stage());
1745
10.0k
        st.to_protobuf(response->mutable_status());
1746
10.0k
    });
1747
10.0k
    if (!ret) {
1748
0
        offer_failed(response, done, _light_work_pool);
1749
0
        return;
1750
0
    }
1751
10.0k
}
1752
1753
void PInternalService::reset_global_rf(google::protobuf::RpcController* controller,
1754
                                       const PResetGlobalRfParams* request,
1755
                                       PResetGlobalRfResult* response,
1756
1.82k
                                       google::protobuf::Closure* done) {
1757
1.82k
    bool ret = _light_work_pool.try_offer([this, request, response, done]() {
1758
1.82k
        brpc::ClosureGuard closure_guard(done);
1759
1.82k
        auto st = _exec_env->fragment_mgr()->reset_global_rf(
1760
1.82k
                UniqueId(request->query_id()).to_thrift(), request->filter_ids());
1761
1.82k
        st.to_protobuf(response->mutable_status());
1762
1.82k
    });
1763
1.82k
    if (!ret) {
1764
0
        offer_failed(response, done, _light_work_pool);
1765
0
        return;
1766
0
    }
1767
1.82k
}
1768
1769
void PInternalService::transmit_block(google::protobuf::RpcController* controller,
1770
                                      const PTransmitDataParams* request,
1771
                                      PTransmitDataResult* response,
1772
1.45M
                                      google::protobuf::Closure* done) {
1773
1.45M
    int64_t receive_time = GetCurrentTimeNanos();
1774
1.46M
    if (config::enable_bthread_transmit_block) {
1775
1.46M
        response->set_receive_time(receive_time);
1776
        // under high concurrency, thread pool will have a lot of lock contention.
1777
        // May offer failed to the thread pool, so that we should avoid using thread
1778
        // pool here.
1779
1.46M
        _transmit_block(controller, request, response, done, Status::OK(), 0);
1780
18.4E
    } else {
1781
18.4E
        bool ret = _light_work_pool.try_offer([this, controller, request, response, done,
1782
18.4E
                                               receive_time]() {
1783
0
            response->set_receive_time(receive_time);
1784
            // Sometimes transmit block function is the last owner of PlanFragmentExecutor
1785
            // It will release the object. And the object maybe a JNIContext.
1786
            // JNIContext will hold some TLS object. It could not work correctly under bthread
1787
            // Context. So that put the logic into pthread.
1788
            // But this is rarely happens, so this config is disabled by default.
1789
0
            _transmit_block(controller, request, response, done, Status::OK(),
1790
0
                            GetCurrentTimeNanos() - receive_time);
1791
0
        });
1792
18.4E
        if (!ret) {
1793
0
            offer_failed(response, done, _light_work_pool);
1794
0
            return;
1795
0
        }
1796
18.4E
    }
1797
1.45M
}
1798
1799
void PInternalService::transmit_block_by_http(google::protobuf::RpcController* controller,
1800
                                              const PEmptyRequest* request,
1801
                                              PTransmitDataResult* response,
1802
0
                                              google::protobuf::Closure* done) {
1803
0
    int64_t receive_time = GetCurrentTimeNanos();
1804
0
    bool ret = _heavy_work_pool.try_offer([this, controller, response, done, receive_time]() {
1805
0
        PTransmitDataParams* new_request = new PTransmitDataParams();
1806
0
        google::protobuf::Closure* new_done =
1807
0
                new NewHttpClosure<PTransmitDataParams>(new_request, done);
1808
0
        brpc::Controller* cntl = static_cast<brpc::Controller*>(controller);
1809
0
        Status st =
1810
0
                attachment_extract_request_contain_block<PTransmitDataParams>(new_request, cntl);
1811
0
        _transmit_block(controller, new_request, response, new_done, st,
1812
0
                        GetCurrentTimeNanos() - receive_time);
1813
0
    });
1814
0
    if (!ret) {
1815
0
        offer_failed(response, done, _heavy_work_pool);
1816
0
        return;
1817
0
    }
1818
0
}
1819
1820
void PInternalService::_transmit_block(google::protobuf::RpcController* controller,
1821
                                       const PTransmitDataParams* request,
1822
                                       PTransmitDataResult* response,
1823
                                       google::protobuf::Closure* done, const Status& extract_st,
1824
1.45M
                                       const int64_t wait_for_worker) {
1825
1.45M
    if (request->has_query_id()) {
1826
18.4E
        VLOG_ROW << "transmit block: fragment_instance_id=" << print_id(request->finst_id())
1827
18.4E
                 << " query_id=" << print_id(request->query_id()) << " node=" << request->node_id();
1828
1.45M
    }
1829
1830
    // The response is accessed when done->Run is called in transmit_block(),
1831
    // give response a default value to avoid null pointers in high concurrency.
1832
1.45M
    Status st;
1833
1.45M
    if (extract_st.ok()) {
1834
1.45M
        st = _exec_env->vstream_mgr()->transmit_block(request, &done, wait_for_worker);
1835
1.45M
        if (!st.ok() && !st.is<END_OF_FILE>()) {
1836
0
            LOG(WARNING) << "transmit_block failed, message=" << st
1837
0
                         << ", fragment_instance_id=" << print_id(request->finst_id())
1838
0
                         << ", node=" << request->node_id()
1839
0
                         << ", from sender_id: " << request->sender_id()
1840
0
                         << ", be_number: " << request->be_number()
1841
0
                         << ", packet_seq: " << request->packet_seq();
1842
0
        }
1843
18.4E
    } else {
1844
18.4E
        st = extract_st;
1845
18.4E
    }
1846
1.46M
    if (done != nullptr) {
1847
1.46M
        st.to_protobuf(response->mutable_status());
1848
1.46M
        done->Run();
1849
1.46M
    }
1850
1.45M
}
1851
1852
void PInternalService::check_rpc_channel(google::protobuf::RpcController* controller,
1853
                                         const PCheckRPCChannelRequest* request,
1854
                                         PCheckRPCChannelResponse* response,
1855
0
                                         google::protobuf::Closure* done) {
1856
0
    bool ret = _light_work_pool.try_offer([request, response, done]() {
1857
0
        brpc::ClosureGuard closure_guard(done);
1858
0
        response->mutable_status()->set_status_code(0);
1859
0
        if (request->data().size() != request->size()) {
1860
0
            std::stringstream ss;
1861
0
            ss << "data size not same, expected: " << request->size()
1862
0
               << ", actual: " << request->data().size();
1863
0
            response->mutable_status()->add_error_msgs(ss.str());
1864
0
            response->mutable_status()->set_status_code(1);
1865
1866
0
        } else {
1867
0
            Md5Digest digest;
1868
0
            digest.update(static_cast<const void*>(request->data().c_str()),
1869
0
                          request->data().size());
1870
0
            digest.digest();
1871
0
            if (!iequal(digest.hex(), request->md5())) {
1872
0
                std::stringstream ss;
1873
0
                ss << "md5 not same, expected: " << request->md5() << ", actual: " << digest.hex();
1874
0
                response->mutable_status()->add_error_msgs(ss.str());
1875
0
                response->mutable_status()->set_status_code(1);
1876
0
            }
1877
0
        }
1878
0
    });
1879
0
    if (!ret) {
1880
0
        offer_failed(response, done, _light_work_pool);
1881
0
        return;
1882
0
    }
1883
0
}
1884
1885
void PInternalService::reset_rpc_channel(google::protobuf::RpcController* controller,
1886
                                         const PResetRPCChannelRequest* request,
1887
                                         PResetRPCChannelResponse* response,
1888
0
                                         google::protobuf::Closure* done) {
1889
0
    bool ret = _light_work_pool.try_offer([request, response, done]() {
1890
0
        brpc::ClosureGuard closure_guard(done);
1891
0
        response->mutable_status()->set_status_code(0);
1892
0
        if (request->all()) {
1893
0
            int size = ExecEnv::GetInstance()->brpc_internal_client_cache()->size();
1894
0
            if (size > 0) {
1895
0
                std::vector<std::string> endpoints;
1896
0
                ExecEnv::GetInstance()->brpc_internal_client_cache()->get_all(&endpoints);
1897
0
                ExecEnv::GetInstance()->brpc_internal_client_cache()->clear();
1898
0
                *response->mutable_channels() = {endpoints.begin(), endpoints.end()};
1899
0
            }
1900
0
        } else {
1901
0
            for (const std::string& endpoint : request->endpoints()) {
1902
0
                if (!ExecEnv::GetInstance()->brpc_internal_client_cache()->exist(endpoint)) {
1903
0
                    response->mutable_status()->add_error_msgs(endpoint + ": not found.");
1904
0
                    continue;
1905
0
                }
1906
1907
0
                if (ExecEnv::GetInstance()->brpc_internal_client_cache()->erase(endpoint)) {
1908
0
                    response->add_channels(endpoint);
1909
0
                } else {
1910
0
                    response->mutable_status()->add_error_msgs(endpoint + ": reset failed.");
1911
0
                }
1912
0
            }
1913
0
            if (request->endpoints_size() != response->channels_size()) {
1914
0
                response->mutable_status()->set_status_code(1);
1915
0
            }
1916
0
        }
1917
0
    });
1918
0
    if (!ret) {
1919
0
        offer_failed(response, done, _light_work_pool);
1920
0
        return;
1921
0
    }
1922
0
}
1923
1924
void PInternalService::hand_shake(google::protobuf::RpcController* controller,
1925
                                  const PHandShakeRequest* request, PHandShakeResponse* response,
1926
2.92k
                                  google::protobuf::Closure* done) {
1927
    // The light pool may be full. Handshake is used to check the connection state of brpc.
1928
    // Should not be interfered by the thread pool logic.
1929
2.92k
    brpc::ClosureGuard closure_guard(done);
1930
2.92k
    if (request->has_hello()) {
1931
2.92k
        response->set_hello(request->hello());
1932
2.92k
    }
1933
2.92k
    response->mutable_status()->set_status_code(0);
1934
2.92k
}
1935
1936
// Fixed pieces of the tablet download URL assembled by construct_url():
//   HttpProtocol + <host:port> + DownloadApiPath + <token> + FileParam + <path>
constexpr char HttpProtocol[] = "http://";
1937
constexpr char DownloadApiPath[] = "/api/_tablet/_download?token=";
1938
constexpr char FileParam[] = "&file=";
1939
1940
static std::string construct_url(const std::string& host_port, const std::string& token,
1941
0
                                 const std::string& path) {
1942
0
    return fmt::format("{}{}{}{}{}{}", HttpProtocol, host_port, DownloadApiPath, token, FileParam,
1943
0
                       path);
1944
0
}
1945
1946
// Downloads `remote_file_url` to `local_file_path`, retrying through
// HttpClient::execute_with_retry (DOWNLOAD_FILE_MAX_RETRY attempts).
//
// @param remote_file_url   URL to fetch.
// @param local_file_path   destination path on the local filesystem.
// @param estimate_timeout  timeout in seconds (multiplied by 1000 for
//                          HttpClient::set_timeout_ms).
// @param file_size         expected size in bytes; 0 disables the size check.
// @return OK when the download succeeded, the size matched (if checked) and the
//         file permission was set to PERMS_OWNER_RW; an error Status otherwise.
static Status download_file_action(std::string& remote_file_url, std::string& local_file_path,
                                   uint64_t estimate_timeout, uint64_t file_size) {
    // Everything is captured by value so the callback does not depend on the
    // lifetime of the caller's strings.
    auto download_cb = [remote_file_url, estimate_timeout, local_file_path,
                        file_size](HttpClient* client) {
        RETURN_IF_ERROR(client->init(remote_file_url));
        client->set_timeout_ms(estimate_timeout * 1000);
        RETURN_IF_ERROR(client->download(local_file_path));

        if (file_size > 0) {
            // Check file length. The error_code overload is used so that a
            // filesystem failure becomes a Status (and can be retried) instead
            // of escaping this callback as a std::filesystem::filesystem_error.
            std::error_code ec;
            const uint64_t local_file_size = std::filesystem::file_size(local_file_path, ec);
            if (ec) {
                LOG(WARNING) << "failed to pull rowset for slave replica. failed to get size of "
                                "downloaded file"
                             << ", remote_path=" << remote_file_url
                             << ", local_path=" << local_file_path << ", error=" << ec.message();
                return Status::InternalError("failed to get size of downloaded file");
            }
            if (local_file_size != file_size) {
                LOG(WARNING) << "failed to pull rowset for slave replica. download file "
                                "length error"
                             << ", remote_path=" << remote_file_url << ", file_size=" << file_size
                             << ", local_file_size=" << local_file_size;
                return Status::InternalError("downloaded file size is not equal");
            }
        }

        return io::global_local_filesystem()->permission(local_file_path,
                                                         io::LocalFileSystem::PERMS_OWNER_RW);
    };
    return HttpClient::execute_with_retry(DOWNLOAD_FILE_MAX_RETRY, 1, download_cb);
}
1971
1972
void PInternalServiceImpl::request_slave_tablet_pull_rowset(
1973
        google::protobuf::RpcController* controller, const PTabletWriteSlaveRequest* request,
1974
0
        PTabletWriteSlaveResult* response, google::protobuf::Closure* done) {
1975
0
    brpc::ClosureGuard closure_guard(done);
1976
0
    const RowsetMetaPB& rowset_meta_pb = request->rowset_meta();
1977
0
    const std::string& rowset_path = request->rowset_path();
1978
0
    google::protobuf::Map<int64_t, int64_t> segments_size = request->segments_size();
1979
0
    google::protobuf::Map<int64_t, PTabletWriteSlaveRequest_IndexSizeMap> indices_size =
1980
0
            request->inverted_indices_size();
1981
0
    std::string host = request->host();
1982
0
    int64_t http_port = request->http_port();
1983
0
    int64_t brpc_port = request->brpc_port();
1984
0
    std::string token = request->token();
1985
0
    int64_t node_id = request->node_id();
1986
0
    bool ret = _heavy_work_pool.try_offer([rowset_meta_pb, host, brpc_port, node_id, segments_size,
1987
0
                                           indices_size, http_port, token, rowset_path, this]() {
1988
0
        TabletSharedPtr tablet = _engine.tablet_manager()->get_tablet(
1989
0
                rowset_meta_pb.tablet_id(), rowset_meta_pb.tablet_schema_hash());
1990
0
        if (tablet == nullptr) {
1991
0
            LOG(WARNING) << "failed to pull rowset for slave replica. tablet ["
1992
0
                         << rowset_meta_pb.tablet_id()
1993
0
                         << "] is not exist. txn_id=" << rowset_meta_pb.txn_id();
1994
0
            _response_pull_slave_rowset(host, brpc_port, rowset_meta_pb.txn_id(),
1995
0
                                        rowset_meta_pb.tablet_id(), node_id, false);
1996
0
            return;
1997
0
        }
1998
1999
0
        RowsetMetaSharedPtr rowset_meta(new RowsetMeta());
2000
0
        std::string rowset_meta_str;
2001
0
        bool ret = rowset_meta_pb.SerializeToString(&rowset_meta_str);
2002
0
        if (!ret) {
2003
0
            LOG(WARNING) << "failed to pull rowset for slave replica. serialize rowset meta "
2004
0
                            "failed. rowset_id="
2005
0
                         << rowset_meta_pb.rowset_id()
2006
0
                         << ", tablet_id=" << rowset_meta_pb.tablet_id()
2007
0
                         << ", txn_id=" << rowset_meta_pb.txn_id();
2008
0
            _response_pull_slave_rowset(host, brpc_port, rowset_meta_pb.txn_id(),
2009
0
                                        rowset_meta_pb.tablet_id(), node_id, false);
2010
0
            return;
2011
0
        }
2012
0
        bool parsed = rowset_meta->init(rowset_meta_str);
2013
0
        if (!parsed) {
2014
0
            LOG(WARNING) << "failed to pull rowset for slave replica. parse rowset meta string "
2015
0
                            "failed. rowset_id="
2016
0
                         << rowset_meta_pb.rowset_id()
2017
0
                         << ", tablet_id=" << rowset_meta_pb.tablet_id()
2018
0
                         << ", txn_id=" << rowset_meta_pb.txn_id();
2019
            // return false will break meta iterator, return true to skip this error
2020
0
            _response_pull_slave_rowset(host, brpc_port, rowset_meta->txn_id(),
2021
0
                                        rowset_meta->tablet_id(), node_id, false);
2022
0
            return;
2023
0
        }
2024
0
        RowsetId remote_rowset_id = rowset_meta->rowset_id();
2025
        // change rowset id because it maybe same as other local rowset
2026
0
        RowsetId new_rowset_id = _engine.next_rowset_id();
2027
0
        auto pending_rs_guard = _engine.pending_local_rowsets().add(new_rowset_id);
2028
0
        rowset_meta->set_rowset_id(new_rowset_id);
2029
0
        rowset_meta->set_tablet_uid(tablet->tablet_uid());
2030
0
        VLOG_CRITICAL << "succeed to init rowset meta for slave replica. rowset_id="
2031
0
                      << rowset_meta->rowset_id() << ", tablet_id=" << rowset_meta->tablet_id()
2032
0
                      << ", txn_id=" << rowset_meta->txn_id();
2033
2034
0
        auto tablet_scheme = rowset_meta->tablet_schema();
2035
0
        for (const auto& segment : segments_size) {
2036
0
            uint64_t file_size = segment.second;
2037
0
            uint64_t estimate_timeout = file_size / config::download_low_speed_limit_kbps / 1024;
2038
0
            if (estimate_timeout < config::download_low_speed_time) {
2039
0
                estimate_timeout = config::download_low_speed_time;
2040
0
            }
2041
2042
0
            std::string remote_file_path =
2043
0
                    local_segment_path(rowset_path, remote_rowset_id.to_string(), segment.first);
2044
0
            std::string remote_file_url =
2045
0
                    construct_url(get_host_port(host, http_port), token, remote_file_path);
2046
2047
0
            std::string local_file_path = local_segment_path(
2048
0
                    tablet->tablet_path(), rowset_meta->rowset_id().to_string(), segment.first);
2049
2050
0
            auto st = download_file_action(remote_file_url, local_file_path, estimate_timeout,
2051
0
                                           file_size);
2052
0
            if (!st.ok()) {
2053
0
                LOG(WARNING) << "failed to pull rowset for slave replica. failed to download "
2054
0
                                "file. url="
2055
0
                             << remote_file_url << ", local_path=" << local_file_path
2056
0
                             << ", txn_id=" << rowset_meta->txn_id();
2057
0
                _response_pull_slave_rowset(host, brpc_port, rowset_meta->txn_id(),
2058
0
                                            rowset_meta->tablet_id(), node_id, false);
2059
0
                return;
2060
0
            }
2061
0
            VLOG_CRITICAL << "succeed to download file for slave replica. url=" << remote_file_url
2062
0
                          << ", local_path=" << local_file_path
2063
0
                          << ", txn_id=" << rowset_meta->txn_id();
2064
0
            if (indices_size.find(segment.first) != indices_size.end()) {
2065
0
                PTabletWriteSlaveRequest_IndexSizeMap segment_indices_size =
2066
0
                        indices_size.at(segment.first);
2067
2068
0
                for (auto index_size : segment_indices_size.index_sizes()) {
2069
0
                    auto index_id = index_size.indexid();
2070
0
                    auto size = index_size.size();
2071
0
                    auto suffix_path = index_size.suffix_path();
2072
0
                    std::string remote_inverted_index_file;
2073
0
                    std::string local_inverted_index_file;
2074
0
                    std::string remote_inverted_index_file_url;
2075
0
                    if (tablet_scheme->get_inverted_index_storage_format() ==
2076
0
                        InvertedIndexStorageFormatPB::V1) {
2077
0
                        remote_inverted_index_file =
2078
0
                                InvertedIndexDescriptor::get_index_file_path_v1(
2079
0
                                        InvertedIndexDescriptor::get_index_file_path_prefix(
2080
0
                                                remote_file_path),
2081
0
                                        index_id, suffix_path);
2082
0
                        remote_inverted_index_file_url = construct_url(
2083
0
                                get_host_port(host, http_port), token, remote_inverted_index_file);
2084
2085
0
                        local_inverted_index_file = InvertedIndexDescriptor::get_index_file_path_v1(
2086
0
                                InvertedIndexDescriptor::get_index_file_path_prefix(
2087
0
                                        local_file_path),
2088
0
                                index_id, suffix_path);
2089
0
                    } else {
2090
0
                        remote_inverted_index_file =
2091
0
                                InvertedIndexDescriptor::get_index_file_path_v2(
2092
0
                                        InvertedIndexDescriptor::get_index_file_path_prefix(
2093
0
                                                remote_file_path));
2094
0
                        remote_inverted_index_file_url = construct_url(
2095
0
                                get_host_port(host, http_port), token, remote_inverted_index_file);
2096
2097
0
                        local_inverted_index_file = InvertedIndexDescriptor::get_index_file_path_v2(
2098
0
                                InvertedIndexDescriptor::get_index_file_path_prefix(
2099
0
                                        local_file_path));
2100
0
                    }
2101
0
                    st = download_file_action(remote_inverted_index_file_url,
2102
0
                                              local_inverted_index_file, estimate_timeout, size);
2103
0
                    if (!st.ok()) {
2104
0
                        LOG(WARNING) << "failed to pull rowset for slave replica. failed to "
2105
0
                                        "download "
2106
0
                                        "file. url="
2107
0
                                     << remote_inverted_index_file_url
2108
0
                                     << ", local_path=" << local_inverted_index_file
2109
0
                                     << ", txn_id=" << rowset_meta->txn_id();
2110
0
                        _response_pull_slave_rowset(host, brpc_port, rowset_meta->txn_id(),
2111
0
                                                    rowset_meta->tablet_id(), node_id, false);
2112
0
                        return;
2113
0
                    }
2114
2115
0
                    VLOG_CRITICAL
2116
0
                            << "succeed to download inverted index file for slave replica. url="
2117
0
                            << remote_inverted_index_file_url
2118
0
                            << ", local_path=" << local_inverted_index_file
2119
0
                            << ", txn_id=" << rowset_meta->txn_id();
2120
0
                }
2121
0
            }
2122
0
        }
2123
2124
0
        RowsetSharedPtr rowset;
2125
0
        Status create_status = RowsetFactory::create_rowset(
2126
0
                tablet->tablet_schema(), tablet->tablet_path(), rowset_meta, &rowset);
2127
0
        if (!create_status) {
2128
0
            LOG(WARNING) << "failed to create rowset from rowset meta for slave replica"
2129
0
                         << ". rowset_id: " << rowset_meta->rowset_id()
2130
0
                         << ", rowset_type: " << rowset_meta->rowset_type()
2131
0
                         << ", rowset_state: " << rowset_meta->rowset_state()
2132
0
                         << ", tablet_id=" << rowset_meta->tablet_id()
2133
0
                         << ", txn_id=" << rowset_meta->txn_id();
2134
0
            _response_pull_slave_rowset(host, brpc_port, rowset_meta->txn_id(),
2135
0
                                        rowset_meta->tablet_id(), node_id, false);
2136
0
            return;
2137
0
        }
2138
0
        if (rowset_meta->rowset_state() != RowsetStatePB::COMMITTED) {
2139
0
            LOG(WARNING) << "could not commit txn for slave replica because master rowset state is "
2140
0
                            "not committed, rowset_state="
2141
0
                         << rowset_meta->rowset_state()
2142
0
                         << ", tablet_id=" << rowset_meta->tablet_id()
2143
0
                         << ", txn_id=" << rowset_meta->txn_id();
2144
0
            _response_pull_slave_rowset(host, brpc_port, rowset_meta->txn_id(),
2145
0
                                        rowset_meta->tablet_id(), node_id, false);
2146
0
            return;
2147
0
        }
2148
0
        Status commit_txn_status = _engine.txn_manager()->commit_txn(
2149
0
                tablet->data_dir()->get_meta(), rowset_meta->partition_id(), rowset_meta->txn_id(),
2150
0
                rowset_meta->tablet_id(), tablet->tablet_uid(), rowset_meta->load_id(), rowset,
2151
0
                std::move(pending_rs_guard), false);
2152
0
        if (!commit_txn_status && !commit_txn_status.is<PUSH_TRANSACTION_ALREADY_EXIST>()) {
2153
0
            LOG(WARNING) << "failed to add committed rowset for slave replica. rowset_id="
2154
0
                         << rowset_meta->rowset_id() << ", tablet_id=" << rowset_meta->tablet_id()
2155
0
                         << ", txn_id=" << rowset_meta->txn_id();
2156
0
            _response_pull_slave_rowset(host, brpc_port, rowset_meta->txn_id(),
2157
0
                                        rowset_meta->tablet_id(), node_id, false);
2158
0
            return;
2159
0
        }
2160
0
        VLOG_CRITICAL << "succeed to pull rowset for slave replica. successfully to add committed "
2161
0
                         "rowset: "
2162
0
                      << rowset_meta->rowset_id()
2163
0
                      << " to tablet, tablet_id=" << rowset_meta->tablet_id()
2164
0
                      << ", schema_hash=" << rowset_meta->tablet_schema_hash()
2165
0
                      << ", txn_id=" << rowset_meta->txn_id();
2166
0
        _response_pull_slave_rowset(host, brpc_port, rowset_meta->txn_id(),
2167
0
                                    rowset_meta->tablet_id(), node_id, true);
2168
0
    });
2169
0
    if (!ret) {
2170
0
        offer_failed(response, closure_guard.release(), _heavy_work_pool);
2171
0
        return;
2172
0
    }
2173
0
    Status::OK().to_protobuf(response->mutable_status());
2174
0
}
2175
2176
void PInternalServiceImpl::_response_pull_slave_rowset(const std::string& remote_host,
2177
                                                       int64_t brpc_port, int64_t txn_id,
2178
                                                       int64_t tablet_id, int64_t node_id,
2179
0
                                                       bool is_succeed) {
2180
0
    std::shared_ptr<PBackendService_Stub> stub =
2181
0
            ExecEnv::GetInstance()->brpc_internal_client_cache()->get_client(remote_host,
2182
0
                                                                             brpc_port);
2183
0
    if (stub == nullptr) {
2184
0
        LOG(WARNING) << "failed to response result of slave replica to master replica. get rpc "
2185
0
                        "stub failed, master host="
2186
0
                     << remote_host << ", port=" << brpc_port << ", tablet_id=" << tablet_id
2187
0
                     << ", txn_id=" << txn_id;
2188
0
        return;
2189
0
    }
2190
2191
0
    auto request = std::make_shared<PTabletWriteSlaveDoneRequest>();
2192
0
    request->set_txn_id(txn_id);
2193
0
    request->set_tablet_id(tablet_id);
2194
0
    request->set_node_id(node_id);
2195
0
    request->set_is_succeed(is_succeed);
2196
0
    auto pull_rowset_callback = DummyBrpcCallback<PTabletWriteSlaveDoneResult>::create_shared();
2197
0
    auto closure = AutoReleaseClosure<
2198
0
            PTabletWriteSlaveDoneRequest,
2199
0
            DummyBrpcCallback<PTabletWriteSlaveDoneResult>>::create_unique(request,
2200
0
                                                                           pull_rowset_callback);
2201
0
    closure->cntl_->set_timeout_ms(config::slave_replica_writer_rpc_timeout_sec * 1000);
2202
0
    closure->cntl_->ignore_eovercrowded();
2203
0
    stub->response_slave_tablet_pull_rowset(closure->cntl_.get(), closure->request_.get(),
2204
0
                                            closure->response_.get(), closure.get());
2205
0
    closure.release();
2206
2207
0
    pull_rowset_callback->join();
2208
0
    if (pull_rowset_callback->cntl_->Failed()) {
2209
0
        LOG(WARNING) << "failed to response result of slave replica to master replica, error="
2210
0
                     << berror(pull_rowset_callback->cntl_->ErrorCode())
2211
0
                     << ", error_text=" << pull_rowset_callback->cntl_->ErrorText()
2212
0
                     << ", master host: " << remote_host << ", tablet_id=" << tablet_id
2213
0
                     << ", txn_id=" << txn_id;
2214
0
    }
2215
0
    VLOG_CRITICAL << "succeed to response the result of slave replica pull rowset to master "
2216
0
                     "replica. master host: "
2217
0
                  << remote_host << ". is_succeed=" << is_succeed << ", tablet_id=" << tablet_id
2218
0
                  << ", slave server=" << node_id << ", txn_id=" << txn_id;
2219
0
}
2220
2221
void PInternalServiceImpl::response_slave_tablet_pull_rowset(
2222
        google::protobuf::RpcController* controller, const PTabletWriteSlaveDoneRequest* request,
2223
0
        PTabletWriteSlaveDoneResult* response, google::protobuf::Closure* done) {
2224
0
    bool ret = _heavy_work_pool.try_offer([txn_mgr = _engine.txn_manager(), request, response,
2225
0
                                           done]() {
2226
0
        brpc::ClosureGuard closure_guard(done);
2227
0
        VLOG_CRITICAL << "receive the result of slave replica pull rowset from slave replica. "
2228
0
                         "slave server="
2229
0
                      << request->node_id() << ", is_succeed=" << request->is_succeed()
2230
0
                      << ", tablet_id=" << request->tablet_id() << ", txn_id=" << request->txn_id();
2231
0
        txn_mgr->finish_slave_tablet_pull_rowset(request->txn_id(), request->tablet_id(),
2232
0
                                                 request->node_id(), request->is_succeed());
2233
0
        Status::OK().to_protobuf(response->mutable_status());
2234
0
    });
2235
0
    if (!ret) {
2236
0
        offer_failed(response, done, _heavy_work_pool);
2237
0
        return;
2238
0
    }
2239
0
}
2240
2241
void PInternalService::multiget_data(google::protobuf::RpcController* controller,
2242
                                     const PMultiGetRequest* request, PMultiGetResponse* response,
2243
0
                                     google::protobuf::Closure* done) {
2244
0
    bool ret = _heavy_work_pool.try_offer([request, response, done]() {
2245
0
        signal::SignalTaskIdKeeper keeper(request->query_id());
2246
        // multi get data by rowid
2247
0
        MonotonicStopWatch watch;
2248
0
        watch.start();
2249
0
        brpc::ClosureGuard closure_guard(done);
2250
0
        response->mutable_status()->set_status_code(0);
2251
0
        SCOPED_ATTACH_TASK(ExecEnv::GetInstance()->rowid_storage_reader_tracker());
2252
0
        Status st = RowIdStorageReader::read_by_rowids(*request, response);
2253
0
        st.to_protobuf(response->mutable_status());
2254
0
        LOG(INFO) << "multiget_data finished, cost(us):" << watch.elapsed_time() / 1000;
2255
0
    });
2256
0
    if (!ret) {
2257
0
        offer_failed(response, done, _heavy_work_pool);
2258
0
        return;
2259
0
    }
2260
0
}
2261
2262
void PInternalService::multiget_data_v2(google::protobuf::RpcController* controller,
2263
                                        const PMultiGetRequestV2* request,
2264
                                        PMultiGetResponseV2* response,
2265
3.13k
                                        google::protobuf::Closure* done) {
2266
3.13k
    std::vector<uint64_t> id_set;
2267
3.13k
    id_set.push_back(request->wg_id());
2268
3.13k
    auto wg = ExecEnv::GetInstance()->workload_group_mgr()->get_group(id_set);
2269
3.13k
    Status st = Status::OK();
2270
2271
3.13k
    if (!wg) [[unlikely]] {
2272
0
        brpc::ClosureGuard closure_guard(done);
2273
0
        st = Status::Error<TStatusCode::CANCELLED>("fail to find wg: wg id:" +
2274
0
                                                   std::to_string(request->wg_id()));
2275
0
        st.to_protobuf(response->mutable_status());
2276
0
        return;
2277
0
    }
2278
2279
3.13k
    doris::TaskScheduler* exec_sched = nullptr;
2280
3.13k
    ScannerScheduler* scan_sched = nullptr;
2281
3.13k
    ScannerScheduler* remote_scan_sched = nullptr;
2282
3.13k
    wg->get_query_scheduler(&exec_sched, &scan_sched, &remote_scan_sched);
2283
3.13k
    DCHECK(remote_scan_sched);
2284
2285
3.13k
    st = remote_scan_sched->submit_scan_task(
2286
3.13k
            SimplifiedScanTask(
2287
3.13k
                    [request, response, done]() {
2288
3.13k
                        SCOPED_ATTACH_TASK(ExecEnv::GetInstance()->rowid_storage_reader_tracker());
2289
3.13k
                        signal::set_signal_task_id(request->query_id());
2290
                        // multi get data by rowid
2291
3.13k
                        MonotonicStopWatch watch;
2292
3.13k
                        watch.start();
2293
3.13k
                        brpc::ClosureGuard closure_guard(done);
2294
3.13k
                        response->mutable_status()->set_status_code(0);
2295
3.13k
                        Status st = RowIdStorageReader::read_by_rowids(*request, response);
2296
3.13k
                        st.to_protobuf(response->mutable_status());
2297
3.13k
                        LOG(INFO) << "multiget_data finished, cost(us):"
2298
3.13k
                                  << watch.elapsed_time() / 1000;
2299
3.13k
                        return true;
2300
3.13k
                    },
2301
3.13k
                    nullptr, nullptr),
2302
3.13k
            fmt::format("{}-multiget_data_v2", print_id(request->query_id())));
2303
2304
3.13k
    if (!st.ok()) {
2305
0
        brpc::ClosureGuard closure_guard(done);
2306
0
        st.to_protobuf(response->mutable_status());
2307
0
    }
2308
3.13k
}
2309
2310
void PInternalServiceImpl::get_tablet_rowset_versions(google::protobuf::RpcController* cntl_base,
2311
                                                      const PGetTabletVersionsRequest* request,
2312
                                                      PGetTabletVersionsResponse* response,
2313
0
                                                      google::protobuf::Closure* done) {
2314
0
    brpc::ClosureGuard closure_guard(done);
2315
0
    VLOG_DEBUG << "receive get tablet versions request: " << request->DebugString();
2316
0
    _engine.get_tablet_rowset_versions(request, response);
2317
0
}
2318
2319
void PInternalService::glob(google::protobuf::RpcController* controller,
2320
                            const PGlobRequest* request, PGlobResponse* response,
2321
447
                            google::protobuf::Closure* done) {
2322
447
    bool ret = _heavy_work_pool.try_offer([request, response, done]() {
2323
447
        brpc::ClosureGuard closure_guard(done);
2324
447
        std::vector<io::FileInfo> files;
2325
447
        Status st = io::global_local_filesystem()->safe_glob(request->pattern(), &files);
2326
447
        if (st.ok()) {
2327
452
            for (auto& file : files) {
2328
452
                PGlobResponse_PFileInfo* pfile = response->add_files();
2329
452
                pfile->set_file(file.file_name);
2330
452
                pfile->set_size(file.file_size);
2331
452
            }
2332
430
        }
2333
447
        st.to_protobuf(response->mutable_status());
2334
447
    });
2335
447
    if (!ret) {
2336
0
        offer_failed(response, done, _heavy_work_pool);
2337
0
        return;
2338
0
    }
2339
447
}
2340
2341
void PInternalService::group_commit_insert(google::protobuf::RpcController* controller,
2342
                                           const PGroupCommitInsertRequest* request,
2343
                                           PGroupCommitInsertResponse* response,
2344
29
                                           google::protobuf::Closure* done) {
2345
29
    TUniqueId load_id;
2346
29
    load_id.__set_hi(request->load_id().hi());
2347
29
    load_id.__set_lo(request->load_id().lo());
2348
29
    std::shared_ptr<std::mutex> lock = std::make_shared<std::mutex>();
2349
29
    std::shared_ptr<bool> is_done = std::make_shared<bool>(false);
2350
29
    bool ret = _heavy_work_pool.try_offer([this, request, response, done, load_id, lock,
2351
29
                                           is_done]() {
2352
29
        brpc::ClosureGuard closure_guard(done);
2353
29
        std::shared_ptr<StreamLoadContext> ctx = std::make_shared<StreamLoadContext>(_exec_env);
2354
29
        auto pipe = std::make_shared<io::StreamLoadPipe>(
2355
29
                io::kMaxPipeBufferedBytes /* max_buffered_bytes */, 64 * 1024 /* min_chunk_size */,
2356
29
                -1 /* total_length */, true /* use_proto */);
2357
29
        ctx->pipe = pipe;
2358
29
        Status st = _exec_env->new_load_stream_mgr()->put(load_id, ctx);
2359
29
        if (st.ok()) {
2360
29
            try {
2361
29
                st = _exec_plan_fragment_impl(
2362
29
                        request->exec_plan_fragment_request().request(),
2363
29
                        request->exec_plan_fragment_request().version(),
2364
29
                        request->exec_plan_fragment_request().compact(),
2365
29
                        [&, response, done, load_id, lock, is_done](RuntimeState* state,
2366
29
                                                                    Status* status) {
2367
29
                            std::lock_guard<std::mutex> lock1(*lock);
2368
29
                            if (*is_done) {
2369
0
                                return;
2370
0
                            }
2371
29
                            *is_done = true;
2372
29
                            brpc::ClosureGuard cb_closure_guard(done);
2373
29
                            response->set_label(state->import_label());
2374
29
                            response->set_txn_id(state->wal_id());
2375
29
                            response->set_loaded_rows(state->num_rows_load_success());
2376
29
                            response->set_filtered_rows(state->num_rows_load_filtered());
2377
29
                            status->to_protobuf(response->mutable_status());
2378
29
                            if (!state->get_error_log_file_path().empty()) {
2379
0
                                response->set_error_url(
2380
0
                                        to_load_error_http_path(state->get_error_log_file_path()));
2381
0
                            }
2382
29
                            if (!state->get_first_error_msg().empty()) {
2383
0
                                response->set_first_error_msg(state->get_first_error_msg());
2384
0
                            }
2385
29
                            _exec_env->new_load_stream_mgr()->remove(load_id);
2386
29
                        });
2387
29
            } catch (const Exception& e) {
2388
0
                st = e.to_status();
2389
0
            } catch (const std::exception& e) {
2390
0
                st = Status::Error(ErrorCode::INTERNAL_ERROR, e.what());
2391
0
            } catch (...) {
2392
0
                st = Status::Error(ErrorCode::INTERNAL_ERROR,
2393
0
                                   "_exec_plan_fragment_impl meet unknown error");
2394
0
            }
2395
29
            if (!st.ok()) {
2396
0
                LOG(WARNING) << "exec plan fragment failed, load_id=" << print_id(load_id)
2397
0
                             << ", errmsg=" << st;
2398
0
                std::lock_guard<std::mutex> lock1(*lock);
2399
0
                if (*is_done) {
2400
0
                    closure_guard.release();
2401
0
                } else {
2402
0
                    *is_done = true;
2403
0
                    st.to_protobuf(response->mutable_status());
2404
0
                    _exec_env->new_load_stream_mgr()->remove(load_id);
2405
0
                }
2406
29
            } else {
2407
29
                closure_guard.release();
2408
66
                for (int i = 0; i < request->data().size(); ++i) {
2409
37
                    std::unique_ptr<PDataRow> row(new PDataRow());
2410
37
                    row->CopyFrom(request->data(i));
2411
37
                    st = pipe->append(std::move(row));
2412
37
                    if (!st.ok()) {
2413
0
                        break;
2414
0
                    }
2415
37
                }
2416
29
                if (st.ok()) {
2417
29
                    static_cast<void>(pipe->finish());
2418
29
                }
2419
29
            }
2420
29
        }
2421
29
    });
2422
29
    if (!ret) {
2423
0
        _exec_env->new_load_stream_mgr()->remove(load_id);
2424
0
        offer_failed(response, done, _heavy_work_pool);
2425
0
        return;
2426
0
    }
2427
29
};
2428
2429
void PInternalService::get_wal_queue_size(google::protobuf::RpcController* controller,
2430
                                          const PGetWalQueueSizeRequest* request,
2431
                                          PGetWalQueueSizeResponse* response,
2432
1.15k
                                          google::protobuf::Closure* done) {
2433
1.15k
    bool ret = _heavy_work_pool.try_offer([this, request, response, done]() {
2434
1.15k
        brpc::ClosureGuard closure_guard(done);
2435
1.15k
        Status st = Status::OK();
2436
1.15k
        auto table_id = request->table_id();
2437
1.15k
        auto count = _exec_env->wal_mgr()->get_wal_queue_size(table_id);
2438
1.15k
        response->set_size(count);
2439
1.15k
        response->mutable_status()->set_status_code(st.code());
2440
1.15k
    });
2441
1.15k
    if (!ret) {
2442
0
        offer_failed(response, done, _heavy_work_pool);
2443
0
    }
2444
1.15k
}
2445
2446
void PInternalService::get_be_resource(google::protobuf::RpcController* controller,
2447
                                       const PGetBeResourceRequest* request,
2448
                                       PGetBeResourceResponse* response,
2449
0
                                       google::protobuf::Closure* done) {
2450
0
    bool ret = _heavy_work_pool.try_offer([response, done]() {
2451
0
        brpc::ClosureGuard closure_guard(done);
2452
0
        int64_t mem_limit = MemInfo::mem_limit();
2453
0
        int64_t mem_usage = PerfCounters::get_vm_rss();
2454
2455
0
        PGlobalResourceUsage* global_resource_usage = response->mutable_global_be_resource_usage();
2456
0
        global_resource_usage->set_mem_limit(mem_limit);
2457
0
        global_resource_usage->set_mem_usage(mem_usage);
2458
2459
0
        Status st = Status::OK();
2460
0
        response->mutable_status()->set_status_code(st.code());
2461
0
    });
2462
0
    if (!ret) {
2463
0
        offer_failed(response, done, _heavy_work_pool);
2464
0
    }
2465
0
}
2466
2467
void PInternalService::delete_dictionary(google::protobuf::RpcController* controller,
2468
                                         const PDeleteDictionaryRequest* request,
2469
                                         PDeleteDictionaryResponse* response,
2470
3
                                         google::protobuf::Closure* done) {
2471
3
    brpc::ClosureGuard closure_guard(done);
2472
3
    Status st = ExecEnv::GetInstance()->dict_factory()->delete_dict(request->dictionary_id());
2473
3
    st.to_protobuf(response->mutable_status());
2474
3
}
2475
2476
void PInternalService::commit_refresh_dictionary(google::protobuf::RpcController* controller,
2477
                                                 const PCommitRefreshDictionaryRequest* request,
2478
                                                 PCommitRefreshDictionaryResponse* response,
2479
86
                                                 google::protobuf::Closure* done) {
2480
86
    brpc::ClosureGuard closure_guard(done);
2481
86
    Status st = ExecEnv::GetInstance()->dict_factory()->commit_refresh_dict(
2482
86
            request->dictionary_id(), request->version_id());
2483
86
    st.to_protobuf(response->mutable_status());
2484
86
}
2485
2486
void PInternalService::abort_refresh_dictionary(google::protobuf::RpcController* controller,
2487
                                                const PAbortRefreshDictionaryRequest* request,
2488
                                                PAbortRefreshDictionaryResponse* response,
2489
4
                                                google::protobuf::Closure* done) {
2490
4
    brpc::ClosureGuard closure_guard(done);
2491
4
    Status st = ExecEnv::GetInstance()->dict_factory()->abort_refresh_dict(request->dictionary_id(),
2492
4
                                                                           request->version_id());
2493
4
    st.to_protobuf(response->mutable_status());
2494
4
}
2495
2496
void PInternalService::get_tablet_rowsets(google::protobuf::RpcController* controller,
2497
                                          const PGetTabletRowsetsRequest* request,
2498
                                          PGetTabletRowsetsResponse* response,
2499
0
                                          google::protobuf::Closure* done) {
2500
0
    DCHECK(config::is_cloud_mode());
2501
0
    auto start_time = GetMonoTimeMicros();
2502
0
    Defer defer {
2503
0
            [&]() { g_process_remote_fetch_rowsets_latency << GetMonoTimeMicros() - start_time; }};
2504
0
    brpc::ClosureGuard closure_guard(done);
2505
0
    LOG(INFO) << "process get tablet rowsets, request=" << request->ShortDebugString();
2506
0
    if (!request->has_tablet_id() || !request->has_version_start() || !request->has_version_end()) {
2507
0
        Status::InvalidArgument("missing params tablet/version_start/version_end")
2508
0
                .to_protobuf(response->mutable_status());
2509
0
        return;
2510
0
    }
2511
0
    CloudStorageEngine& storage = ExecEnv::GetInstance()->storage_engine().to_cloud();
2512
2513
0
    auto maybe_tablet =
2514
0
            storage.tablet_mgr().get_tablet(request->tablet_id(), /*warmup data*/ false,
2515
0
                                            /*syn_delete_bitmap*/ false, /*delete_bitmap*/ nullptr,
2516
0
                                            /*local_only*/ true);
2517
0
    if (!maybe_tablet) {
2518
0
        maybe_tablet.error().to_protobuf(response->mutable_status());
2519
0
        return;
2520
0
    }
2521
0
    auto tablet = maybe_tablet.value();
2522
0
    Result<CaptureRowsetResult> ret;
2523
0
    {
2524
0
        std::shared_lock l(tablet->get_header_lock());
2525
0
        ret = tablet->capture_consistent_rowsets_unlocked(
2526
0
                {request->version_start(), request->version_end()},
2527
0
                CaptureRowsetOps {.enable_fetch_rowsets_from_peers = false});
2528
0
    }
2529
0
    if (!ret) {
2530
0
        ret.error().to_protobuf(response->mutable_status());
2531
0
        return;
2532
0
    }
2533
0
    auto rowsets = std::move(ret.value().rowsets);
2534
0
    for (const auto& rs : rowsets) {
2535
0
        RowsetMetaPB meta;
2536
0
        rs->rowset_meta()->to_rowset_pb(&meta);
2537
0
        response->mutable_rowsets()->Add(std::move(meta));
2538
0
    }
2539
0
    if (request->has_delete_bitmap_keys()) {
2540
0
        DCHECK(tablet->enable_unique_key_merge_on_write());
2541
0
        auto delete_bitmap = std::move(ret.value().delete_bitmap);
2542
0
        auto keys_pb = request->delete_bitmap_keys();
2543
0
        size_t len = keys_pb.rowset_ids().size();
2544
0
        DCHECK_EQ(len, keys_pb.segment_ids().size());
2545
0
        DCHECK_EQ(len, keys_pb.versions().size());
2546
0
        std::set<DeleteBitmap::BitmapKey> keys;
2547
0
        for (size_t i = 0; i < len; ++i) {
2548
0
            RowsetId rs_id;
2549
0
            rs_id.init(keys_pb.rowset_ids(i));
2550
0
            keys.emplace(rs_id, keys_pb.segment_ids(i), keys_pb.versions(i));
2551
0
        }
2552
0
        auto diffset = delete_bitmap->diffset(keys).to_pb();
2553
0
        *response->mutable_delete_bitmap() = std::move(diffset);
2554
0
    }
2555
0
    Status::OK().to_protobuf(response->mutable_status());
2556
0
}
2557
2558
void PInternalService::request_cdc_client(google::protobuf::RpcController* controller,
2559
                                          const PRequestCdcClientRequest* request,
2560
                                          PRequestCdcClientResult* result,
2561
3.19k
                                          google::protobuf::Closure* done) {
2562
3.19k
    bool ret = _heavy_work_pool.try_offer([this, request, result, done]() {
2563
3.19k
        _exec_env->cdc_client_mgr()->request_cdc_client_impl(request, result, done);
2564
3.19k
    });
2565
2566
3.19k
    if (!ret) {
2567
0
        offer_failed(result, done, _heavy_work_pool);
2568
0
        return;
2569
0
    }
2570
3.19k
}
2571
2572
#include "common/compile_check_avoid_end.h"
2573
} // namespace doris