Coverage Report

Created: 2025-04-28 00:11

/root/doris/be/src/exec/schema_scanner.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "exec/schema_scanner.h"
19
20
#include <gen_cpp/Descriptors_types.h>
21
#include <gen_cpp/Types_types.h>
22
#include <glog/logging.h>
23
#include <string.h>
24
25
#include <new>
26
#include <ostream>
27
#include <utility>
28
29
#include "exec/schema_scanner/schema_active_queries_scanner.h"
30
#include "exec/schema_scanner/schema_backend_active_tasks.h"
31
#include "exec/schema_scanner/schema_catalog_meta_cache_stats_scanner.h"
32
#include "exec/schema_scanner/schema_charsets_scanner.h"
33
#include "exec/schema_scanner/schema_collations_scanner.h"
34
#include "exec/schema_scanner/schema_columns_scanner.h"
35
#include "exec/schema_scanner/schema_dummy_scanner.h"
36
#include "exec/schema_scanner/schema_file_cache_statistics.h"
37
#include "exec/schema_scanner/schema_files_scanner.h"
38
#include "exec/schema_scanner/schema_metadata_name_ids_scanner.h"
39
#include "exec/schema_scanner/schema_partitions_scanner.h"
40
#include "exec/schema_scanner/schema_processlist_scanner.h"
41
#include "exec/schema_scanner/schema_profiling_scanner.h"
42
#include "exec/schema_scanner/schema_routine_load_job_scanner.h"
43
#include "exec/schema_scanner/schema_routine_scanner.h"
44
#include "exec/schema_scanner/schema_rowsets_scanner.h"
45
#include "exec/schema_scanner/schema_schema_privileges_scanner.h"
46
#include "exec/schema_scanner/schema_schemata_scanner.h"
47
#include "exec/schema_scanner/schema_table_options_scanner.h"
48
#include "exec/schema_scanner/schema_table_privileges_scanner.h"
49
#include "exec/schema_scanner/schema_table_properties_scanner.h"
50
#include "exec/schema_scanner/schema_tables_scanner.h"
51
#include "exec/schema_scanner/schema_user_privileges_scanner.h"
52
#include "exec/schema_scanner/schema_user_scanner.h"
53
#include "exec/schema_scanner/schema_variables_scanner.h"
54
#include "exec/schema_scanner/schema_views_scanner.h"
55
#include "exec/schema_scanner/schema_workload_group_privileges.h"
56
#include "exec/schema_scanner/schema_workload_group_resource_usage_scanner.h"
57
#include "exec/schema_scanner/schema_workload_groups_scanner.h"
58
#include "exec/schema_scanner/schema_workload_sched_policy_scanner.h"
59
#include "olap/hll.h"
60
#include "pipeline/dependency.h"
61
#include "runtime/define_primitive_type.h"
62
#include "runtime/fragment_mgr.h"
63
#include "runtime/types.h"
64
#include "util/string_util.h"
65
#include "util/types.h"
66
#include "vec/columns/column.h"
67
#include "vec/columns/column_complex.h"
68
#include "vec/columns/column_nullable.h"
69
#include "vec/columns/column_string.h"
70
#include "vec/columns/column_vector.h"
71
#include "vec/columns/columns_number.h"
72
#include "vec/common/string_ref.h"
73
#include "vec/core/block.h"
74
#include "vec/core/column_with_type_and_name.h"
75
#include "vec/core/types.h"
76
#include "vec/data_types/data_type.h"
77
#include "vec/data_types/data_type_factory.hpp"
78
79
namespace doris {
80
class ObjectPool;
81
82
// Builds a scanner over `columns` without a concrete schema table type; the type is recorded as
// TSchemaTableType::SCH_INVALID. Delegates to the two-argument constructor below.
SchemaScanner::SchemaScanner(const std::vector<ColumnDesc>& columns)
        : SchemaScanner(columns, TSchemaTableType::SCH_INVALID) {}

// Builds a scanner over `columns` for the schema table identified by `type`.
// The scanner starts out uninitialized (_is_init == false).
SchemaScanner::SchemaScanner(const std::vector<ColumnDesc>& columns, TSchemaTableType::type type)
        : _is_init(false),
          _columns(columns),
          _schema_table_type(type) {}
87
88
2
// Defaulted destructor: no hand-written cleanup is performed here.
SchemaScanner::~SchemaScanner() = default;
89
90
0
// Base-class start hook: only verifies that init() has already run.
// `state` is not used by this implementation.
// Returns InternalError when called before init(), OK otherwise.
Status SchemaScanner::start(RuntimeState* state) {
    return _is_init ? Status::OK() : Status::InternalError("call Start before Init.");
}
97
98
0
// Moves the rows currently buffered in _data_block into `block`, reports end-of-stream through
// `eos`, and schedules another asynchronous fetch when the stream has not ended yet.
//
// Returns InternalError when no buffer exists, or the status recorded in _scanner_status when it
// is not OK.
Status SchemaScanner::get_next_block(RuntimeState* state, vectorized::Block* block, bool* eos) {
    if (!_data_block) {
        return Status::InternalError("No data left!");
    }
    // The buffer must not be touched while the async task is still filling it.
    DCHECK(!_async_thread_running);
    RETURN_IF_ERROR(_scanner_status.status());

    // Append every buffered row, column by column, to the caller's block.
    const size_t buffered_rows = _data_block->rows();
    const size_t column_count = block->columns();
    for (size_t col = 0; col < column_count; ++col) {
        const auto& buffered_column = *_data_block->get_by_position(col).column;
        auto target_column = std::move(*block->get_by_position(col).column).mutate();
        target_column->insert_range_from(buffered_column, 0, buffered_rows);
    }

    // The buffer is reused for the next batch.
    _data_block->clear_column_data();

    *eos = _eos;
    if (*eos) {
        return Status::OK();
    }
    return get_next_block_async(state);
}
117
118
0
// Blocks _dependency and submits a task to the fragment manager's thread pool that fills
// _data_block with the next batch. The task records its outcome in _scanner_status and _eos and
// marks _dependency ready when it finishes.
//
// Returns the status of the submission itself; errors raised while fetching are stored in
// _scanner_status and surface on the next get_next_block() call.
Status SchemaScanner::get_next_block_async(RuntimeState* state) {
    _dependency->block();
    auto task_ctx = state->get_task_execution_context();

    auto fetch_task = [this, task_ctx, state]() {
        // Skip the work entirely when the task execution context can no longer be locked.
        auto task_lock = task_ctx.lock();
        if (task_lock == nullptr) {
            return;
        }
        DCHECK(_async_thread_running == false);
        SCOPED_ATTACH_TASK(state);
        _dependency->block();
        _async_thread_running = true;

        // First run: create the buffer block and call start() once.
        if (!_opened) {
            _data_block = vectorized::Block::create_unique();
            _init_block(_data_block.get());
            _scanner_status.update(start(state));
            _opened = true;
        }

        bool reached_end = false;
        // Wrapped so that an exception thrown while fetching is converted into a Status.
        auto fetch_guarded = [&]() -> Status {
            RETURN_IF_CATCH_EXCEPTION(
                    { return get_next_block_internal(_data_block.get(), &reached_end); });
        };
        _scanner_status.update(fetch_guarded());

        _eos = reached_end;
        _async_thread_running = false;
        _dependency->set_ready();
    };

    return ExecEnv::GetInstance()->fragment_mgr()->get_thread_pool()->submit_func(
            std::move(fetch_task));
}
149
150
0
// Base-class fetch hook: validates its preconditions and immediately signals end-of-stream
// without writing anything into `block`.
//
// Returns InternalError when the scanner is not initialized or either pointer is null.
Status SchemaScanner::get_next_block_internal(vectorized::Block* block, bool* eos) {
    if (!_is_init) {
        return Status::InternalError("used before initialized.");
    }

    const bool args_valid = (block != nullptr) && (eos != nullptr);
    if (!args_valid) {
        return Status::InternalError("input pointer is nullptr.");
    }

    *eos = true;
    return Status::OK();
}
162
163
0
// Binds the scanner to `param` and marks it initialized. Calling it again after a successful
// init is a no-op that returns OK.
//
// `pool` must be non-null but is otherwise unused by this implementation.
// When the parameter carries a profile, the four timers used by schema scanners are registered
// on it.
Status SchemaScanner::init(SchemaScannerParam* param, ObjectPool* pool) {
    if (_is_init) {
        return Status::OK();
    }
    if (param == nullptr || pool == nullptr) {
        return Status::InternalError("invalid parameter");
    }

    _param = param;
    _is_init = true;

    // No profile attached: nothing to register.
    if (!_param->profile) {
        return Status::OK();
    }

    _get_db_timer = ADD_TIMER(_param->profile, "GetDbTime");
    _get_table_timer = ADD_TIMER(_param->profile, "GetTableTime");
    _get_describe_timer = ADD_TIMER(_param->profile, "GetDescribeTime");
    _fill_block_timer = ADD_TIMER(_param->profile, "FillBlockTime");
    return Status::OK();
}
183
184
0
// Factory: returns the concrete scanner for the requested schema table `type`.
// Any type without a dedicated scanner falls back to SchemaDummyScanner.
std::unique_ptr<SchemaScanner> SchemaScanner::create(TSchemaTableType::type type) {
    switch (type) {
    // ---- databases, tables, columns and other catalog metadata ----
    case TSchemaTableType::SCH_SCHEMATA:
        return SchemaSchemataScanner::create_unique();
    case TSchemaTableType::SCH_TABLES:
        return SchemaTablesScanner::create_unique();
    case TSchemaTableType::SCH_COLUMNS:
        return SchemaColumnsScanner::create_unique();
    case TSchemaTableType::SCH_VIEWS:
        return SchemaViewsScanner::create_unique();
    case TSchemaTableType::SCH_PARTITIONS:
        return SchemaPartitionsScanner::create_unique();
    case TSchemaTableType::SCH_FILES:
        return SchemaFilesScanner::create_unique();
    case TSchemaTableType::SCH_ROWSETS:
        return SchemaRowsetsScanner::create_unique();
    case TSchemaTableType::SCH_METADATA_NAME_IDS:
        return SchemaMetadataNameIdsScanner::create_unique();
    case TSchemaTableType::SCH_TABLE_OPTIONS:
        return SchemaTableOptionsScanner::create_unique();
    case TSchemaTableType::SCH_TABLE_PROPERTIES:
        return SchemaTablePropertiesScanner::create_unique();
    case TSchemaTableType::SCH_PROCEDURES:
        return SchemaRoutinesScanner::create_unique();
    case TSchemaTableType::SCH_CHARSETS:
        return SchemaCharsetsScanner::create_unique();
    case TSchemaTableType::SCH_COLLATIONS:
        return SchemaCollationsScanner::create_unique();

    // ---- variables ----
    case TSchemaTableType::SCH_GLOBAL_VARIABLES:
        return SchemaVariablesScanner::create_unique(TVarType::GLOBAL);
    case TSchemaTableType::SCH_SESSION_VARIABLES:
    case TSchemaTableType::SCH_VARIABLES:
        return SchemaVariablesScanner::create_unique(TVarType::SESSION);

    // ---- users and privileges ----
    case TSchemaTableType::SCH_USER:
        return SchemaUserScanner::create_unique();
    case TSchemaTableType::SCH_USER_PRIVILEGES:
        return SchemaUserPrivilegesScanner::create_unique();
    case TSchemaTableType::SCH_SCHEMA_PRIVILEGES:
        return SchemaSchemaPrivilegesScanner::create_unique();
    case TSchemaTableType::SCH_TABLE_PRIVILEGES:
        return SchemaTablePrivilegesScanner::create_unique();
    case TSchemaTableType::SCH_WORKLOAD_GROUP_PRIVILEGES:
        return SchemaWorkloadGroupPrivilegesScanner::create_unique();

    // ---- workload management ----
    case TSchemaTableType::SCH_WORKLOAD_GROUPS:
        return SchemaWorkloadGroupsScanner::create_unique();
    case TSchemaTableType::SCH_WORKLOAD_POLICY:
        return SchemaWorkloadSchedulePolicyScanner::create_unique();
    case TSchemaTableType::SCH_WORKLOAD_GROUP_RESOURCE_USAGE:
        return SchemaBackendWorkloadGroupResourceUsage::create_unique();

    // ---- runtime state, jobs and statistics ----
    case TSchemaTableType::SCH_PROCESSLIST:
        return SchemaProcessListScanner::create_unique();
    case TSchemaTableType::SCH_PROFILING:
        return SchemaProfilingScanner::create_unique();
    case TSchemaTableType::SCH_ACTIVE_QUERIES:
        return SchemaActiveQueriesScanner::create_unique();
    case TSchemaTableType::SCH_BACKEND_ACTIVE_TASKS:
        return SchemaBackendActiveTasksScanner::create_unique();
    case TSchemaTableType::SCH_ROUTINE_LOAD_JOBS:
        return SchemaRoutineLoadJobScanner::create_unique();
    case TSchemaTableType::SCH_FILE_CACHE_STATISTICS:
        return SchemaFileCacheStatisticsScanner::create_unique();
    case TSchemaTableType::SCH_CATALOG_META_CACHE_STATISTICS:
        return SchemaCatalogMetaCacheStatsScanner::create_unique();

    // ---- everything else ----
    default:
        return SchemaDummyScanner::create_unique();
    }
}
252
253
0
// Appends one empty column to `src_block` for every entry returned by get_column_desc(),
// in declaration order. Each data type is created with the factory's second argument set to
// true, and the column is named after its descriptor.
void SchemaScanner::_init_block(vectorized::Block* src_block) {
    for (const auto& column_desc : get_column_desc()) {
        TypeDescriptor type_desc(column_desc.type);
        auto column_type =
                vectorized::DataTypeFactory::instance().create_data_type(type_desc, true);
        auto empty_column = column_type->create_column();
        src_block->insert(vectorized::ColumnWithTypeAndName(std::move(empty_column), column_type,
                                                            column_desc.name));
    }
}
262
263
// Appends one value per entry of `datas` to the column at position `pos` of `block`.
//
// The target column is treated as a ColumnNullable wrapping the concrete column that matches
// _columns[pos].type. A nullptr entry in `datas` becomes a NULL row; every other entry is
// reinterpreted as a pointer to the in-memory representation of that type (StringRef for the
// string types, PackedInt128 for DECIMALV2/DECIMAL128I, plain scalars otherwise).
//
// Returns InternalError when `pos` does not address a known column or when the column type is
// not handled here. In the latter case nothing is appended for the offending row, so the null
// map and the nested column stay the same length.
Status SchemaScanner::fill_dest_column_for_range(vectorized::Block* block, size_t pos,
                                                 const std::vector<void*>& datas) {
    if (pos >= _columns.size()) {
        return Status::InternalError("column position out of range");
    }
    const ColumnDesc& col_desc = _columns[pos];

    vectorized::MutableColumnPtr column_ptr =
            std::move(*block->get_by_position(pos).column).assume_mutable();
    // NOTE(review): the cast is unchecked; it relies on the block's columns being nullable, as
    // the ones built by _init_block() are.
    auto* nullable_column = reinterpret_cast<vectorized::ColumnNullable*>(column_ptr.get());
    vectorized::IColumn* col_ptr = &nullable_column->get_nested_column();

    for (void* data : datas) {
        if (data == nullptr) {
            // ColumnNullable inserts the NULL row itself; the nested column needs no explicit
            // default here.
            nullable_column->insert_data(nullptr, 0);
            continue;
        }

        switch (col_desc.type) {
        case TYPE_HLL: {
            auto* hll_slot = reinterpret_cast<HyperLogLog*>(data);
            assert_cast<vectorized::ColumnHLL*>(col_ptr)->get_data().emplace_back(*hll_slot);
            break;
        }
        case TYPE_VARCHAR:
        case TYPE_CHAR:
        case TYPE_STRING: {
            auto* str_slot = reinterpret_cast<StringRef*>(data);
            assert_cast<vectorized::ColumnString*>(col_ptr)->insert_data(str_slot->data,
                                                                         str_slot->size);
            break;
        }

        case TYPE_BOOLEAN: {
            uint8_t num = *reinterpret_cast<bool*>(data);
            assert_cast<vectorized::ColumnVector<vectorized::UInt8>*>(col_ptr)->insert_value(num);
            break;
        }

        case TYPE_TINYINT: {
            int8_t num = *reinterpret_cast<int8_t*>(data);
            assert_cast<vectorized::ColumnVector<vectorized::Int8>*>(col_ptr)->insert_value(num);
            break;
        }

        case TYPE_SMALLINT: {
            int16_t num = *reinterpret_cast<int16_t*>(data);
            assert_cast<vectorized::ColumnVector<vectorized::Int16>*>(col_ptr)->insert_value(num);
            break;
        }

        case TYPE_INT: {
            int32_t num = *reinterpret_cast<int32_t*>(data);
            assert_cast<vectorized::ColumnVector<vectorized::Int32>*>(col_ptr)->insert_value(num);
            break;
        }

        case TYPE_BIGINT: {
            int64_t num = *reinterpret_cast<int64_t*>(data);
            assert_cast<vectorized::ColumnVector<vectorized::Int64>*>(col_ptr)->insert_value(num);
            break;
        }

        case TYPE_LARGEINT: {
            // memcpy rather than a dereference: `data` is read byte-wise into a local value.
            __int128 num;
            memcpy(&num, data, sizeof(__int128));
            assert_cast<vectorized::ColumnVector<vectorized::Int128>*>(col_ptr)->insert_value(num);
            break;
        }

        case TYPE_FLOAT: {
            float num = *reinterpret_cast<float*>(data);
            assert_cast<vectorized::ColumnVector<vectorized::Float32>*>(col_ptr)->insert_value(num);
            break;
        }

        case TYPE_DOUBLE: {
            double num = *reinterpret_cast<double*>(data);
            assert_cast<vectorized::ColumnVector<vectorized::Float64>*>(col_ptr)->insert_value(num);
            break;
        }

        case TYPE_DATE: {
            assert_cast<vectorized::ColumnVector<vectorized::Int64>*>(col_ptr)->insert_data(
                    reinterpret_cast<char*>(data), 0);
            break;
        }

        case TYPE_DATEV2: {
            uint32_t num = *reinterpret_cast<uint32_t*>(data);
            assert_cast<vectorized::ColumnDateV2*>(col_ptr)->insert_value(num);
            break;
        }

        case TYPE_DATETIME: {
            assert_cast<vectorized::ColumnVector<vectorized::Int64>*>(col_ptr)->insert_data(
                    reinterpret_cast<char*>(data), 0);
            break;
        }

        case TYPE_DATETIMEV2: {
            uint64_t num = *reinterpret_cast<uint64_t*>(data);
            assert_cast<vectorized::ColumnDateTimeV2*>(col_ptr)->insert_value(num);
            break;
        }

        case TYPE_DECIMALV2: {
            const vectorized::Int128 num = (reinterpret_cast<PackedInt128*>(data))->value;
            assert_cast<vectorized::ColumnDecimal128V2*>(col_ptr)->insert_data(
                    reinterpret_cast<const char*>(&num), 0);
            break;
        }
        case TYPE_DECIMAL128I: {
            const vectorized::Int128 num = (reinterpret_cast<PackedInt128*>(data))->value;
            assert_cast<vectorized::ColumnDecimal128V3*>(col_ptr)->insert_data(
                    reinterpret_cast<const char*>(&num), 0);
            break;
        }

        case TYPE_DECIMAL32: {
            const int32_t num = *reinterpret_cast<int32_t*>(data);
            assert_cast<vectorized::ColumnDecimal32*>(col_ptr)->insert_data(
                    reinterpret_cast<const char*>(&num), 0);
            break;
        }

        case TYPE_DECIMAL64: {
            const int64_t num = *reinterpret_cast<int64_t*>(data);
            assert_cast<vectorized::ColumnDecimal64*>(col_ptr)->insert_data(
                    reinterpret_cast<const char*>(&num), 0);
            break;
        }

        default: {
            DCHECK(false) << "bad slot type: " << col_desc.type;
            std::stringstream ss;
            ss << "Fail to convert schema type:'" << col_desc.type << " on column:`"
               << std::string(col_desc.name) + "`";
            return Status::InternalError(ss.str());
        }
        }

        // Mark the row as non-NULL only after the nested value has really been appended, so an
        // unsupported type cannot leave the null map longer than the nested column.
        nullable_column->get_null_map_data().emplace_back(0);
    }
    return Status::OK();
}
410
411
0
std::string SchemaScanner::get_db_from_full_name(const std::string& full_name) {
412
0
    std::vector<std::string> part = split(full_name, ".");
413
0
    if (part.size() == 2) {
414
0
        return part[1];
415
0
    }
416
0
    return full_name;
417
0
}
418
419
// Appends the value carried by `cell` as one non-NULL row to the column at `col_index` of
// `block`.
//
// `type` selects which TCell field is read: longVal (BIGINT), intVal (INT), boolVal (BOOLEAN),
// or stringVal (STRING/VARCHAR/CHAR, and DATETIME, where the string is parsed as a date).
// The target column is treated as a ColumnNullable wrapping the matching concrete column.
//
// Returns InternalError for any other `type`; nothing is appended in that case.
Status SchemaScanner::insert_block_column(TCell cell, int col_index, vectorized::Block* block,
                                          PrimitiveType type) {
    vectorized::MutableColumnPtr mutable_col_ptr =
            std::move(*block->get_by_position(col_index).column).assume_mutable();
    // NOTE(review): unchecked casts below; they rely on the column being nullable and on `type`
    // matching the nested column's concrete type.
    auto* nullable_column = reinterpret_cast<vectorized::ColumnNullable*>(mutable_col_ptr.get());
    vectorized::IColumn* col_ptr = &nullable_column->get_nested_column();

    switch (type) {
    case TYPE_BIGINT: {
        reinterpret_cast<vectorized::ColumnVector<vectorized::Int64>*>(col_ptr)->insert_value(
                cell.longVal);
        break;
    }

    case TYPE_INT: {
        reinterpret_cast<vectorized::ColumnVector<vectorized::Int32>*>(col_ptr)->insert_value(
                cell.intVal);
        break;
    }

    case TYPE_BOOLEAN: {
        reinterpret_cast<vectorized::ColumnVector<vectorized::UInt8>*>(col_ptr)->insert_value(
                cell.boolVal);
        break;
    }

    case TYPE_STRING:
    case TYPE_VARCHAR:
    case TYPE_CHAR: {
        reinterpret_cast<vectorized::ColumnString*>(col_ptr)->insert_data(cell.stringVal.data(),
                                                                          cell.stringVal.size());
        break;
    }

    case TYPE_DATETIME: {
        // Parse into a stack value and hand its bytes straight to the column; no intermediate
        // container is needed.
        // NOTE(review): the result of from_date_str() is not checked, as before — confirm what
        // should happen for an unparsable string.
        VecDateTimeValue parsed;
        parsed.from_date_str(cell.stringVal.data(), cell.stringVal.size());
        reinterpret_cast<vectorized::ColumnVector<vectorized::Int64>*>(col_ptr)->insert_data(
                reinterpret_cast<const char*>(&parsed), 0);
        break;
    }

    default: {
        std::stringstream ss;
        ss << "unsupported column type:" << type;
        return Status::InternalError(ss.str());
    }
    }

    // Every supported type appends exactly one non-NULL row.
    nullable_column->get_null_map_data().emplace_back(0);
    return Status::OK();
}
476
477
} // namespace doris