Coverage Report

Created: 2025-05-09 19:27

/root/doris/be/src/runtime/descriptors.h
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/apache/impala/blob/branch-2.9.0/be/src/runtime/descriptors.h
19
// and modified by Doris
20
21
#pragma once
22
23
#include <gen_cpp/Descriptors_types.h>
24
#include <gen_cpp/Types_types.h>
25
#include <glog/logging.h>
26
#include <google/protobuf/stubs/port.h>
27
#include <stdint.h>
28
29
#include <ostream>
30
#include <string>
31
#include <unordered_map>
32
#include <utility>
33
#include <vector>
34
35
#include "common/be_mock_util.h"
36
#include "common/compiler_util.h" // IWYU pragma: keep
37
#include "common/global_types.h"
38
#include "common/object_pool.h"
39
#include "common/status.h"
40
#include "olap/utils.h"
41
#include "runtime/define_primitive_type.h"
42
#include "runtime/types.h"
43
#include "vec/data_types/data_type.h"
44
namespace google::protobuf {
45
template <typename Element>
46
class RepeatedField;
47
} // namespace google::protobuf
48
49
namespace doris {
50
51
class ObjectPool;
52
class PTupleDescriptor;
53
class PSlotDescriptor;
54
55
class SlotDescriptor {
56
public:
57
    MOCK_DEFINE(virtual ~SlotDescriptor() = default;)
58
707k
    SlotId id() const { return _id; }
59
2.96M
    // Returns this slot's data type (the value stored in _type) by value.
    // The return type is intentionally not const-qualified: a const by-value
    // return gives callers nothing and prevents moving from the temporary.
    vectorized::DataTypePtr type() const { return _type; }
60
0
    TupleId parent() const { return _parent; }
61
    // Returns the column index of this slot, including partition keys.
62
    // (e.g., col_pos - num_partition_keys = the table column this slot corresponds to)
63
0
    int col_pos() const { return _col_pos; }
64
    // Returns the field index in the generated llvm struct for this slot's tuple
65
0
    int field_idx() const { return _field_idx; }
66
1.63M
    bool is_materialized() const { return _is_materialized; }
67
244
    bool is_nullable() const { return _type->is_nullable(); }
68
    vectorized::DataTypePtr get_data_type_ptr() const;
69
70
675k
    const std::string& col_name() const { return _col_name; }
71
0
    const std::string& col_name_lower_case() const { return _col_name_lower_case; }
72
73
    void to_protobuf(PSlotDescriptor* pslot) const;
74
75
    std::string debug_string() const;
76
77
    vectorized::MutableColumnPtr get_empty_mutable_column() const;
78
79
23
    int32_t col_unique_id() const { return _col_unique_id; }
80
81
0
    bool is_key() const { return _is_key; }
82
0
    // Returns a read-only reference to the column paths stored in _column_paths.
    const std::vector<std::string>& column_paths() const { return _column_paths; }
83
84
0
    bool is_auto_increment() const { return _is_auto_increment; }
85
86
0
    bool is_skip_bitmap_col() const { return _col_name == SKIP_BITMAP_COL; }
87
0
    bool is_sequence_col() const { return _col_name == SEQUENCE_COL; }
88
89
4
    const std::string& col_default_value() const { return _col_default_value; }
90
180
    PrimitiveType col_type() const { return _type->get_primitive_type(); }
91
92
private:
93
    friend class DescriptorTbl;
94
    friend class TupleDescriptor;
95
    friend class SchemaScanner;
96
    friend class OlapTableSchemaParam;
97
    friend class PInternalServiceImpl;
98
    friend class RowIdStorageReader;
99
    friend class Tablet;
100
    friend class TabletSchema;
101
102
    MOCK_REMOVE(const) SlotId _id;
103
    MOCK_REMOVE(const) vectorized::DataTypePtr _type;
104
    const TupleId _parent;
105
    const int _col_pos;
106
    const std::string _col_name;
107
    const std::string _col_name_lower_case;
108
109
    const int32_t _col_unique_id;
110
111
    // the idx of the slot in the tuple descriptor (0-based).
112
    // this is provided by the FE
113
    const int _slot_idx;
114
115
    // the idx of the slot in the llvm codegen'd tuple struct
116
    // this is set by TupleDescriptor during codegen and takes into account
117
    // leading null bytes.
118
    int _field_idx;
119
120
    const bool _is_materialized;
121
122
    const bool _is_key;
123
    const std::vector<std::string> _column_paths;
124
125
    const bool _is_auto_increment;
126
    const std::string _col_default_value;
127
128
    SlotDescriptor(const TSlotDescriptor& tdesc);
129
    SlotDescriptor(const PSlotDescriptor& pdesc);
130
    MOCK_DEFINE(SlotDescriptor();)
131
};
132
133
// Base class for table descriptors.
134
class TableDescriptor {
135
public:
136
    TableDescriptor(const TTableDescriptor& tdesc);
137
20
    virtual ~TableDescriptor() = default;
138
0
    int num_cols() const { return _num_cols; }
139
0
    int num_clustering_cols() const { return _num_clustering_cols; }
140
    virtual std::string debug_string() const;
141
142
    // The first _num_clustering_cols columns by position are clustering
143
    // columns.
144
0
    bool is_clustering_col(const SlotDescriptor* slot_desc) const {
145
0
        return slot_desc->col_pos() < _num_clustering_cols;
146
0
    }
147
148
0
    ::doris::TTableType::type table_type() const { return _table_type; }
149
0
    const std::string& name() const { return _name; }
150
0
    const std::string& database() const { return _database; }
151
0
    int64_t table_id() const { return _table_id; }
152
153
private:
154
    ::doris::TTableType::type _table_type;
155
    std::string _name;
156
    std::string _database;
157
    int64_t _table_id;
158
    int _num_cols;
159
    int _num_clustering_cols;
160
};
161
162
class OlapTableDescriptor : public TableDescriptor {
163
public:
164
    OlapTableDescriptor(const TTableDescriptor& tdesc);
165
    std::string debug_string() const override;
166
};
167
168
class DictionaryTableDescriptor : public TableDescriptor {
169
public:
170
    DictionaryTableDescriptor(const TTableDescriptor& tdesc);
171
    std::string debug_string() const override;
172
};
173
174
class SchemaTableDescriptor : public TableDescriptor {
175
public:
176
    SchemaTableDescriptor(const TTableDescriptor& tdesc);
177
    ~SchemaTableDescriptor() override;
178
    std::string debug_string() const override;
179
0
    TSchemaTableType::type schema_table_type() const { return _schema_table_type; }
180
181
private:
182
    TSchemaTableType::type _schema_table_type;
183
};
184
185
class BrokerTableDescriptor : public TableDescriptor {
186
public:
187
    BrokerTableDescriptor(const TTableDescriptor& tdesc);
188
    ~BrokerTableDescriptor() override;
189
    std::string debug_string() const override;
190
191
private:
192
};
193
194
class HiveTableDescriptor : public TableDescriptor {
195
public:
196
    HiveTableDescriptor(const TTableDescriptor& tdesc);
197
    ~HiveTableDescriptor() override;
198
    std::string debug_string() const override;
199
200
private:
201
};
202
203
class IcebergTableDescriptor : public TableDescriptor {
204
public:
205
    IcebergTableDescriptor(const TTableDescriptor& tdesc);
206
    ~IcebergTableDescriptor() override;
207
    std::string debug_string() const override;
208
209
private:
210
};
211
212
class MaxComputeTableDescriptor : public TableDescriptor {
213
public:
214
    MaxComputeTableDescriptor(const TTableDescriptor& tdesc);
215
    ~MaxComputeTableDescriptor() override;
216
    std::string debug_string() const override;
217
0
    std::string region() const { return _region; }
218
0
    std::string project() const { return _project; }
219
0
    std::string table() const { return _table; }
220
0
    std::string odps_url() const { return _odps_url; }
221
0
    std::string tunnel_url() const { return _tunnel_url; }
222
0
    std::string access_key() const { return _access_key; }
223
0
    std::string secret_key() const { return _secret_key; }
224
0
    std::string public_access() const { return _public_access; }
225
0
    std::string endpoint() const { return _endpoint; }
226
0
    std::string quota() const { return _quota; }
227
0
    Status init_status() const { return _init_status; }
228
229
private:
230
    std::string _region; //deprecated
231
    std::string _project;
232
    std::string _table;
233
    std::string _odps_url;   //deprecated
234
    std::string _tunnel_url; //deprecated
235
    std::string _access_key;
236
    std::string _secret_key;
237
    std::string _public_access; //deprecated
238
    std::string _endpoint;
239
    std::string _quota;
240
    Status _init_status = Status::OK();
241
};
242
243
class TrinoConnectorTableDescriptor : public TableDescriptor {
244
public:
245
    TrinoConnectorTableDescriptor(const TTableDescriptor& tdesc);
246
    ~TrinoConnectorTableDescriptor() override;
247
    std::string debug_string() const override;
248
249
private:
250
};
251
252
class EsTableDescriptor : public TableDescriptor {
253
public:
254
    EsTableDescriptor(const TTableDescriptor& tdesc);
255
    ~EsTableDescriptor() override;
256
    std::string debug_string() const override;
257
258
private:
259
};
260
261
class MySQLTableDescriptor : public TableDescriptor {
262
public:
263
    MySQLTableDescriptor(const TTableDescriptor& tdesc);
264
    std::string debug_string() const override;
265
0
    std::string mysql_db() const { return _mysql_db; }
266
0
    std::string mysql_table() const { return _mysql_table; }
267
0
    std::string host() const { return _host; }
268
0
    std::string port() const { return _port; }
269
0
    std::string user() const { return _user; }
270
0
    std::string passwd() const { return _passwd; }
271
0
    std::string charset() const { return _charset; }
272
273
private:
274
    std::string _mysql_db;
275
    std::string _mysql_table;
276
    std::string _host;
277
    std::string _port;
278
    std::string _user;
279
    std::string _passwd;
280
    std::string _charset;
281
};
282
283
class ODBCTableDescriptor : public TableDescriptor {
284
public:
285
    ODBCTableDescriptor(const TTableDescriptor& tdesc);
286
    std::string debug_string() const override;
287
0
    std::string db() const { return _db; }
288
0
    std::string table() const { return _table; }
289
0
    std::string host() const { return _host; }
290
0
    std::string port() const { return _port; }
291
0
    std::string user() const { return _user; }
292
0
    std::string passwd() const { return _passwd; }
293
0
    std::string driver() const { return _driver; }
294
0
    TOdbcTableType::type type() const { return _type; }
295
296
private:
297
    std::string _db;
298
    std::string _table;
299
    std::string _host;
300
    std::string _port;
301
    std::string _user;
302
    std::string _passwd;
303
    std::string _driver;
304
    TOdbcTableType::type _type;
305
};
306
307
class JdbcTableDescriptor : public TableDescriptor {
308
public:
309
    JdbcTableDescriptor(const TTableDescriptor& tdesc);
310
    std::string debug_string() const override;
311
0
    int64_t jdbc_catalog_id() const { return _jdbc_catalog_id; }
312
0
    const std::string& jdbc_resource_name() const { return _jdbc_resource_name; }
313
0
    const std::string& jdbc_driver_url() const { return _jdbc_driver_url; }
314
0
    const std::string& jdbc_driver_class() const { return _jdbc_driver_class; }
315
0
    const std::string& jdbc_driver_checksum() const { return _jdbc_driver_checksum; }
316
0
    const std::string& jdbc_url() const { return _jdbc_url; }
317
0
    const std::string& jdbc_table_name() const { return _jdbc_table_name; }
318
0
    const std::string& jdbc_user() const { return _jdbc_user; }
319
0
    const std::string& jdbc_passwd() const { return _jdbc_passwd; }
320
0
    int32_t connection_pool_min_size() const { return _connection_pool_min_size; }
321
0
    int32_t connection_pool_max_size() const { return _connection_pool_max_size; }
322
0
    int32_t connection_pool_max_wait_time() const { return _connection_pool_max_wait_time; }
323
0
    int32_t connection_pool_max_life_time() const { return _connection_pool_max_life_time; }
324
0
    bool connection_pool_keep_alive() const { return _connection_pool_keep_alive; }
325
326
private:
327
    int64_t _jdbc_catalog_id;
328
    std::string _jdbc_resource_name;
329
    std::string _jdbc_driver_url;
330
    std::string _jdbc_driver_class;
331
    std::string _jdbc_driver_checksum;
332
    std::string _jdbc_url;
333
    std::string _jdbc_table_name;
334
    std::string _jdbc_user;
335
    std::string _jdbc_passwd;
336
    int32_t _connection_pool_min_size;
337
    int32_t _connection_pool_max_size;
338
    int32_t _connection_pool_max_wait_time;
339
    int32_t _connection_pool_max_life_time;
340
    bool _connection_pool_keep_alive;
341
};
342
343
class TupleDescriptor {
344
public:
345
    TupleDescriptor(TupleDescriptor&&) = delete;
346
    void operator=(const TupleDescriptor&) = delete;
347
348
312k
    // Destructor. When _own_slots is set, every SlotDescriptor pointed to by
    // _slots is deleted here; otherwise the slot objects are left untouched.
    // NOTE(review): _own_slots defaults to false and is presumably set from the
    // `own_slot` constructor argument -- confirm in the .cpp.
    // NOTE(review): MOCK_DEFINE(virtual) presumably makes the destructor virtual
    // only in mock/test builds -- confirm in common/be_mock_util.h.
    MOCK_DEFINE(virtual) ~TupleDescriptor() {
349
312k
        if (_own_slots) {
350
13
            for (SlotDescriptor* slot : _slots) {
351
13
                delete slot;
352
13
            }
353
4
        }
354
312k
    }
355
356
    MOCK_DEFINE(TupleDescriptor() : _id {0} {};)
357
358
546k
    int num_materialized_slots() const { return _num_materialized_slots; }
359
1.21M
    MOCK_FUNCTION const std::vector<SlotDescriptor*>& slots() const { return _slots; }
360
361
524k
    bool has_varlen_slots() const { return _has_varlen_slots; }
362
0
    const TableDescriptor* table_desc() const { return _table_desc; }
363
364
1.09M
    TupleId id() const { return _id; }
365
366
    std::string debug_string() const;
367
368
    void to_protobuf(PTupleDescriptor* ptuple) const;
369
370
private:
371
    friend class DescriptorTbl;
372
    friend class SchemaScanner;
373
    friend class OlapTableSchemaParam;
374
    friend class PInternalServiceImpl;
375
    friend class RowIdStorageReader;
376
    friend class TabletSchema;
377
378
    const TupleId _id;
379
    TableDescriptor* _table_desc = nullptr;
380
    int _num_materialized_slots;
381
    std::vector<SlotDescriptor*> _slots; // contains all slots
382
383
    // Provide quick way to check if there are variable length slots.
384
    // True if _string_slots or _collection_slots have entries.
385
    bool _has_varlen_slots;
386
    bool _own_slots = false;
387
388
    TupleDescriptor(const TTupleDescriptor& tdesc, bool own_slot = false);
389
    TupleDescriptor(const PTupleDescriptor& tdesc, bool own_slot = false);
390
391
    void add_slot(SlotDescriptor* slot);
392
};
393
394
class DescriptorTbl {
395
public:
396
    // Creates a descriptor tbl within 'pool' from thrift_tbl and returns it via 'tbl'.
397
    // Returns OK on success, otherwise error (in which case 'tbl' will be unset).
398
    static Status create(ObjectPool* pool, const TDescriptorTable& thrift_tbl, DescriptorTbl** tbl);
399
400
    TableDescriptor* get_table_descriptor(TableId id) const;
401
    TupleDescriptor* get_tuple_descriptor(TupleId id) const;
402
    SlotDescriptor* get_slot_descriptor(SlotId id) const;
403
0
    const std::vector<TTupleId>& get_row_tuples() const { return _row_tuples; }
404
405
    // Returns every tuple descriptor registered in _tuple_desc_map, collected
    // into a new vector. _tuple_desc_map is a std::unordered_map, so the order
    // of the returned descriptors is unspecified. Entries are visited by const
    // reference so that no key/value pair is copied per iteration.
406
47
    std::vector<TupleDescriptor*> get_tuple_descs() const {
407
47
        std::vector<TupleDescriptor*> descs;
408
409
104
        for (const auto& entry : _tuple_desc_map) {
410
104
            descs.push_back(entry.second);
411
104
        }
412
413
47
        return descs;
414
47
    }
415
416
    std::string debug_string() const;
417
418
private:
419
    using TableDescriptorMap = std::unordered_map<TableId, TableDescriptor*>;
420
    using TupleDescriptorMap = std::unordered_map<TupleId, TupleDescriptor*>;
421
    using SlotDescriptorMap = std::unordered_map<SlotId, SlotDescriptor*>;
422
423
    TableDescriptorMap _tbl_desc_map;
424
    TupleDescriptorMap _tuple_desc_map;
425
    SlotDescriptorMap _slot_desc_map;
426
    std::vector<TTupleId> _row_tuples;
427
428
78.1k
    DescriptorTbl() = default;
429
};
430
431
// Makes the enclosing function return Status::InternalError (mentioning
// `tuple_id`) when `tuple_idx` equals RowDescriptor::INVALID_IDX; otherwise it
// does nothing. The do { ... } while (false) wrapper lets the macro be used as
// a single statement. `tuple_idx` is parenthesized so that an expression
// argument is compared as a whole rather than being split by `==` precedence.
#define RETURN_IF_INVALID_TUPLE_IDX(tuple_id, tuple_idx)                                         \
432
0
    do {                                                                                         \
433
0
        if (UNLIKELY(RowDescriptor::INVALID_IDX == (tuple_idx))) {                               \
434
0
            return Status::InternalError("failed to get tuple idx with tuple id: {}", tuple_id); \
435
0
        }                                                                                        \
436
0
    } while (false)
437
438
// Records positions of tuples within row produced by ExecNode.
439
// TODO: this needs to differentiate between tuples contained in row
440
// and tuples produced by ExecNode (parallel to PlanNode.rowTupleIds and
441
// PlanNode.tupleIds); right now, we conflate the two (and distinguish based on
442
// context; for instance, HdfsScanNode uses these tids to create row batches, ie, the
443
// first case, whereas TopNNode uses these tids to copy output rows, ie, the second
444
// case)
445
class RowDescriptor {
446
public:
447
    RowDescriptor(const DescriptorTbl& desc_tbl, const std::vector<TTupleId>& row_tuples,
448
                  const std::vector<bool>& nullable_tuples);
449
450
    // Copy constructor, written out explicitly.
    // The tuple descriptor vector, the nullable map, the tuple index map and the
    // varlen flag are copied from `desc`. The two counters are not copied:
    // _num_materialized_slots and _num_slots start from their in-class
    // initializers (0) and are recomputed by summing over every tuple
    // descriptor in `desc`.
451
    RowDescriptor(const RowDescriptor& desc)
452
            : _tuple_desc_map(desc._tuple_desc_map),
453
              _tuple_idx_nullable_map(desc._tuple_idx_nullable_map),
454
              _tuple_idx_map(desc._tuple_idx_map),
455
0
              _has_varlen_slots(desc._has_varlen_slots) {
456
0
        auto it = desc._tuple_desc_map.begin();
457
0
        for (; it != desc._tuple_desc_map.end(); ++it) {
458
0
            _num_materialized_slots += (*it)->num_materialized_slots();
459
0
            _num_slots += (*it)->slots().size();
460
0
        }
461
0
    }
462
463
    RowDescriptor(TupleDescriptor* tuple_desc, bool is_nullable);
464
465
    RowDescriptor(const RowDescriptor& lhs_row_desc, const RowDescriptor& rhs_row_desc);
466
467
    // dummy descriptor, needed for the JNI EvalPredicate() function
468
156k
    RowDescriptor() = default;
469
470
    MOCK_DEFINE(virtual ~RowDescriptor() = default;)
471
472
622k
    int num_materialized_slots() const { return _num_materialized_slots; }
473
474
2
    int num_slots() const { return _num_slots; }
475
476
    static const int INVALID_IDX;
477
478
    // Returns INVALID_IDX if id not part of this row.
479
    int get_tuple_idx(TupleId id) const;
480
481
    // Return true if any Tuple has variable length slots.
482
0
    bool has_varlen_slots() const { return _has_varlen_slots; }
483
484
    // Return descriptors for all tuples in this row, in order of appearance.
485
390k
    MOCK_FUNCTION const std::vector<TupleDescriptor*>& tuple_descriptors() const {
486
390k
        return _tuple_desc_map;
487
390k
    }
488
489
    // Populate row_tuple_ids with our ids.
490
    void to_thrift(std::vector<TTupleId>* row_tuple_ids);
491
    void to_protobuf(google::protobuf::RepeatedField<google::protobuf::int32>* row_tuple_ids) const;
492
493
    // Return true if the tuple ids of this descriptor are a prefix
494
    // of the tuple ids of other_desc.
495
    bool is_prefix_of(const RowDescriptor& other_desc) const;
496
497
    // Return true if the tuple ids of this descriptor match tuple ids of other desc.
498
    bool equals(const RowDescriptor& other_desc) const;
499
500
    std::string debug_string() const;
501
502
    int get_column_id(int slot_id, bool force_materialize_slot = false) const;
503
504
private:
505
    // Initializes _tuple_idx_map during c'tor using the _tuple_desc_map.
506
    void init_tuple_idx_map();
507
508
    // Initializes _has_varlen_slots during c'tor using the _tuple_desc_map.
509
    void init_has_varlen_slots();
510
511
    // map from position of tuple w/in row to its descriptor
512
    std::vector<TupleDescriptor*> _tuple_desc_map;
513
514
    // _tuple_idx_nullable_map[i] is true if tuple i can be null
515
    std::vector<bool> _tuple_idx_nullable_map;
516
517
    // map from TupleId to position of tuple w/in row
518
    std::vector<int> _tuple_idx_map;
519
520
    // Provide quick way to check if there are variable length slots.
521
    bool _has_varlen_slots = false;
522
523
    int _num_materialized_slots = 0;
524
    int _num_slots = 0;
525
};
526
} // namespace doris