Coverage Report

Created: 2026-03-11 17:42

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/be/src/runtime/descriptors.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/apache/impala/blob/branch-2.9.0/be/src/runtime/descriptors.h
19
// and modified by Doris
20
21
#pragma once
22
23
#include <gen_cpp/Descriptors_types.h>
24
#include <gen_cpp/Exprs_types.h>
25
#include <gen_cpp/Types_types.h>
26
#include <glog/logging.h>
27
#include <google/protobuf/stubs/port.h>
28
29
#include <cstdint>
30
#include <ostream>
31
#include <string>
32
#include <unordered_map>
33
#include <utility>
34
#include <vector>
35
36
#include "common/be_mock_util.h"
37
#include "common/compiler_util.h" // IWYU pragma: keep
38
#include "common/global_types.h"
39
#include "common/object_pool.h"
40
#include "common/status.h"
41
#include "core/data_type/data_type.h"
42
#include "core/data_type/define_primitive_type.h"
43
#include "storage/utils.h"
44
45
namespace google::protobuf {
46
template <typename Element>
47
class RepeatedField;
48
} // namespace google::protobuf
49
50
namespace doris {
51
#include "common/compile_check_begin.h"
52
class ObjectPool;
53
class PTupleDescriptor;
54
class PSlotDescriptor;
55
56
using TColumnAccessPaths = std::vector<TColumnAccessPath>;
57
58
class SlotDescriptor {
59
public:
60
    MOCK_DEFINE(virtual ~SlotDescriptor() = default;)
61
657k
    SlotId id() const { return _id; }
62
3.44M
    DataTypePtr type() const { return _type; }
63
0
    TupleId parent() const { return _parent; }
64
    // Returns the column index of this slot, including partition keys.
65
    // (e.g., col_pos - num_partition_keys = the table column this slot corresponds to)
66
26
    int col_pos() const { return _col_pos; }
67
    // Returns the field index in the generated llvm struct for this slot's tuple
68
0
    int field_idx() const { return _field_idx; }
69
    bool is_nullable() const;
70
    DataTypePtr get_data_type_ptr() const;
71
72
895k
    const std::string& col_name() const { return _col_name; }
73
171
    const std::string& col_name_lower_case() const { return _col_name_lower_case; }
74
75
    void to_protobuf(PSlotDescriptor* pslot) const;
76
77
    std::string debug_string() const;
78
79
    MutableColumnPtr get_empty_mutable_column() const;
80
81
351k
    MOCK_FUNCTION int32_t col_unique_id() const { return _col_unique_id; }
82
83
1
    bool is_key() const { return _is_key; }
84
0
    const std::vector<std::string>& column_paths() const { return _column_paths; };
85
86
46
    const TColumnAccessPaths& all_access_paths() const { return _all_access_paths; }
87
46
    const TColumnAccessPaths& predicate_access_paths() const { return _predicate_access_paths; }
88
89
2
    bool is_auto_increment() const { return _is_auto_increment; }
90
91
0
    bool is_skip_bitmap_col() const { return _col_name == SKIP_BITMAP_COL; }
92
0
    bool is_sequence_col() const { return _col_name == SEQUENCE_COL; }
93
94
89.0k
    const std::string& col_default_value() const { return _col_default_value; }
95
    PrimitiveType col_type() const;
96
97
272k
    std::shared_ptr<doris::TExpr> get_virtual_column_expr() const {
98
        // virtual_column_expr need do prepare.
99
272k
        return virtual_column_expr;
100
272k
    }
101
102
0
    void set_is_predicate(bool is_predicate) { _is_predicate = is_predicate; }
103
104
181
    bool is_predicate() const { return _is_predicate; }
105
106
private:
107
    friend class DescriptorTbl;
108
    friend class TupleDescriptor;
109
    friend class SchemaScanner;
110
    friend class OlapTableSchemaParam;
111
    friend class PInternalServiceImpl;
112
    friend class RowIdStorageReader;
113
    friend class Tablet;
114
    friend class TabletSchema;
115
116
    MOCK_REMOVE(const) SlotId _id;
117
    MOCK_REMOVE(const) DataTypePtr _type;
118
    const TupleId _parent;
119
    const int _col_pos;
120
    MOCK_REMOVE(const) std::string _col_name;
121
    const std::string _col_name_lower_case;
122
123
    const int32_t _col_unique_id;
124
125
    // the idx of the slot in the tuple descriptor (0-based).
126
    // this is provided by the FE
127
    const int _slot_idx;
128
129
    // the idx of the slot in the llvm codegen'd tuple struct
130
    // this is set by TupleDescriptor during codegen and takes into account
131
    // leading null bytes.
132
    int _field_idx;
133
134
    const bool _is_key;
135
    const std::vector<std::string> _column_paths;
136
137
    TColumnAccessPaths _all_access_paths;
138
    TColumnAccessPaths _predicate_access_paths;
139
140
    const bool _is_auto_increment;
141
    const std::string _col_default_value;
142
143
    std::shared_ptr<doris::TExpr> virtual_column_expr = nullptr;
144
145
    bool _is_predicate = false;
146
147
    SlotDescriptor(const TSlotDescriptor& tdesc);
148
    SlotDescriptor(const PSlotDescriptor& pdesc);
149
    MOCK_DEFINE(SlotDescriptor();)
150
};
151
152
// Base class for table descriptors.
153
class TableDescriptor {
154
public:
155
    TableDescriptor(const TTableDescriptor& tdesc);
156
185
    virtual ~TableDescriptor() = default;
157
0
    int num_cols() const { return _num_cols; }
158
0
    int num_clustering_cols() const { return _num_clustering_cols; }
159
    virtual std::string debug_string() const;
160
161
    // The first _num_clustering_cols columns by position are clustering
162
    // columns.
163
0
    bool is_clustering_col(const SlotDescriptor* slot_desc) const {
164
0
        return slot_desc->col_pos() < _num_clustering_cols;
165
0
    }
166
167
0
    ::doris::TTableType::type table_type() const { return _table_type; }
168
0
    const std::string& name() const { return _name; }
169
0
    const std::string& database() const { return _database; }
170
0
    int64_t table_id() const { return _table_id; }
171
172
private:
173
    ::doris::TTableType::type _table_type;
174
    std::string _name;
175
    std::string _database;
176
    int64_t _table_id;
177
    int _num_cols;
178
    int _num_clustering_cols;
179
};
180
181
class OlapTableDescriptor : public TableDescriptor {
182
public:
183
    OlapTableDescriptor(const TTableDescriptor& tdesc);
184
    std::string debug_string() const override;
185
};
186
187
class DictionaryTableDescriptor : public TableDescriptor {
188
public:
189
    DictionaryTableDescriptor(const TTableDescriptor& tdesc);
190
    std::string debug_string() const override;
191
};
192
193
class SchemaTableDescriptor : public TableDescriptor {
194
public:
195
    SchemaTableDescriptor(const TTableDescriptor& tdesc);
196
    ~SchemaTableDescriptor() override;
197
    std::string debug_string() const override;
198
0
    TSchemaTableType::type schema_table_type() const { return _schema_table_type; }
199
200
private:
201
    TSchemaTableType::type _schema_table_type;
202
};
203
204
class BrokerTableDescriptor : public TableDescriptor {
205
public:
206
    BrokerTableDescriptor(const TTableDescriptor& tdesc);
207
    ~BrokerTableDescriptor() override;
208
    std::string debug_string() const override;
209
210
private:
211
};
212
213
class HiveTableDescriptor : public TableDescriptor {
214
public:
215
    HiveTableDescriptor(const TTableDescriptor& tdesc);
216
    ~HiveTableDescriptor() override;
217
    std::string debug_string() const override;
218
219
private:
220
};
221
222
class IcebergTableDescriptor : public TableDescriptor {
223
public:
224
    IcebergTableDescriptor(const TTableDescriptor& tdesc);
225
    ~IcebergTableDescriptor() override;
226
    std::string debug_string() const override;
227
228
private:
229
};
230
231
class MaxComputeTableDescriptor : public TableDescriptor {
232
public:
233
    MaxComputeTableDescriptor(const TTableDescriptor& tdesc);
234
    ~MaxComputeTableDescriptor() override;
235
    std::string debug_string() const override;
236
0
    std::string region() const { return _region; }
237
0
    std::string project() const { return _project; }
238
0
    std::string table() const { return _table; }
239
0
    std::string odps_url() const { return _odps_url; }
240
0
    std::string tunnel_url() const { return _tunnel_url; }
241
0
    std::string access_key() const { return _access_key; }
242
0
    std::string secret_key() const { return _secret_key; }
243
0
    std::string public_access() const { return _public_access; }
244
0
    std::string endpoint() const { return _endpoint; }
245
0
    std::string quota() const { return _quota; }
246
0
    Status init_status() const { return _init_status; }
247
0
    std::map<std::string, std::string> properties() const { return _props; }
248
249
private:
250
    std::string _region; //deprecated
251
    std::string _project;
252
    std::string _table;
253
    std::string _odps_url;      //deprecated
254
    std::string _tunnel_url;    //deprecated
255
    std::string _access_key;    //deprecated
256
    std::string _secret_key;    //deprecated
257
    std::string _public_access; //deprecated
258
    std::string _endpoint;
259
    std::string _quota;
260
    std::map<std::string, std::string> _props;
261
    Status _init_status = Status::OK();
262
};
263
264
class TrinoConnectorTableDescriptor : public TableDescriptor {
265
public:
266
    TrinoConnectorTableDescriptor(const TTableDescriptor& tdesc);
267
    ~TrinoConnectorTableDescriptor() override;
268
    std::string debug_string() const override;
269
270
private:
271
};
272
273
class EsTableDescriptor : public TableDescriptor {
274
public:
275
    EsTableDescriptor(const TTableDescriptor& tdesc);
276
    ~EsTableDescriptor() override;
277
    std::string debug_string() const override;
278
279
private:
280
};
281
282
class MySQLTableDescriptor : public TableDescriptor {
283
public:
284
    MySQLTableDescriptor(const TTableDescriptor& tdesc);
285
    std::string debug_string() const override;
286
0
    std::string mysql_db() const { return _mysql_db; }
287
0
    std::string mysql_table() const { return _mysql_table; }
288
0
    std::string host() const { return _host; }
289
0
    std::string port() const { return _port; }
290
0
    std::string user() const { return _user; }
291
0
    std::string passwd() const { return _passwd; }
292
0
    std::string charset() const { return _charset; }
293
294
private:
295
    std::string _mysql_db;
296
    std::string _mysql_table;
297
    std::string _host;
298
    std::string _port;
299
    std::string _user;
300
    std::string _passwd;
301
    std::string _charset;
302
};
303
304
class JdbcTableDescriptor : public TableDescriptor {
305
public:
306
    JdbcTableDescriptor(const TTableDescriptor& tdesc);
307
    std::string debug_string() const override;
308
0
    int64_t jdbc_catalog_id() const { return _jdbc_catalog_id; }
309
0
    const std::string& jdbc_resource_name() const { return _jdbc_resource_name; }
310
0
    const std::string& jdbc_driver_url() const { return _jdbc_driver_url; }
311
0
    const std::string& jdbc_driver_class() const { return _jdbc_driver_class; }
312
0
    const std::string& jdbc_driver_checksum() const { return _jdbc_driver_checksum; }
313
0
    const std::string& jdbc_url() const { return _jdbc_url; }
314
0
    const std::string& jdbc_table_name() const { return _jdbc_table_name; }
315
0
    const std::string& jdbc_user() const { return _jdbc_user; }
316
0
    const std::string& jdbc_passwd() const { return _jdbc_passwd; }
317
0
    int32_t connection_pool_min_size() const { return _connection_pool_min_size; }
318
0
    int32_t connection_pool_max_size() const { return _connection_pool_max_size; }
319
0
    int32_t connection_pool_max_wait_time() const { return _connection_pool_max_wait_time; }
320
0
    int32_t connection_pool_max_life_time() const { return _connection_pool_max_life_time; }
321
0
    bool connection_pool_keep_alive() const { return _connection_pool_keep_alive; }
322
323
private:
324
    int64_t _jdbc_catalog_id;
325
    std::string _jdbc_resource_name;
326
    std::string _jdbc_driver_url;
327
    std::string _jdbc_driver_class;
328
    std::string _jdbc_driver_checksum;
329
    std::string _jdbc_url;
330
    std::string _jdbc_table_name;
331
    std::string _jdbc_user;
332
    std::string _jdbc_passwd;
333
    int32_t _connection_pool_min_size;
334
    int32_t _connection_pool_max_size;
335
    int32_t _connection_pool_max_wait_time;
336
    int32_t _connection_pool_max_life_time;
337
    bool _connection_pool_keep_alive;
338
};
339
340
class RemoteDorisTableDescriptor : public TableDescriptor {
341
public:
342
    RemoteDorisTableDescriptor(const TTableDescriptor& tdesc);
343
    ~RemoteDorisTableDescriptor() override;
344
    std::string debug_string() const override;
345
346
private:
347
};
348
349
class TupleDescriptor {
350
public:
351
    TupleDescriptor(TupleDescriptor&&) = delete;
352
    void operator=(const TupleDescriptor&) = delete;
353
354
292k
    MOCK_DEFINE(virtual) ~TupleDescriptor() {
355
292k
        if (_own_slots) {
356
13
            for (SlotDescriptor* slot : _slots) {
357
13
                delete slot;
358
13
            }
359
4
        }
360
292k
    }
361
362
    MOCK_DEFINE(TupleDescriptor() : _id {0} {};)
363
364
504k
    int num_materialized_slots() const { return _num_materialized_slots; }
365
1.33M
    MOCK_FUNCTION const std::vector<SlotDescriptor*>& slots() const { return _slots; }
366
367
487k
    bool has_varlen_slots() const { return _has_varlen_slots; }
368
0
    const TableDescriptor* table_desc() const { return _table_desc; }
369
370
1.01M
    TupleId id() const { return _id; }
371
372
    std::string debug_string() const;
373
374
    void to_protobuf(PTupleDescriptor* ptuple) const;
375
376
private:
377
    friend class DescriptorTbl;
378
    friend class SchemaScanner;
379
    friend class OlapTableSchemaParam;
380
    friend class PInternalServiceImpl;
381
    friend class RowIdStorageReader;
382
    friend class TabletSchema;
383
384
    const TupleId _id;
385
    TableDescriptor* _table_desc = nullptr;
386
    int _num_materialized_slots;
387
    std::vector<SlotDescriptor*> _slots; // contains all slots
388
389
    // Provide quick way to check if there are variable length slots.
390
    // True if _string_slots or _collection_slots have entries.
391
    bool _has_varlen_slots;
392
    bool _own_slots = false;
393
394
    TupleDescriptor(const TTupleDescriptor& tdesc, bool own_slot = false);
395
    TupleDescriptor(const PTupleDescriptor& tdesc, bool own_slot = false);
396
397
    void add_slot(SlotDescriptor* slot);
398
};
399
400
class DescriptorTbl {
401
public:
402
#ifdef BE_TEST
403
198k
    DescriptorTbl() = default;
404
198k
    virtual ~DescriptorTbl() = default;
405
#endif
406
407
    // Creates a descriptor tbl within 'pool' from thrift_tbl and returns it via 'tbl'.
408
    // Returns OK on success, otherwise error (in which case 'tbl' will be unset).
409
    static Status create(ObjectPool* pool, const TDescriptorTable& thrift_tbl, DescriptorTbl** tbl);
410
411
    TableDescriptor* get_table_descriptor(TableId id) const;
412
    TupleDescriptor* get_tuple_descriptor(TupleId id) const;
413
    MOCK_FUNCTION SlotDescriptor* get_slot_descriptor(SlotId id) const;
414
0
    const std::vector<TTupleId>& get_row_tuples() const { return _row_tuples; }
415
416
    // return all registered tuple descriptors
417
44
    std::vector<TupleDescriptor*> get_tuple_descs() const {
418
44
        std::vector<TupleDescriptor*> descs;
419
420
100
        for (auto it : _tuple_desc_map) {
421
100
            descs.push_back(it.second);
422
100
        }
423
424
44
        return descs;
425
44
    }
426
427
    std::string debug_string() const;
428
429
private:
430
    using TableDescriptorMap = std::unordered_map<TableId, TableDescriptor*>;
431
    using TupleDescriptorMap = std::unordered_map<TupleId, TupleDescriptor*>;
432
    using SlotDescriptorMap = std::unordered_map<SlotId, SlotDescriptor*>;
433
434
    TableDescriptorMap _tbl_desc_map;
435
    TupleDescriptorMap _tuple_desc_map;
436
    SlotDescriptorMap _slot_desc_map;
437
    std::vector<TTupleId> _row_tuples;
438
439
#ifndef BE_TEST
440
    DescriptorTbl() = default;
441
#endif
442
};
443
444
#define RETURN_IF_INVALID_TUPLE_IDX(tuple_id, tuple_idx)                                         \
445
0
    do {                                                                                         \
446
0
        if (UNLIKELY(RowDescriptor::INVALID_IDX == tuple_idx)) {                                 \
447
0
            return Status::InternalError("failed to get tuple idx with tuple id: {}", tuple_id); \
448
0
        }                                                                                        \
449
0
    } while (false)
450
451
// Records positions of tuples within row produced by ExecNode.
452
// TODO: this needs to differentiate between tuples contained in row
453
// and tuples produced by ExecNode (parallel to PlanNode.rowTupleIds and
454
// PlanNode.tupleIds); right now, we conflate the two (and distinguish based on
455
// context; for instance, HdfsScanNode uses these tids to create row batches, ie, the
456
// first case, whereas TopNNode uses these tids to copy output rows, ie, the second
457
// case)
458
class RowDescriptor {
459
public:
460
    RowDescriptor(const DescriptorTbl& desc_tbl, const std::vector<TTupleId>& row_tuples);
461
462
    // standard copy c'tor, made explicit here
463
    RowDescriptor(const RowDescriptor& desc)
464
0
            : _tuple_desc_map(desc._tuple_desc_map),
465
0
              _tuple_idx_map(desc._tuple_idx_map),
466
0
              _has_varlen_slots(desc._has_varlen_slots) {
467
0
        auto it = desc._tuple_desc_map.begin();
468
0
        for (; it != desc._tuple_desc_map.end(); ++it) {
469
0
            _num_materialized_slots += (*it)->num_materialized_slots();
470
0
            _num_slots += (*it)->slots().size();
471
0
        }
472
0
    }
473
474
    RowDescriptor(TupleDescriptor* tuple_desc);
475
476
    RowDescriptor(const RowDescriptor& lhs_row_desc, const RowDescriptor& rhs_row_desc);
477
478
    // dummy descriptor, needed for the JNI EvalPredicate() function
479
145k
    RowDescriptor() = default;
480
481
    MOCK_DEFINE(virtual ~RowDescriptor() = default;)
482
483
914k
    int num_materialized_slots() const { return _num_materialized_slots; }
484
485
3
    int num_slots() const { return _num_slots; }
486
487
    static const int INVALID_IDX;
488
489
    // Returns INVALID_IDX if id not part of this row.
490
    int get_tuple_idx(TupleId id) const;
491
492
    // Return true if any Tuple has variable length slots.
493
0
    bool has_varlen_slots() const { return _has_varlen_slots; }
494
495
    // Return descriptors for all tuples in this row, in order of appearance.
496
360k
    MOCK_FUNCTION const std::vector<TupleDescriptor*>& tuple_descriptors() const {
497
360k
        return _tuple_desc_map;
498
360k
    }
499
500
    // Populate row_tuple_ids with our ids.
501
    void to_thrift(std::vector<TTupleId>* row_tuple_ids);
502
    void to_protobuf(google::protobuf::RepeatedField<google::protobuf::int32>* row_tuple_ids) const;
503
504
    // Return true if the tuple ids of this descriptor are a prefix
505
    // of the tuple ids of other_desc.
506
    bool is_prefix_of(const RowDescriptor& other_desc) const;
507
508
    // Return true if the tuple ids of this descriptor match tuple ids of other desc.
509
    bool equals(const RowDescriptor& other_desc) const;
510
511
    std::string debug_string() const;
512
513
    int get_column_id(int slot_id) const;
514
515
private:
516
    // Initializes tupleIdxMap during c'tor using the _tuple_desc_map.
517
    void init_tuple_idx_map();
518
519
    // Initializes _has_varlen_slots during c'tor using the _tuple_desc_map.
520
    void init_has_varlen_slots();
521
522
    // map from position of tuple w/in row to its descriptor
523
    std::vector<TupleDescriptor*> _tuple_desc_map;
524
525
    // map from TupleId to position of tuple w/in row
526
    std::vector<int> _tuple_idx_map;
527
528
    // Provide quick way to check if there are variable length slots.
529
    bool _has_varlen_slots = false;
530
531
    int _num_materialized_slots = 0;
532
    int _num_slots = 0;
533
};
534
#include "common/compile_check_end.h"
535
} // namespace doris