Coverage Report

Created: 2025-08-09 13:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/be/src/runtime/descriptors.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/apache/impala/blob/branch-2.9.0/be/src/runtime/descriptors.h
19
// and modified by Doris
20
21
#pragma once
22
23
#include <gen_cpp/Descriptors_types.h>
24
#include <gen_cpp/Exprs_types.h>
25
#include <gen_cpp/Types_types.h>
26
#include <glog/logging.h>
27
#include <google/protobuf/stubs/port.h>
28
#include <stdint.h>
29
30
#include <ostream>
31
#include <string>
32
#include <unordered_map>
33
#include <utility>
34
#include <vector>
35
36
#include "common/be_mock_util.h"
37
#include "common/compiler_util.h" // IWYU pragma: keep
38
#include "common/global_types.h"
39
#include "common/object_pool.h"
40
#include "common/status.h"
41
#include "olap/utils.h"
42
#include "runtime/define_primitive_type.h"
43
#include "runtime/types.h"
44
#include "vec/data_types/data_type.h"
45
namespace google::protobuf {
46
template <typename Element>
47
class RepeatedField;
48
} // namespace google::protobuf
49
50
namespace doris {
51
52
class ObjectPool;
53
class PTupleDescriptor;
54
class PSlotDescriptor;
55
56
// Describes one slot of a tuple: its id, data type, owning tuple id, column
// name/position/unique id, and a handful of per-column flags.
//
// All constructors are private; instances are created by the friend classes
// listed in the private section.
//
// MOCK_DEFINE / MOCK_REMOVE come from common/be_mock_util.h (not shown here);
// presumably they add / strip the wrapped tokens in test builds -- confirm there.
class SlotDescriptor {
57
public:
58
    MOCK_DEFINE(virtual ~SlotDescriptor() = default;)
59
657k
    SlotId id() const { return _id; }
60
3.43M
    // Returns a copy of the stored data type pointer.
    const vectorized::DataTypePtr type() const { return _type; }
61
0
    // Id of the tuple this slot belongs to.
    TupleId parent() const { return _parent; }
62
    // Returns the column index of this slot, including partition keys.
63
    // (e.g., col_pos - num_partition_keys = the table column this slot corresponds to)
64
0
    int col_pos() const { return _col_pos; }
65
    // Returns the field index in the generated llvm struct for this slot's tuple
66
0
    int field_idx() const { return _field_idx; }
67
1.60M
    bool is_materialized() const { return _is_materialized; }
68
    bool is_nullable() const;
69
    vectorized::DataTypePtr get_data_type_ptr() const;
70
71
894k
    const std::string& col_name() const { return _col_name; }
72
0
    const std::string& col_name_lower_case() const { return _col_name_lower_case; }
73
74
    // Writes this slot into the protobuf message `pslot` (defined out of line).
    void to_protobuf(PSlotDescriptor* pslot) const;
75
76
    std::string debug_string() const;
77
78
    vectorized::MutableColumnPtr get_empty_mutable_column() const;
79
80
93.0k
    int32_t col_unique_id() const { return _col_unique_id; }
81
82
1
    bool is_key() const { return _is_key; }
83
0
    const std::vector<std::string>& column_paths() const { return _column_paths; }
84
85
0
    bool is_auto_increment() const { return _is_auto_increment; }
86
87
0
    // True when the column name equals SKIP_BITMAP_COL (constant declared elsewhere).
    bool is_skip_bitmap_col() const { return _col_name == SKIP_BITMAP_COL; }
88
0
    // True when the column name equals SEQUENCE_COL (constant declared elsewhere).
    bool is_sequence_col() const { return _col_name == SEQUENCE_COL; }
89
90
88.9k
    const std::string& col_default_value() const { return _col_default_value; }
91
    PrimitiveType col_type() const;
92
93
271k
    // Returns the stored virtual column expression; null unless one has been set
    // (the member defaults to nullptr).
    std::shared_ptr<doris::TExpr> get_virtual_column_expr() const {
94
        // virtual_column_expr need do prepare.
95
271k
        return virtual_column_expr;
96
271k
    }
97
98
private:
99
    // These classes may call the private constructors and touch the members directly.
    friend class DescriptorTbl;
100
    friend class TupleDescriptor;
101
    friend class SchemaScanner;
102
    friend class OlapTableSchemaParam;
103
    friend class PInternalServiceImpl;
104
    friend class RowIdStorageReader;
105
    friend class Tablet;
106
    friend class TabletSchema;
107
108
    MOCK_REMOVE(const) SlotId _id;
109
    MOCK_REMOVE(const) vectorized::DataTypePtr _type;
110
    const TupleId _parent;
111
    const int _col_pos;
112
    MOCK_REMOVE(const) std::string _col_name;
113
    const std::string _col_name_lower_case;
114
115
    const int32_t _col_unique_id;
116
117
    // the idx of the slot in the tuple descriptor (0-based).
118
    // this is provided by the FE
119
    const int _slot_idx;
120
121
    // the idx of the slot in the llvm codegen'd tuple struct
122
    // this is set by TupleDescriptor during codegen and takes into account
123
    // leading null bytes.
124
    int _field_idx;
125
126
    const bool _is_materialized;
127
128
    const bool _is_key;
129
    const std::vector<std::string> _column_paths;
130
131
    const bool _is_auto_increment;
132
    const std::string _col_default_value;
133
134
    // NOTE(review): unlike the other members this one has no leading underscore;
    // left as is because friend classes may refer to it by name.
    std::shared_ptr<doris::TExpr> virtual_column_expr = nullptr;
135
136
    // Build from the thrift / protobuf representation (defined out of line).
    SlotDescriptor(const TSlotDescriptor& tdesc);
137
    SlotDescriptor(const PSlotDescriptor& pdesc);
138
    MOCK_DEFINE(SlotDescriptor();)
139
};
140
141
// Base class for table descriptors.
142
// Polymorphic base holding the attributes shared by every table descriptor:
// table type, name, database, table id and column counts. Constructed from a
// thrift TTableDescriptor (constructor defined out of line).
class TableDescriptor {
143
public:
144
    // NOTE(review): single-argument constructor is not `explicit`, so a
    // TTableDescriptor converts implicitly -- confirm that is intended.
    TableDescriptor(const TTableDescriptor& tdesc);
145
27
    virtual ~TableDescriptor() = default;
146
0
    int num_cols() const { return _num_cols; }
147
0
    int num_clustering_cols() const { return _num_clustering_cols; }
148
    // Overridden by the subclasses declared in this header.
    virtual std::string debug_string() const;
149
150
    // The first _num_clustering_cols columns by position are clustering
151
    // columns.
152
0
    // `slot_desc` is dereferenced without a null check.
    bool is_clustering_col(const SlotDescriptor* slot_desc) const {
153
0
        return slot_desc->col_pos() < _num_clustering_cols;
154
0
    }
155
156
0
    ::doris::TTableType::type table_type() const { return _table_type; }
157
0
    const std::string& name() const { return _name; }
158
0
    const std::string& database() const { return _database; }
159
0
    int64_t table_id() const { return _table_id; }
160
161
private:
162
    // None of these have in-class initializers; they rely on the out-of-line
    // constructor to be set.
    ::doris::TTableType::type _table_type;
163
    std::string _name;
164
    std::string _database;
165
    int64_t _table_id;
166
    int _num_cols;
167
    int _num_clustering_cols;
168
};
169
170
// TableDescriptor subclass that adds no state of its own; it only supplies a
// constructor and its own debug_string() (both defined out of line).
class OlapTableDescriptor : public TableDescriptor {
171
public:
172
    OlapTableDescriptor(const TTableDescriptor& tdesc);
173
    std::string debug_string() const override;
174
};
175
176
// TableDescriptor subclass that adds no state of its own; it only supplies a
// constructor and its own debug_string() (both defined out of line).
class DictionaryTableDescriptor : public TableDescriptor {
177
public:
178
    DictionaryTableDescriptor(const TTableDescriptor& tdesc);
179
    std::string debug_string() const override;
180
};
181
182
// TableDescriptor subclass that additionally records a TSchemaTableType value.
class SchemaTableDescriptor : public TableDescriptor {
183
public:
184
    SchemaTableDescriptor(const TTableDescriptor& tdesc);
185
    ~SchemaTableDescriptor() override;
186
    std::string debug_string() const override;
187
0
    TSchemaTableType::type schema_table_type() const { return _schema_table_type; }
188
189
private:
190
    // No in-class initializer; presumably set by the out-of-line constructor -- confirm.
    TSchemaTableType::type _schema_table_type;
191
};
192
193
// TableDescriptor subclass with its own constructor, destructor and
// debug_string() (all defined out of line); it declares no extra members.
class BrokerTableDescriptor : public TableDescriptor {
194
public:
195
    BrokerTableDescriptor(const TTableDescriptor& tdesc);
196
    ~BrokerTableDescriptor() override;
197
    std::string debug_string() const override;
198
199
// Intentionally empty: no private members are declared.
private:
200
};
201
202
// TableDescriptor subclass with its own constructor, destructor and
// debug_string() (all defined out of line); it declares no extra members.
class HiveTableDescriptor : public TableDescriptor {
203
public:
204
    HiveTableDescriptor(const TTableDescriptor& tdesc);
205
    ~HiveTableDescriptor() override;
206
    std::string debug_string() const override;
207
208
// Intentionally empty: no private members are declared.
private:
209
};
210
211
// TableDescriptor subclass with its own constructor, destructor and
// debug_string() (all defined out of line); it declares no extra members.
class IcebergTableDescriptor : public TableDescriptor {
212
public:
213
    IcebergTableDescriptor(const TTableDescriptor& tdesc);
214
    ~IcebergTableDescriptor() override;
215
    std::string debug_string() const override;
216
217
// Intentionally empty: no private members are declared.
private:
218
};
219
220
// TableDescriptor subclass carrying MaxCompute connection/location strings and
// an initialization Status.
//
// NOTE(review): every string getter below returns std::string by value, i.e. a
// copy per call, whereas JdbcTableDescriptor in this header returns const
// references. Switching would change the return type, so it is only flagged here.
class MaxComputeTableDescriptor : public TableDescriptor {
221
public:
222
    MaxComputeTableDescriptor(const TTableDescriptor& tdesc);
223
    ~MaxComputeTableDescriptor() override;
224
    std::string debug_string() const override;
225
0
    std::string region() const { return _region; }
226
0
    std::string project() const { return _project; }
227
0
    std::string table() const { return _table; }
228
0
    std::string odps_url() const { return _odps_url; }
229
0
    std::string tunnel_url() const { return _tunnel_url; }
230
0
    std::string access_key() const { return _access_key; }
231
0
    std::string secret_key() const { return _secret_key; }
232
0
    std::string public_access() const { return _public_access; }
233
0
    std::string endpoint() const { return _endpoint; }
234
0
    std::string quota() const { return _quota; }
235
0
    // Returns a copy of the stored status, which defaults to Status::OK().
    Status init_status() const { return _init_status; }
236
237
private:
238
    // Fields tagged "deprecated" are still stored and still exposed by getters.
    std::string _region; //deprecated
239
    std::string _project;
240
    std::string _table;
241
    std::string _odps_url;   //deprecated
242
    std::string _tunnel_url; //deprecated
243
    std::string _access_key;
244
    std::string _secret_key;
245
    std::string _public_access; //deprecated
246
    std::string _endpoint;
247
    std::string _quota;
248
    Status _init_status = Status::OK();
249
};
250
251
// TableDescriptor subclass with its own constructor, destructor and
// debug_string() (all defined out of line); it declares no extra members.
class TrinoConnectorTableDescriptor : public TableDescriptor {
252
public:
253
    TrinoConnectorTableDescriptor(const TTableDescriptor& tdesc);
254
    ~TrinoConnectorTableDescriptor() override;
255
    std::string debug_string() const override;
256
257
// Intentionally empty: no private members are declared.
private:
258
};
259
260
// TableDescriptor subclass with its own constructor, destructor and
// debug_string() (all defined out of line); it declares no extra members.
class EsTableDescriptor : public TableDescriptor {
261
public:
262
    EsTableDescriptor(const TTableDescriptor& tdesc);
263
    ~EsTableDescriptor() override;
264
    std::string debug_string() const override;
265
266
// Intentionally empty: no private members are declared.
private:
267
};
268
269
// TableDescriptor subclass carrying MySQL connection strings (database, table,
// host, port, user, password, charset). All values are stored as std::string,
// including the port.
//
// NOTE(review): the getters return std::string by value (a copy per call).
class MySQLTableDescriptor : public TableDescriptor {
270
public:
271
    MySQLTableDescriptor(const TTableDescriptor& tdesc);
272
    std::string debug_string() const override;
273
0
    std::string mysql_db() const { return _mysql_db; }
274
0
    std::string mysql_table() const { return _mysql_table; }
275
0
    std::string host() const { return _host; }
276
0
    std::string port() const { return _port; }
277
0
    std::string user() const { return _user; }
278
0
    std::string passwd() const { return _passwd; }
279
0
    std::string charset() const { return _charset; }
280
281
private:
282
    std::string _mysql_db;
283
    std::string _mysql_table;
284
    std::string _host;
285
    std::string _port;
286
    std::string _user;
287
    std::string _passwd;
288
    std::string _charset;
289
};
290
291
// TableDescriptor subclass carrying JDBC connection settings: catalog id,
// resource/driver/url/table/user/password strings and connection-pool
// parameters. String getters return const references to the stored members.
class JdbcTableDescriptor : public TableDescriptor {
292
public:
293
    JdbcTableDescriptor(const TTableDescriptor& tdesc);
294
    std::string debug_string() const override;
295
0
    int64_t jdbc_catalog_id() const { return _jdbc_catalog_id; }
296
0
    const std::string& jdbc_resource_name() const { return _jdbc_resource_name; }
297
0
    const std::string& jdbc_driver_url() const { return _jdbc_driver_url; }
298
0
    const std::string& jdbc_driver_class() const { return _jdbc_driver_class; }
299
0
    const std::string& jdbc_driver_checksum() const { return _jdbc_driver_checksum; }
300
0
    const std::string& jdbc_url() const { return _jdbc_url; }
301
0
    const std::string& jdbc_table_name() const { return _jdbc_table_name; }
302
0
    const std::string& jdbc_user() const { return _jdbc_user; }
303
0
    const std::string& jdbc_passwd() const { return _jdbc_passwd; }
304
0
    // Connection-pool settings; units of the *_time values are not stated in
    // this header -- TODO confirm against the thrift definition.
    int32_t connection_pool_min_size() const { return _connection_pool_min_size; }
305
0
    int32_t connection_pool_max_size() const { return _connection_pool_max_size; }
306
0
    int32_t connection_pool_max_wait_time() const { return _connection_pool_max_wait_time; }
307
0
    int32_t connection_pool_max_life_time() const { return _connection_pool_max_life_time; }
308
0
    bool connection_pool_keep_alive() const { return _connection_pool_keep_alive; }
309
310
private:
311
    // The scalar members have no in-class initializers; they rely on the
    // out-of-line constructor to be set.
    int64_t _jdbc_catalog_id;
312
    std::string _jdbc_resource_name;
313
    std::string _jdbc_driver_url;
314
    std::string _jdbc_driver_class;
315
    std::string _jdbc_driver_checksum;
316
    std::string _jdbc_url;
317
    std::string _jdbc_table_name;
318
    std::string _jdbc_user;
319
    std::string _jdbc_passwd;
320
    int32_t _connection_pool_min_size;
321
    int32_t _connection_pool_max_size;
322
    int32_t _connection_pool_max_wait_time;
323
    int32_t _connection_pool_max_life_time;
324
    bool _connection_pool_keep_alive;
325
};
326
327
// Describes a tuple: its id, an optional table descriptor and the list of
// slot descriptors that make it up.
//
// Move construction and copy assignment are deleted. The thrift/protobuf
// constructors are private, so instances are created by the friend classes
// listed below. When `_own_slots` is true the destructor deletes every slot
// pointer in `_slots`; otherwise the slots are left untouched.
class TupleDescriptor {
328
public:
329
    TupleDescriptor(TupleDescriptor&&) = delete;
330
    void operator=(const TupleDescriptor&) = delete;
331
332
292k
    MOCK_DEFINE(virtual) ~TupleDescriptor() {
333
292k
        // Only free the slots when this tuple was told it owns them.
        if (_own_slots) {
334
13
            for (SlotDescriptor* slot : _slots) {
335
13
                delete slot;
336
13
            }
337
4
        }
338
292k
    }
339
340
    // Default constructor wrapped in MOCK_DEFINE (see common/be_mock_util.h,
    // not shown here). It initializes only `_id`; every other member takes its
    // in-class default.
    MOCK_DEFINE(TupleDescriptor() : _id {0} {};)
341
342
504k
    int num_materialized_slots() const { return _num_materialized_slots; }
343
1.32M
    MOCK_FUNCTION const std::vector<SlotDescriptor*>& slots() const { return _slots; }
344
345
487k
    bool has_varlen_slots() const { return _has_varlen_slots; }
346
0
    // May be null: the member defaults to nullptr.
    const TableDescriptor* table_desc() const { return _table_desc; }
347
348
1.01M
    TupleId id() const { return _id; }
349
350
    std::string debug_string() const;
351
352
    // Writes this tuple into the protobuf message `ptuple` (defined out of line).
    void to_protobuf(PTupleDescriptor* ptuple) const;
353
354
private:
355
    friend class DescriptorTbl;
356
    friend class SchemaScanner;
357
    friend class OlapTableSchemaParam;
358
    friend class PInternalServiceImpl;
359
    friend class RowIdStorageReader;
360
    friend class TabletSchema;
361
362
    const TupleId _id;
363
    TableDescriptor* _table_desc = nullptr;
364
    // Defaulted so the inline default constructor above, which sets only `_id`,
    // never leaves this counter indeterminate.
    int _num_materialized_slots = 0;
365
    std::vector<SlotDescriptor*> _slots; // contains all slots
366
367
    // Provide quick way to check if there are variable length slots.
368
    // (The _string_slots / _collection_slots members an earlier version of this
    // comment referred to are not declared in this class.)
369
    // Defaulted for the same reason as _num_materialized_slots.
    bool _has_varlen_slots = false;
370
    bool _own_slots = false;
371
372
    TupleDescriptor(const TTupleDescriptor& tdesc, bool own_slot = false);
373
    TupleDescriptor(const PTupleDescriptor& tdesc, bool own_slot = false);
374
375
    void add_slot(SlotDescriptor* slot);
376
};
377
378
// Registry of table, tuple and slot descriptors, each keyed by its id.
// The default constructor is private; instances are obtained through create().
class DescriptorTbl {
379
public:
380
    // Creates a descriptor tbl within 'pool' from thrift_tbl and returns it via 'tbl'.
381
    // Returns OK on success, otherwise error (in which case 'tbl' will be unset).
382
    static Status create(ObjectPool* pool, const TDescriptorTable& thrift_tbl, DescriptorTbl** tbl);
383
384
    // Lookups by id (defined out of line; behaviour for an unknown id is not
    // visible in this header).
    TableDescriptor* get_table_descriptor(TableId id) const;
385
    TupleDescriptor* get_tuple_descriptor(TupleId id) const;
386
    SlotDescriptor* get_slot_descriptor(SlotId id) const;
387
0
    const std::vector<TTupleId>& get_row_tuples() const { return _row_tuples; }
388
389
    // return all registered tuple descriptors
    // The result follows the iteration order of the underlying unordered_map,
    // so callers must not rely on any particular ordering.
390
47
    std::vector<TupleDescriptor*> get_tuple_descs() const {
391
47
        std::vector<TupleDescriptor*> descs;
392
        descs.reserve(_tuple_desc_map.size());
393
104
        for (const auto& entry : _tuple_desc_map) {
394
104
            descs.push_back(entry.second);
395
104
        }
396
397
47
        return descs;
398
47
    }
399
400
    std::string debug_string() const;
401
402
private:
403
    using TableDescriptorMap = std::unordered_map<TableId, TableDescriptor*>;
404
    using TupleDescriptorMap = std::unordered_map<TupleId, TupleDescriptor*>;
405
    using SlotDescriptorMap = std::unordered_map<SlotId, SlotDescriptor*>;
406
407
    TableDescriptorMap _tbl_desc_map;
408
    TupleDescriptorMap _tuple_desc_map;
409
    SlotDescriptorMap _slot_desc_map;
410
    std::vector<TTupleId> _row_tuples;
411
412
76.2k
    DescriptorTbl() = default;
413
};
414
415
// Returns Status::InternalError from the enclosing function when `tuple_idx`
// equals RowDescriptor::INVALID_IDX; `tuple_id` is only used in the message.
// `tuple_idx` is parenthesized so that an expression argument (e.g. `a | b`)
// is compared as a whole rather than being split by operator precedence.
#define RETURN_IF_INVALID_TUPLE_IDX(tuple_id, tuple_idx)                                         \
416
0
    do {                                                                                         \
417
0
        if (UNLIKELY(RowDescriptor::INVALID_IDX == (tuple_idx))) {                               \
418
0
            return Status::InternalError("failed to get tuple idx with tuple id: {}", tuple_id); \
419
0
        }                                                                                        \
420
0
    } while (false)
421
422
// Records positions of tuples within row produced by ExecNode.
423
// TODO: this needs to differentiate between tuples contained in row
424
// and tuples produced by ExecNode (parallel to PlanNode.rowTupleIds and
425
// PlanNode.tupleIds); right now, we conflate the two (and distinguish based on
426
// context; for instance, HdfsScanNode uses these tids to create row batches, ie, the
427
// first case, whereas TopNNode uses these tids to copy output rows, ie, the second
428
// case)
429
// Holds the ordered list of tuple descriptors that make up a row, together
// with per-tuple nullability, a TupleId -> position map, and cached slot counts.
class RowDescriptor {
430
public:
431
    RowDescriptor(const DescriptorTbl& desc_tbl, const std::vector<TTupleId>& row_tuples,
432
                  const std::vector<bool>& nullable_tuples);
433
434
    // standard copy c'tor, made explicit here
    // Copies the three vectors and the varlen flag, then recomputes both slot
    // counters by summing over the source's tuple descriptors (the counters
    // start from their in-class default of 0).
435
    RowDescriptor(const RowDescriptor& desc)
436
0
            : _tuple_desc_map(desc._tuple_desc_map),
437
0
              _tuple_idx_nullable_map(desc._tuple_idx_nullable_map),
438
0
              _tuple_idx_map(desc._tuple_idx_map),
439
0
              _has_varlen_slots(desc._has_varlen_slots) {
440
0
        auto it = desc._tuple_desc_map.begin();
441
0
        for (; it != desc._tuple_desc_map.end(); ++it) {
442
0
            _num_materialized_slots += (*it)->num_materialized_slots();
443
0
            // NOTE(review): size() is unsigned; the sum is narrowed into an int.
            _num_slots += (*it)->slots().size();
444
0
        }
445
0
    }
446
447
    RowDescriptor(TupleDescriptor* tuple_desc, bool is_nullable);
448
449
    RowDescriptor(const RowDescriptor& lhs_row_desc, const RowDescriptor& rhs_row_desc);
450
451
    // dummy descriptor, needed for the JNI EvalPredicate() function
452
144k
    RowDescriptor() = default;
453
454
    MOCK_DEFINE(virtual ~RowDescriptor() = default;)
455
456
875k
    int num_materialized_slots() const { return _num_materialized_slots; }
457
458
2
    int num_slots() const { return _num_slots; }
459
460
    // Sentinel returned by get_tuple_idx(); its value is defined out of line.
    static const int INVALID_IDX;
461
462
    // Returns INVALID_IDX if id not part of this row.
463
    int get_tuple_idx(TupleId id) const;
464
465
    // Return true if any Tuple has variable length slots.
466
0
    bool has_varlen_slots() const { return _has_varlen_slots; }
467
468
    // Return descriptors for all tuples in this row, in order of appearance.
469
360k
    MOCK_FUNCTION const std::vector<TupleDescriptor*>& tuple_descriptors() const {
470
360k
        return _tuple_desc_map;
471
360k
    }
472
473
    // Populate row_tuple_ids with our ids.
    // NOTE(review): to_thrift is non-const while to_protobuf is const -- confirm
    // whether to_thrift actually needs to mutate this object.
474
    void to_thrift(std::vector<TTupleId>* row_tuple_ids);
475
    void to_protobuf(google::protobuf::RepeatedField<google::protobuf::int32>* row_tuple_ids) const;
476
477
    // Return true if the tuple ids of this descriptor are a prefix
478
    // of the tuple ids of other_desc.
479
    bool is_prefix_of(const RowDescriptor& other_desc) const;
480
481
    // Return true if the tuple ids of this descriptor match tuple ids of other desc.
482
    bool equals(const RowDescriptor& other_desc) const;
483
484
    std::string debug_string() const;
485
486
    int get_column_id(int slot_id, bool force_materialize_slot = false) const;
487
488
private:
489
    // Initializes tupleIdxMap during c'tor using the _tuple_desc_map.
490
    void init_tuple_idx_map();
491
492
    // Initializes _has_varlen_slots during c'tor using the _tuple_desc_map.
493
    void init_has_varlen_slots();
494
495
    // map from position of tuple w/in row to its descriptor
496
    std::vector<TupleDescriptor*> _tuple_desc_map;
497
498
    // _tuple_idx_nullable_map[i] is true if tuple i can be null
499
    std::vector<bool> _tuple_idx_nullable_map;
500
501
    // map from TupleId to position of tuple w/in row
502
    std::vector<int> _tuple_idx_map;
503
504
    // Provide quick way to check if there are variable length slots.
505
    bool _has_varlen_slots = false;
506
507
    // Cached totals across all tuple descriptors in this row.
    int _num_materialized_slots = 0;
508
    int _num_slots = 0;
509
};
510
} // namespace doris