Coverage Report

Created: 2024-11-21 13:02

/root/doris/be/src/runtime/descriptors.h
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/apache/impala/blob/branch-2.9.0/be/src/runtime/descriptors.h
19
// and modified by Doris
20
21
#pragma once
22
23
#include <gen_cpp/Descriptors_types.h>
24
#include <gen_cpp/Types_types.h>
25
#include <glog/logging.h>
26
#include <google/protobuf/stubs/port.h>
27
#include <stdint.h>
28
29
#include <ostream>
30
#include <string>
31
#include <unordered_map>
32
#include <utility>
33
#include <vector>
34
35
#include "common/compiler_util.h" // IWYU pragma: keep
36
#include "common/global_types.h"
37
#include "common/status.h"
38
#include "runtime/define_primitive_type.h"
39
#include "runtime/types.h"
40
#include "vec/data_types/data_type.h"
41
42
namespace google::protobuf {
43
template <typename Element>
44
class RepeatedField;
45
} // namespace google::protobuf
46
47
namespace doris {
48
49
class ObjectPool;
50
class PTupleDescriptor;
51
class PSlotDescriptor;
52
53
class SlotDescriptor {
54
public:
55
    // virtual ~SlotDescriptor() {};
56
1.24k
    SlotId id() const { return _id; }
57
956
    const TypeDescriptor& type() const { return _type; }
58
0
    TupleId parent() const { return _parent; }
59
    // Returns the column index of this slot, including partition keys.
60
    // (e.g., col_pos - num_partition_keys = the table column this slot corresponds to)
61
0
    int col_pos() const { return _col_pos; }
62
    // Returns the field index in the generated llvm struct for this slot's tuple
63
0
    int field_idx() const { return _field_idx; }
64
394
    bool is_materialized() const { return _is_materialized; }
65
529
    bool is_nullable() const { return _is_nullable; }
66
67
438
    const std::string& col_name() const { return _col_name; }
68
0
    const std::string& col_name_lower_case() const { return _col_name_lower_case; }
69
70
    void to_protobuf(PSlotDescriptor* pslot) const;
71
72
    std::string debug_string() const;
73
74
    vectorized::MutableColumnPtr get_empty_mutable_column() const;
75
76
    doris::vectorized::DataTypePtr get_data_type_ptr() const;
77
78
23
    int32_t col_unique_id() const { return _col_unique_id; }
79
80
0
    bool is_key() const { return _is_key; }
81
50
    bool need_materialize() const { return _need_materialize; }
82
0
    const std::vector<std::string>& column_paths() const { return _column_paths; };
83
84
14
    bool is_auto_increment() const { return _is_auto_increment; }
85
86
4
    const std::string& col_default_value() const { return _col_default_value; }
87
206
    PrimitiveType col_type() const { return _col_type; }
88
89
private:
90
    friend class DescriptorTbl;
91
    friend class TupleDescriptor;
92
    friend class SchemaScanner;
93
    friend class OlapTableSchemaParam;
94
    friend class PInternalServiceImpl;
95
    friend class Tablet;
96
    friend class TabletSchema;
97
98
    const SlotId _id;
99
    const TypeDescriptor _type;
100
    const TupleId _parent;
101
    const int _col_pos;
102
    bool _is_nullable;
103
    const std::string _col_name;
104
    const std::string _col_name_lower_case;
105
106
    const int32_t _col_unique_id;
107
    const PrimitiveType _col_type;
108
109
    // the idx of the slot in the tuple descriptor (0-based).
110
    // this is provided by the FE
111
    const int _slot_idx;
112
113
    // the idx of the slot in the llvm codegen'd tuple struct
114
    // this is set by TupleDescriptor during codegen and takes into account
115
    // leading null bytes.
116
    int _field_idx;
117
118
    const bool _is_materialized;
119
120
    const bool _is_key;
121
    const bool _need_materialize;
122
    const std::vector<std::string> _column_paths;
123
124
    const bool _is_auto_increment;
125
    const std::string _col_default_value;
126
127
    SlotDescriptor(const TSlotDescriptor& tdesc);
128
    SlotDescriptor(const PSlotDescriptor& pdesc);
129
};
130
131
// Base class for table descriptors.
132
class TableDescriptor {
133
public:
134
    TableDescriptor(const TTableDescriptor& tdesc);
135
3
    virtual ~TableDescriptor() = default;
136
0
    int num_cols() const { return _num_cols; }
137
0
    int num_clustering_cols() const { return _num_clustering_cols; }
138
    virtual std::string debug_string() const;
139
140
    // The first _num_clustering_cols columns by position are clustering
141
    // columns.
142
0
    bool is_clustering_col(const SlotDescriptor* slot_desc) const {
143
0
        return slot_desc->col_pos() < _num_clustering_cols;
144
0
    }
145
146
0
    ::doris::TTableType::type table_type() const { return _table_type; }
147
0
    const std::string& name() const { return _name; }
148
0
    const std::string& database() const { return _database; }
149
0
    int64_t table_id() const { return _table_id; }
150
151
private:
152
    ::doris::TTableType::type _table_type;
153
    std::string _name;
154
    std::string _database;
155
    int64_t _table_id;
156
    int _num_cols;
157
    int _num_clustering_cols;
158
};
159
160
class OlapTableDescriptor : public TableDescriptor {
161
public:
162
    OlapTableDescriptor(const TTableDescriptor& tdesc);
163
    std::string debug_string() const override;
164
};
165
166
class SchemaTableDescriptor : public TableDescriptor {
167
public:
168
    SchemaTableDescriptor(const TTableDescriptor& tdesc);
169
    ~SchemaTableDescriptor() override;
170
    std::string debug_string() const override;
171
0
    TSchemaTableType::type schema_table_type() const { return _schema_table_type; }
172
173
private:
174
    TSchemaTableType::type _schema_table_type;
175
};
176
177
class BrokerTableDescriptor : public TableDescriptor {
178
public:
179
    BrokerTableDescriptor(const TTableDescriptor& tdesc);
180
    ~BrokerTableDescriptor() override;
181
    std::string debug_string() const override;
182
183
private:
184
};
185
186
class HiveTableDescriptor : public TableDescriptor {
187
public:
188
    HiveTableDescriptor(const TTableDescriptor& tdesc);
189
    ~HiveTableDescriptor() override;
190
    std::string debug_string() const override;
191
192
private:
193
};
194
195
class IcebergTableDescriptor : public TableDescriptor {
196
public:
197
    IcebergTableDescriptor(const TTableDescriptor& tdesc);
198
    ~IcebergTableDescriptor() override;
199
    std::string debug_string() const override;
200
201
private:
202
};
203
204
class MaxComputeTableDescriptor : public TableDescriptor {
205
public:
206
    MaxComputeTableDescriptor(const TTableDescriptor& tdesc);
207
    ~MaxComputeTableDescriptor() override;
208
    std::string debug_string() const override;
209
0
    std::string region() const { return _region; }
210
0
    std::string project() const { return _project; }
211
0
    std::string table() const { return _table; }
212
0
    std::string odps_url() const { return _odps_url; }
213
0
    std::string tunnel_url() const { return _tunnel_url; }
214
0
    std::string access_key() const { return _access_key; }
215
0
    std::string secret_key() const { return _secret_key; }
216
0
    std::string public_access() const { return _public_access; }
217
0
    std::string endpoint() const { return _endpoint; }
218
0
    std::string quota() const { return _quota; }
219
0
    Status init_status() const { return _init_status; }
220
221
private:
222
    std::string _region; //deprecated
223
    std::string _project;
224
    std::string _table;
225
    std::string _odps_url;   //deprecated
226
    std::string _tunnel_url; //deprecated
227
    std::string _access_key;
228
    std::string _secret_key;
229
    std::string _public_access; //deprecated
230
    std::string _endpoint;
231
    std::string _quota;
232
    Status _init_status = Status::OK();
233
};
234
235
class EsTableDescriptor : public TableDescriptor {
236
public:
237
    EsTableDescriptor(const TTableDescriptor& tdesc);
238
    ~EsTableDescriptor() override;
239
    std::string debug_string() const override;
240
241
private:
242
};
243
244
class MySQLTableDescriptor : public TableDescriptor {
245
public:
246
    MySQLTableDescriptor(const TTableDescriptor& tdesc);
247
    std::string debug_string() const override;
248
0
    std::string mysql_db() const { return _mysql_db; }
249
0
    std::string mysql_table() const { return _mysql_table; }
250
0
    std::string host() const { return _host; }
251
0
    std::string port() const { return _port; }
252
0
    std::string user() const { return _user; }
253
0
    std::string passwd() const { return _passwd; }
254
0
    std::string charset() const { return _charset; }
255
256
private:
257
    std::string _mysql_db;
258
    std::string _mysql_table;
259
    std::string _host;
260
    std::string _port;
261
    std::string _user;
262
    std::string _passwd;
263
    std::string _charset;
264
};
265
266
class ODBCTableDescriptor : public TableDescriptor {
267
public:
268
    ODBCTableDescriptor(const TTableDescriptor& tdesc);
269
    std::string debug_string() const override;
270
0
    std::string db() const { return _db; }
271
0
    std::string table() const { return _table; }
272
0
    std::string host() const { return _host; }
273
0
    std::string port() const { return _port; }
274
0
    std::string user() const { return _user; }
275
0
    std::string passwd() const { return _passwd; }
276
0
    std::string driver() const { return _driver; }
277
0
    TOdbcTableType::type type() const { return _type; }
278
279
private:
280
    std::string _db;
281
    std::string _table;
282
    std::string _host;
283
    std::string _port;
284
    std::string _user;
285
    std::string _passwd;
286
    std::string _driver;
287
    TOdbcTableType::type _type;
288
};
289
290
class JdbcTableDescriptor : public TableDescriptor {
291
public:
292
    JdbcTableDescriptor(const TTableDescriptor& tdesc);
293
    std::string debug_string() const override;
294
0
    int64_t jdbc_catalog_id() const { return _jdbc_catalog_id; }
295
0
    const std::string& jdbc_resource_name() const { return _jdbc_resource_name; }
296
0
    const std::string& jdbc_driver_url() const { return _jdbc_driver_url; }
297
0
    const std::string& jdbc_driver_class() const { return _jdbc_driver_class; }
298
0
    const std::string& jdbc_driver_checksum() const { return _jdbc_driver_checksum; }
299
0
    const std::string& jdbc_url() const { return _jdbc_url; }
300
0
    const std::string& jdbc_table_name() const { return _jdbc_table_name; }
301
0
    const std::string& jdbc_user() const { return _jdbc_user; }
302
0
    const std::string& jdbc_passwd() const { return _jdbc_passwd; }
303
0
    int32_t connection_pool_min_size() const { return _connection_pool_min_size; }
304
0
    int32_t connection_pool_max_size() const { return _connection_pool_max_size; }
305
0
    int32_t connection_pool_max_wait_time() const { return _connection_pool_max_wait_time; }
306
0
    int32_t connection_pool_max_life_time() const { return _connection_pool_max_life_time; }
307
0
    bool connection_pool_keep_alive() const { return _connection_pool_keep_alive; }
308
309
private:
310
    int64_t _jdbc_catalog_id;
311
    std::string _jdbc_resource_name;
312
    std::string _jdbc_driver_url;
313
    std::string _jdbc_driver_class;
314
    std::string _jdbc_driver_checksum;
315
    std::string _jdbc_url;
316
    std::string _jdbc_table_name;
317
    std::string _jdbc_user;
318
    std::string _jdbc_passwd;
319
    int32_t _connection_pool_min_size;
320
    int32_t _connection_pool_max_size;
321
    int32_t _connection_pool_max_wait_time;
322
    int32_t _connection_pool_max_life_time;
323
    bool _connection_pool_keep_alive;
324
};
325
326
class TupleDescriptor {
327
public:
328
    TupleDescriptor(TupleDescriptor&&) = delete;
329
    void operator=(const TupleDescriptor&) = delete;
330
331
76
    ~TupleDescriptor() {
332
76
        if (_own_slots) {
333
28
            for (SlotDescriptor* slot : _slots) {
334
28
                delete slot;
335
28
            }
336
7
        }
337
76
    }
338
10
    int num_materialized_slots() const { return _num_materialized_slots; }
339
407
    const std::vector<SlotDescriptor*>& slots() const { return _slots; }
340
341
43
    bool has_varlen_slots() const { return _has_varlen_slots; }
342
0
    const TableDescriptor* table_desc() const { return _table_desc; }
343
344
89
    TupleId id() const { return _id; }
345
346
    std::string debug_string() const;
347
348
    void to_protobuf(PTupleDescriptor* ptuple) const;
349
350
private:
351
    friend class DescriptorTbl;
352
    friend class SchemaScanner;
353
    friend class OlapTableSchemaParam;
354
    friend class PInternalServiceImpl;
355
    friend class TabletSchema;
356
357
    const TupleId _id;
358
    TableDescriptor* _table_desc = nullptr;
359
    int _num_materialized_slots;
360
    std::vector<SlotDescriptor*> _slots; // contains all slots
361
362
    // Provide quick way to check if there are variable length slots.
363
    // True if _string_slots or _collection_slots have entries.
364
    bool _has_varlen_slots;
365
    bool _own_slots = false;
366
367
    TupleDescriptor(const TTupleDescriptor& tdesc, bool own_slot = false);
368
    TupleDescriptor(const PTupleDescriptor& tdesc, bool own_slot = false);
369
370
    void add_slot(SlotDescriptor* slot);
371
};
372
373
class DescriptorTbl {
374
public:
375
    // Creates a descriptor tbl within 'pool' from thrift_tbl and returns it via 'tbl'.
376
    // Returns OK on success, otherwise error (in which case 'tbl' will be unset).
377
    static Status create(ObjectPool* pool, const TDescriptorTable& thrift_tbl, DescriptorTbl** tbl);
378
379
    TableDescriptor* get_table_descriptor(TableId id) const;
380
    TupleDescriptor* get_tuple_descriptor(TupleId id) const;
381
    SlotDescriptor* get_slot_descriptor(SlotId id) const;
382
0
    const std::vector<TTupleId>& get_row_tuples() const { return _row_tuples; }
383
384
    // return all registered tuple descriptors
385
0
    std::vector<TupleDescriptor*> get_tuple_descs() const {
386
0
        std::vector<TupleDescriptor*> descs;
387
0
388
0
        for (auto it : _tuple_desc_map) {
389
0
            descs.push_back(it.second);
390
0
        }
391
0
392
0
        return descs;
393
0
    }
394
395
    std::string debug_string() const;
396
397
private:
398
    using TableDescriptorMap = std::unordered_map<TableId, TableDescriptor*>;
399
    using TupleDescriptorMap = std::unordered_map<TupleId, TupleDescriptor*>;
400
    using SlotDescriptorMap = std::unordered_map<SlotId, SlotDescriptor*>;
401
402
    TableDescriptorMap _tbl_desc_map;
403
    TupleDescriptorMap _tuple_desc_map;
404
    SlotDescriptorMap _slot_desc_map;
405
    std::vector<TTupleId> _row_tuples;
406
407
28
    DescriptorTbl() = default;
408
};
409
410
#define RETURN_IF_INVALID_TUPLE_IDX(tuple_id, tuple_idx)                                         \
411
0
    do {                                                                                         \
412
0
        if (UNLIKELY(RowDescriptor::INVALID_IDX == tuple_idx)) {                                 \
413
0
            return Status::InternalError("failed to get tuple idx with tuple id: {}", tuple_id); \
414
0
        }                                                                                        \
415
0
    } while (false)
416
417
// Records positions of tuples within row produced by ExecNode.
418
// TODO: this needs to differentiate between tuples contained in row
419
// and tuples produced by ExecNode (parallel to PlanNode.rowTupleIds and
420
// PlanNode.tupleIds); right now, we conflate the two (and distinguish based on
421
// context; for instance, HdfsScanNode uses these tids to create row batches, ie, the
422
// first case, whereas TopNNode uses these tids to copy output rows, ie, the second
423
// case)
424
class RowDescriptor {
425
public:
426
    RowDescriptor(const DescriptorTbl& desc_tbl, const std::vector<TTupleId>& row_tuples,
427
                  const std::vector<bool>& nullable_tuples);
428
429
    // standard copy c'tor, made explicit here
430
    RowDescriptor(const RowDescriptor& desc)
431
            : _tuple_desc_map(desc._tuple_desc_map),
432
              _tuple_idx_nullable_map(desc._tuple_idx_nullable_map),
433
              _tuple_idx_map(desc._tuple_idx_map),
434
0
              _has_varlen_slots(desc._has_varlen_slots) {
435
0
        auto it = desc._tuple_desc_map.begin();
436
0
        for (; it != desc._tuple_desc_map.end(); ++it) {
437
0
            _num_materialized_slots += (*it)->num_materialized_slots();
438
0
            _num_slots += (*it)->slots().size();
439
0
        }
440
0
    }
441
442
    RowDescriptor(TupleDescriptor* tuple_desc, bool is_nullable);
443
444
    RowDescriptor(const RowDescriptor& lhs_row_desc, const RowDescriptor& rhs_row_desc);
445
446
    // dummy descriptor, needed for the JNI EvalPredicate() function
447
8
    RowDescriptor() = default;
448
449
0
    int num_materialized_slots() const { return _num_materialized_slots; }
450
451
0
    int num_slots() const { return _num_slots; }
452
453
    static const int INVALID_IDX;
454
455
    // Returns INVALID_IDX if id not part of this row.
456
    int get_tuple_idx(TupleId id) const;
457
458
    // Return true if any Tuple has variable length slots.
459
0
    bool has_varlen_slots() const { return _has_varlen_slots; }
460
461
    // Return descriptors for all tuples in this row, in order of appearance.
462
3
    const std::vector<TupleDescriptor*>& tuple_descriptors() const { return _tuple_desc_map; }
463
464
    // Populate row_tuple_ids with our ids.
465
    void to_thrift(std::vector<TTupleId>* row_tuple_ids);
466
    void to_protobuf(google::protobuf::RepeatedField<google::protobuf::int32>* row_tuple_ids) const;
467
468
    // Return true if the tuple ids of this descriptor are a prefix
469
    // of the tuple ids of other_desc.
470
    bool is_prefix_of(const RowDescriptor& other_desc) const;
471
472
    // Return true if the tuple ids of this descriptor match tuple ids of other desc.
473
    bool equals(const RowDescriptor& other_desc) const;
474
475
    std::string debug_string() const;
476
477
    int get_column_id(int slot_id, bool force_materialize_slot = false) const;
478
479
private:
480
    // Initializes tupleIdxMap during c'tor using the _tuple_desc_map.
481
    void init_tuple_idx_map();
482
483
    // Initializes _has_varlen_slots during c'tor using the _tuple_desc_map.
484
    void init_has_varlen_slots();
485
486
    // map from position of tuple w/in row to its descriptor
487
    std::vector<TupleDescriptor*> _tuple_desc_map;
488
489
    // _tuple_idx_nullable_map[i] is true if tuple i can be null
490
    std::vector<bool> _tuple_idx_nullable_map;
491
492
    // map from TupleId to position of tuple w/in row
493
    std::vector<int> _tuple_idx_map;
494
495
    // Provide quick way to check if there are variable length slots.
496
    bool _has_varlen_slots = false;
497
498
    int _num_materialized_slots = 0;
499
    int _num_slots = 0;
500
};
501
502
} // namespace doris