/root/doris/be/src/runtime/descriptors.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // This file is copied from |
18 | | // https://github.com/apache/impala/blob/branch-2.9.0/be/src/runtime/descriptors.h |
19 | | // and modified by Doris |
20 | | |
21 | | #pragma once |
22 | | |
23 | | #include <gen_cpp/Descriptors_types.h> |
24 | | #include <gen_cpp/Types_types.h> |
25 | | #include <glog/logging.h> |
26 | | #include <google/protobuf/stubs/port.h> |
27 | | #include <stdint.h> |
28 | | |
29 | | #include <ostream> |
30 | | #include <string> |
31 | | #include <unordered_map> |
32 | | #include <utility> |
33 | | #include <vector> |
34 | | |
35 | | #include "common/be_mock_util.h" |
36 | | #include "common/compiler_util.h" // IWYU pragma: keep |
37 | | #include "common/global_types.h" |
38 | | #include "common/object_pool.h" |
39 | | #include "common/status.h" |
40 | | #include "olap/utils.h" |
41 | | #include "runtime/define_primitive_type.h" |
42 | | #include "runtime/types.h" |
43 | | #include "vec/data_types/data_type.h" |
44 | | namespace google::protobuf { |
45 | | template <typename Element> |
46 | | class RepeatedField; |
47 | | } // namespace google::protobuf |
48 | | |
49 | | namespace doris { |
50 | | |
51 | | class ObjectPool; |
52 | | class PTupleDescriptor; |
53 | | class PSlotDescriptor; |
54 | | |
55 | | class SlotDescriptor { |
56 | | public: |
57 | | MOCK_DEFINE(virtual ~SlotDescriptor() = default;) |
58 | 1.39k | SlotId id() const { return _id; } |
59 | 1.16k | const TypeDescriptor& type() const { return _type; } |
60 | 0 | TupleId parent() const { return _parent; } |
61 | | // Returns the column index of this slot, including partition keys. |
62 | | // (e.g., col_pos - num_partition_keys = the table column this slot corresponds to) |
63 | 0 | int col_pos() const { return _col_pos; } |
64 | | // Returns the field index in the generated llvm struct for this slot's tuple |
65 | 0 | int field_idx() const { return _field_idx; } |
66 | 759 | bool is_materialized() const { return _is_materialized; } |
67 | 421 | bool is_nullable() const { return _is_nullable; } |
68 | | |
69 | 445 | const std::string& col_name() const { return _col_name; } |
70 | 0 | const std::string& col_name_lower_case() const { return _col_name_lower_case; } |
71 | | |
72 | | void to_protobuf(PSlotDescriptor* pslot) const; |
73 | | |
74 | | std::string debug_string() const; |
75 | | |
76 | | vectorized::MutableColumnPtr get_empty_mutable_column() const; |
77 | | |
78 | | MOCK_FUNCTION doris::vectorized::DataTypePtr get_data_type_ptr() const; |
79 | | |
80 | 23 | int32_t col_unique_id() const { return _col_unique_id; } |
81 | | |
82 | 0 | bool is_key() const { return _is_key; } |
83 | 0 | const std::vector<std::string>& column_paths() const { return _column_paths; }; |
84 | | |
85 | 0 | bool is_auto_increment() const { return _is_auto_increment; } |
86 | | |
87 | 0 | bool is_skip_bitmap_col() const { return _col_name == SKIP_BITMAP_COL; } |
88 | 0 | bool is_sequence_col() const { return _col_name == SEQUENCE_COL; } |
89 | | |
90 | 4 | const std::string& col_default_value() const { return _col_default_value; } |
91 | 180 | PrimitiveType col_type() const { return _type.type; } |
92 | | |
93 | | private: |
94 | | friend class DescriptorTbl; |
95 | | friend class TupleDescriptor; |
96 | | friend class SchemaScanner; |
97 | | friend class OlapTableSchemaParam; |
98 | | friend class PInternalServiceImpl; |
99 | | friend class RowIdStorageReader; |
100 | | friend class Tablet; |
101 | | friend class TabletSchema; |
102 | | |
103 | | MOCK_REMOVE(const) SlotId _id; |
104 | | const TypeDescriptor _type; |
105 | | const TupleId _parent; |
106 | | const int _col_pos; |
107 | | bool _is_nullable; |
108 | | const std::string _col_name; |
109 | | const std::string _col_name_lower_case; |
110 | | |
111 | | const int32_t _col_unique_id; |
112 | | |
113 | | // the idx of the slot in the tuple descriptor (0-based). |
114 | | // this is provided by the FE |
115 | | const int _slot_idx; |
116 | | |
117 | | // the idx of the slot in the llvm codegen'd tuple struct |
118 | | // this is set by TupleDescriptor during codegen and takes into account |
119 | | // leading null bytes. |
120 | | int _field_idx; |
121 | | |
122 | | const bool _is_materialized; |
123 | | |
124 | | const bool _is_key; |
125 | | const std::vector<std::string> _column_paths; |
126 | | |
127 | | const bool _is_auto_increment; |
128 | | const std::string _col_default_value; |
129 | | |
130 | | SlotDescriptor(const TSlotDescriptor& tdesc); |
131 | | SlotDescriptor(const PSlotDescriptor& pdesc); |
132 | | MOCK_DEFINE(SlotDescriptor();) |
133 | | }; |
134 | | |
135 | | // Base class for table descriptors. |
136 | | class TableDescriptor { |
137 | | public: |
138 | | TableDescriptor(const TTableDescriptor& tdesc); |
139 | 17 | virtual ~TableDescriptor() = default; |
140 | 0 | int num_cols() const { return _num_cols; } |
141 | 0 | int num_clustering_cols() const { return _num_clustering_cols; } |
142 | | virtual std::string debug_string() const; |
143 | | |
144 | | // The first _num_clustering_cols columns by position are clustering |
145 | | // columns. |
146 | 0 | bool is_clustering_col(const SlotDescriptor* slot_desc) const { |
147 | 0 | return slot_desc->col_pos() < _num_clustering_cols; |
148 | 0 | } |
149 | | |
150 | 0 | ::doris::TTableType::type table_type() const { return _table_type; } |
151 | 0 | const std::string& name() const { return _name; } |
152 | 0 | const std::string& database() const { return _database; } |
153 | 0 | int64_t table_id() const { return _table_id; } |
154 | | |
155 | | private: |
156 | | ::doris::TTableType::type _table_type; |
157 | | std::string _name; |
158 | | std::string _database; |
159 | | int64_t _table_id; |
160 | | int _num_cols; |
161 | | int _num_clustering_cols; |
162 | | }; |
163 | | |
164 | | class OlapTableDescriptor : public TableDescriptor { |
165 | | public: |
166 | | OlapTableDescriptor(const TTableDescriptor& tdesc); |
167 | | std::string debug_string() const override; |
168 | | }; |
169 | | |
170 | | class SchemaTableDescriptor : public TableDescriptor { |
171 | | public: |
172 | | SchemaTableDescriptor(const TTableDescriptor& tdesc); |
173 | | ~SchemaTableDescriptor() override; |
174 | | std::string debug_string() const override; |
175 | 0 | TSchemaTableType::type schema_table_type() const { return _schema_table_type; } |
176 | | |
177 | | private: |
178 | | TSchemaTableType::type _schema_table_type; |
179 | | }; |
180 | | |
181 | | class BrokerTableDescriptor : public TableDescriptor { |
182 | | public: |
183 | | BrokerTableDescriptor(const TTableDescriptor& tdesc); |
184 | | ~BrokerTableDescriptor() override; |
185 | | std::string debug_string() const override; |
186 | | |
187 | | private: |
188 | | }; |
189 | | |
190 | | class HiveTableDescriptor : public TableDescriptor { |
191 | | public: |
192 | | HiveTableDescriptor(const TTableDescriptor& tdesc); |
193 | | ~HiveTableDescriptor() override; |
194 | | std::string debug_string() const override; |
195 | | |
196 | | private: |
197 | | }; |
198 | | |
199 | | class IcebergTableDescriptor : public TableDescriptor { |
200 | | public: |
201 | | IcebergTableDescriptor(const TTableDescriptor& tdesc); |
202 | | ~IcebergTableDescriptor() override; |
203 | | std::string debug_string() const override; |
204 | | |
205 | | private: |
206 | | }; |
207 | | |
208 | | class MaxComputeTableDescriptor : public TableDescriptor { |
209 | | public: |
210 | | MaxComputeTableDescriptor(const TTableDescriptor& tdesc); |
211 | | ~MaxComputeTableDescriptor() override; |
212 | | std::string debug_string() const override; |
213 | 0 | std::string region() const { return _region; } |
214 | 0 | std::string project() const { return _project; } |
215 | 0 | std::string table() const { return _table; } |
216 | 0 | std::string odps_url() const { return _odps_url; } |
217 | 0 | std::string tunnel_url() const { return _tunnel_url; } |
218 | 0 | std::string access_key() const { return _access_key; } |
219 | 0 | std::string secret_key() const { return _secret_key; } |
220 | 0 | std::string public_access() const { return _public_access; } |
221 | 0 | std::string endpoint() const { return _endpoint; } |
222 | 0 | std::string quota() const { return _quota; } |
223 | 0 | Status init_status() const { return _init_status; } |
224 | | |
225 | | private: |
226 | | std::string _region; //deprecated |
227 | | std::string _project; |
228 | | std::string _table; |
229 | | std::string _odps_url; //deprecated |
230 | | std::string _tunnel_url; //deprecated |
231 | | std::string _access_key; |
232 | | std::string _secret_key; |
233 | | std::string _public_access; //deprecated |
234 | | std::string _endpoint; |
235 | | std::string _quota; |
236 | | Status _init_status = Status::OK(); |
237 | | }; |
238 | | |
239 | | class TrinoConnectorTableDescriptor : public TableDescriptor { |
240 | | public: |
241 | | TrinoConnectorTableDescriptor(const TTableDescriptor& tdesc); |
242 | | ~TrinoConnectorTableDescriptor() override; |
243 | | std::string debug_string() const override; |
244 | | |
245 | | private: |
246 | | }; |
247 | | |
248 | | class EsTableDescriptor : public TableDescriptor { |
249 | | public: |
250 | | EsTableDescriptor(const TTableDescriptor& tdesc); |
251 | | ~EsTableDescriptor() override; |
252 | | std::string debug_string() const override; |
253 | | |
254 | | private: |
255 | | }; |
256 | | |
257 | | class MySQLTableDescriptor : public TableDescriptor { |
258 | | public: |
259 | | MySQLTableDescriptor(const TTableDescriptor& tdesc); |
260 | | std::string debug_string() const override; |
261 | 0 | std::string mysql_db() const { return _mysql_db; } |
262 | 0 | std::string mysql_table() const { return _mysql_table; } |
263 | 0 | std::string host() const { return _host; } |
264 | 0 | std::string port() const { return _port; } |
265 | 0 | std::string user() const { return _user; } |
266 | 0 | std::string passwd() const { return _passwd; } |
267 | 0 | std::string charset() const { return _charset; } |
268 | | |
269 | | private: |
270 | | std::string _mysql_db; |
271 | | std::string _mysql_table; |
272 | | std::string _host; |
273 | | std::string _port; |
274 | | std::string _user; |
275 | | std::string _passwd; |
276 | | std::string _charset; |
277 | | }; |
278 | | |
279 | | class ODBCTableDescriptor : public TableDescriptor { |
280 | | public: |
281 | | ODBCTableDescriptor(const TTableDescriptor& tdesc); |
282 | | std::string debug_string() const override; |
283 | 0 | std::string db() const { return _db; } |
284 | 0 | std::string table() const { return _table; } |
285 | 0 | std::string host() const { return _host; } |
286 | 0 | std::string port() const { return _port; } |
287 | 0 | std::string user() const { return _user; } |
288 | 0 | std::string passwd() const { return _passwd; } |
289 | 0 | std::string driver() const { return _driver; } |
290 | 0 | TOdbcTableType::type type() const { return _type; } |
291 | | |
292 | | private: |
293 | | std::string _db; |
294 | | std::string _table; |
295 | | std::string _host; |
296 | | std::string _port; |
297 | | std::string _user; |
298 | | std::string _passwd; |
299 | | std::string _driver; |
300 | | TOdbcTableType::type _type; |
301 | | }; |
302 | | |
303 | | class JdbcTableDescriptor : public TableDescriptor { |
304 | | public: |
305 | | JdbcTableDescriptor(const TTableDescriptor& tdesc); |
306 | | std::string debug_string() const override; |
307 | 0 | int64_t jdbc_catalog_id() const { return _jdbc_catalog_id; } |
308 | 0 | const std::string& jdbc_resource_name() const { return _jdbc_resource_name; } |
309 | 0 | const std::string& jdbc_driver_url() const { return _jdbc_driver_url; } |
310 | 0 | const std::string& jdbc_driver_class() const { return _jdbc_driver_class; } |
311 | 0 | const std::string& jdbc_driver_checksum() const { return _jdbc_driver_checksum; } |
312 | 0 | const std::string& jdbc_url() const { return _jdbc_url; } |
313 | 0 | const std::string& jdbc_table_name() const { return _jdbc_table_name; } |
314 | 0 | const std::string& jdbc_user() const { return _jdbc_user; } |
315 | 0 | const std::string& jdbc_passwd() const { return _jdbc_passwd; } |
316 | 0 | int32_t connection_pool_min_size() const { return _connection_pool_min_size; } |
317 | 0 | int32_t connection_pool_max_size() const { return _connection_pool_max_size; } |
318 | 0 | int32_t connection_pool_max_wait_time() const { return _connection_pool_max_wait_time; } |
319 | 0 | int32_t connection_pool_max_life_time() const { return _connection_pool_max_life_time; } |
320 | 0 | bool connection_pool_keep_alive() const { return _connection_pool_keep_alive; } |
321 | | |
322 | | private: |
323 | | int64_t _jdbc_catalog_id; |
324 | | std::string _jdbc_resource_name; |
325 | | std::string _jdbc_driver_url; |
326 | | std::string _jdbc_driver_class; |
327 | | std::string _jdbc_driver_checksum; |
328 | | std::string _jdbc_url; |
329 | | std::string _jdbc_table_name; |
330 | | std::string _jdbc_user; |
331 | | std::string _jdbc_passwd; |
332 | | int32_t _connection_pool_min_size; |
333 | | int32_t _connection_pool_max_size; |
334 | | int32_t _connection_pool_max_wait_time; |
335 | | int32_t _connection_pool_max_life_time; |
336 | | bool _connection_pool_keep_alive; |
337 | | }; |
338 | | |
339 | | class TupleDescriptor { |
340 | | public: |
341 | | TupleDescriptor(TupleDescriptor&&) = delete; |
342 | | void operator=(const TupleDescriptor&) = delete; |
343 | | |
344 | 200 | MOCK_DEFINE(virtual) ~TupleDescriptor() { |
345 | 200 | if (_own_slots) { |
346 | 13 | for (SlotDescriptor* slot : _slots) { |
347 | 13 | delete slot; |
348 | 13 | } |
349 | 4 | } |
350 | 200 | } |
351 | | |
352 | | MOCK_DEFINE(TupleDescriptor() : _id {0} {};) |
353 | | |
354 | 432 | int num_materialized_slots() const { return _num_materialized_slots; } |
355 | 792 | MOCK_FUNCTION const std::vector<SlotDescriptor*>& slots() const { return _slots; } |
356 | | |
357 | 455 | bool has_varlen_slots() const { return _has_varlen_slots; } |
358 | 0 | const TableDescriptor* table_desc() const { return _table_desc; } |
359 | | |
360 | 915 | TupleId id() const { return _id; } |
361 | | |
362 | | std::string debug_string() const; |
363 | | |
364 | | void to_protobuf(PTupleDescriptor* ptuple) const; |
365 | | |
366 | | private: |
367 | | friend class DescriptorTbl; |
368 | | friend class SchemaScanner; |
369 | | friend class OlapTableSchemaParam; |
370 | | friend class PInternalServiceImpl; |
371 | | friend class RowIdStorageReader; |
372 | | friend class TabletSchema; |
373 | | |
374 | | const TupleId _id; |
375 | | TableDescriptor* _table_desc = nullptr; |
376 | | int _num_materialized_slots; |
377 | | std::vector<SlotDescriptor*> _slots; // contains all slots |
378 | | |
379 | | // Provide quick way to check if there are variable length slots. |
380 | | // True if _string_slots or _collection_slots have entries. |
381 | | bool _has_varlen_slots; |
382 | | bool _own_slots = false; |
383 | | |
384 | | TupleDescriptor(const TTupleDescriptor& tdesc, bool own_slot = false); |
385 | | TupleDescriptor(const PTupleDescriptor& tdesc, bool own_slot = false); |
386 | | |
387 | | void add_slot(SlotDescriptor* slot); |
388 | | }; |
389 | | |
390 | | class DescriptorTbl { |
391 | | public: |
392 | | // Creates a descriptor tbl within 'pool' from thrift_tbl and returns it via 'tbl'. |
393 | | // Returns OK on success, otherwise error (in which case 'tbl' will be unset). |
394 | | static Status create(ObjectPool* pool, const TDescriptorTable& thrift_tbl, DescriptorTbl** tbl); |
395 | | |
396 | | TableDescriptor* get_table_descriptor(TableId id) const; |
397 | | TupleDescriptor* get_tuple_descriptor(TupleId id) const; |
398 | | SlotDescriptor* get_slot_descriptor(SlotId id) const; |
399 | 0 | const std::vector<TTupleId>& get_row_tuples() const { return _row_tuples; } |
400 | | |
401 | | // return all registered tuple descriptors |
402 | 33 | std::vector<TupleDescriptor*> get_tuple_descs() const { |
403 | 33 | std::vector<TupleDescriptor*> descs; |
404 | | |
405 | 66 | for (auto it : _tuple_desc_map) { |
406 | 66 | descs.push_back(it.second); |
407 | 66 | } |
408 | | |
409 | 33 | return descs; |
410 | 33 | } |
411 | | |
412 | | std::string debug_string() const; |
413 | | |
414 | | private: |
415 | | using TableDescriptorMap = std::unordered_map<TableId, TableDescriptor*>; |
416 | | using TupleDescriptorMap = std::unordered_map<TupleId, TupleDescriptor*>; |
417 | | using SlotDescriptorMap = std::unordered_map<SlotId, SlotDescriptor*>; |
418 | | |
419 | | TableDescriptorMap _tbl_desc_map; |
420 | | TupleDescriptorMap _tuple_desc_map; |
421 | | SlotDescriptorMap _slot_desc_map; |
422 | | std::vector<TTupleId> _row_tuples; |
423 | | |
424 | 93 | DescriptorTbl() = default; |
425 | | }; |
426 | | |
// Returns Status::InternalError from the enclosing function when `tuple_idx`
// equals RowDescriptor::INVALID_IDX; otherwise does nothing.
// Both arguments are parenthesized in the expansion so that compound
// expressions (e.g. `cond ? a : b`) are compared / formatted as a whole rather
// than being re-associated with the surrounding operators.
#define RETURN_IF_INVALID_TUPLE_IDX(tuple_id, tuple_idx)                                    \
    do {                                                                                    \
        if (UNLIKELY(RowDescriptor::INVALID_IDX == (tuple_idx))) {                          \
            return Status::InternalError("failed to get tuple idx with tuple id: {}",       \
                                         (tuple_id));                                       \
        }                                                                                   \
    } while (false)
433 | | |
434 | | // Records positions of tuples within row produced by ExecNode. |
435 | | // TODO: this needs to differentiate between tuples contained in row |
436 | | // and tuples produced by ExecNode (parallel to PlanNode.rowTupleIds and |
437 | | // PlanNode.tupleIds); right now, we conflate the two (and distinguish based on |
438 | | // context; for instance, HdfsScanNode uses these tids to create row batches, ie, the |
439 | | // first case, whereas TopNNode uses these tids to copy output rows, ie, the second |
440 | | // case) |
441 | | class RowDescriptor { |
442 | | public: |
443 | | RowDescriptor(const DescriptorTbl& desc_tbl, const std::vector<TTupleId>& row_tuples, |
444 | | const std::vector<bool>& nullable_tuples); |
445 | | |
446 | | // standard copy c'tor, made explicit here |
447 | | RowDescriptor(const RowDescriptor& desc) |
448 | | : _tuple_desc_map(desc._tuple_desc_map), |
449 | | _tuple_idx_nullable_map(desc._tuple_idx_nullable_map), |
450 | | _tuple_idx_map(desc._tuple_idx_map), |
451 | 0 | _has_varlen_slots(desc._has_varlen_slots) { |
452 | 0 | auto it = desc._tuple_desc_map.begin(); |
453 | 0 | for (; it != desc._tuple_desc_map.end(); ++it) { |
454 | 0 | _num_materialized_slots += (*it)->num_materialized_slots(); |
455 | 0 | _num_slots += (*it)->slots().size(); |
456 | 0 | } |
457 | 0 | } |
458 | | |
459 | | RowDescriptor(TupleDescriptor* tuple_desc, bool is_nullable); |
460 | | |
461 | | RowDescriptor(const RowDescriptor& lhs_row_desc, const RowDescriptor& rhs_row_desc); |
462 | | |
463 | | // dummy descriptor, needed for the JNI EvalPredicate() function |
464 | 155 | RowDescriptor() = default; |
465 | | |
466 | | MOCK_DEFINE(virtual ~RowDescriptor() = default;) |
467 | | |
468 | 28 | int num_materialized_slots() const { return _num_materialized_slots; } |
469 | | |
470 | 2 | int num_slots() const { return _num_slots; } |
471 | | |
472 | | static const int INVALID_IDX; |
473 | | |
474 | | // Returns INVALID_IDX if id not part of this row. |
475 | | int get_tuple_idx(TupleId id) const; |
476 | | |
477 | | // Return true if any Tuple has variable length slots. |
478 | 0 | bool has_varlen_slots() const { return _has_varlen_slots; } |
479 | | |
480 | | // Return descriptors for all tuples in this row, in order of appearance. |
481 | 22 | MOCK_FUNCTION const std::vector<TupleDescriptor*>& tuple_descriptors() const { |
482 | 22 | return _tuple_desc_map; |
483 | 22 | } |
484 | | |
485 | | // Populate row_tuple_ids with our ids. |
486 | | void to_thrift(std::vector<TTupleId>* row_tuple_ids); |
487 | | void to_protobuf(google::protobuf::RepeatedField<google::protobuf::int32>* row_tuple_ids) const; |
488 | | |
489 | | // Return true if the tuple ids of this descriptor are a prefix |
490 | | // of the tuple ids of other_desc. |
491 | | bool is_prefix_of(const RowDescriptor& other_desc) const; |
492 | | |
493 | | // Return true if the tuple ids of this descriptor match tuple ids of other desc. |
494 | | bool equals(const RowDescriptor& other_desc) const; |
495 | | |
496 | | std::string debug_string() const; |
497 | | |
498 | | int get_column_id(int slot_id, bool force_materialize_slot = false) const; |
499 | | |
500 | | private: |
501 | | // Initializes tupleIdxMap during c'tor using the _tuple_desc_map. |
502 | | void init_tuple_idx_map(); |
503 | | |
504 | | // Initializes _has_varlen_slots during c'tor using the _tuple_desc_map. |
505 | | void init_has_varlen_slots(); |
506 | | |
507 | | // map from position of tuple w/in row to its descriptor |
508 | | std::vector<TupleDescriptor*> _tuple_desc_map; |
509 | | |
510 | | // _tuple_idx_nullable_map[i] is true if tuple i can be null |
511 | | std::vector<bool> _tuple_idx_nullable_map; |
512 | | |
513 | | // map from TupleId to position of tuple w/in row |
514 | | std::vector<int> _tuple_idx_map; |
515 | | |
516 | | // Provide quick way to check if there are variable length slots. |
517 | | bool _has_varlen_slots = false; |
518 | | |
519 | | int _num_materialized_slots = 0; |
520 | | int _num_slots = 0; |
521 | | }; |
522 | | } // namespace doris |