/root/doris/be/src/olap/field.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <cstddef> |
21 | | #include <sstream> |
22 | | #include <string> |
23 | | |
24 | | #include "olap/key_coder.h" |
25 | | #include "olap/olap_common.h" |
26 | | #include "olap/olap_define.h" |
27 | | #include "olap/row_cursor_cell.h" |
28 | | #include "olap/tablet_schema.h" |
29 | | #include "olap/types.h" |
30 | | #include "olap/utils.h" |
31 | | #include "runtime/collection_value.h" |
32 | | #include "runtime/map_value.h" |
33 | | #include "util/hash_util.hpp" |
34 | | #include "util/slice.h" |
35 | | #include "vec/common/arena.h" |
36 | | #include "vec/json/path_in_data.h" |
37 | | |
38 | | namespace doris { |
39 | | #include "common/compile_check_begin.h" |
40 | | // A Field is used to represent a column in memory format. |
41 | | // User can use this class to access or deal with column data in memory. |
42 | | class Field { |
43 | | public: |
44 | | Field(const TabletColumn& column) |
45 | 100M | : _type_info(get_type_info(&column)), |
46 | 100M | _desc(column), |
47 | 100M | _length(column.length()), |
48 | 100M | _key_coder(get_key_coder(column.type())), |
49 | 100M | _name(column.name()), |
50 | 100M | _index_size(column.index_length()), |
51 | 100M | _is_nullable(column.is_nullable()), |
52 | 100M | _unique_id(column.unique_id()), |
53 | 100M | _parent_unique_id(column.parent_unique_id()), |
54 | 100M | _is_extracted_column(column.is_extracted_column()), |
55 | 100M | _path(column.path_info_ptr()) {} |
56 | | |
57 | 101M | virtual ~Field() = default; |
58 | | |
59 | 97.4M | size_t size() const { return _type_info->size(); } |
60 | 0 | size_t length() const { return _length; } |
61 | 0 | size_t field_size() const { return size() + 1; } |
62 | 0 | size_t index_size() const { return _index_size; } |
63 | 63.5M | int32_t unique_id() const { return _unique_id; } |
64 | 62.5k | int32_t parent_unique_id() const { return _parent_unique_id; } |
65 | 55.2M | bool is_extracted_column() const { return _is_extracted_column; } |
66 | 98.0M | const std::string& name() const { return _name; } |
67 | 0 | const vectorized::PathInDataPtr& path() const { return _path; } |
68 | | |
69 | 1.48M | virtual void set_to_max(char* buf) const { return _type_info->set_to_max(buf); } |
70 | 1.48M | |
71 | | virtual void set_to_min(char* buf) const { return _type_info->set_to_min(buf); } |
72 | 1.76M | |
73 | 1.76M | void set_long_text_buf(char** buf) { _long_text_buf = buf; } |
74 | | |
75 | 0 | virtual size_t get_variable_len() const { return 0; } |
76 | | |
77 | | virtual Field* clone() const { |
78 | | auto* local = new Field(_desc); |
79 | 1.95M | this->clone(local); |
80 | 1.95M | return local; |
81 | 1.95M | } |
82 | | |
83 | 1.95M | // Only compare column content, without considering nullptr condition. |
84 | 1.95M | // RETURNS: |
85 | 1.95M | // 0 means equal, |
86 | | // -1 means left less than right, |
87 | 7.82M | // 1 means left bigger than right |
88 | | int compare(const void* left, const void* right) const { return _type_info->cmp(left, right); } |
89 | 1.00M | |
90 | | // Compare two types of cell. |
91 | 2.32M | // This function differs compare in that this function compare cell which |
92 | 2.32M | // will consider the condition which cell may be nullptr. While compare only |
93 | 2.32M | // compare column content without considering nullptr condition. |
94 | 2.32M | // Only compare column content, without considering nullptr condition. |
95 | 2.32M | // RETURNS: |
96 | | // 0 means equal, |
97 | | // -1 means left less than right, |
98 | | // 1 means left bigger than right |
99 | | template <typename LhsCellType, typename RhsCellType> |
100 | | int compare_cell(const LhsCellType& lhs, const RhsCellType& rhs) const { |
101 | | bool l_null = lhs.is_null(); |
102 | 1.65M | bool r_null = rhs.is_null(); |
103 | | if (l_null != r_null) { |
104 | | return l_null ? -1 : 1; |
105 | | } |
106 | | return l_null ? 0 : _type_info->cmp(lhs.cell_ptr(), rhs.cell_ptr()); |
107 | | } |
108 | | |
109 | | // deep copy source cell' content to destination cell. |
110 | | // For string type, this will allocate data form arena, |
111 | | // and copy source's content. |
112 | | template <typename DstCellType, typename SrcCellType> |
113 | | void deep_copy(DstCellType* dst, const SrcCellType& src, vectorized::Arena& arena) const { |
114 | 3.89M | bool is_null = src.is_null(); |
115 | 3.89M | dst->set_is_null(is_null); |
116 | 3.89M | if (is_null) { |
117 | 3.89M | return; |
118 | 18.4E | } |
119 | 220k | _type_info->deep_copy(dst->mutable_cell_ptr(), src.cell_ptr(), arena); |
120 | 3.67M | } |
121 | 3.89M | |
122 | | // used by init scan key stored in string format |
123 | | // value_string should end with '\0' |
124 | | Status from_string(char* buf, const std::string& value_string, const int precision = 0, |
125 | | const int scale = 0) const { |
126 | | if (type() == FieldType::OLAP_FIELD_TYPE_STRING && !value_string.empty()) { |
127 | | auto slice = reinterpret_cast<Slice*>(buf); |
128 | | if (slice->size < value_string.size()) { |
129 | | *_long_text_buf = static_cast<char*>(realloc(*_long_text_buf, value_string.size())); |
130 | | slice->data = *_long_text_buf; |
131 | | slice->size = value_string.size(); |
132 | | } |
133 | | } |
134 | | return _type_info->from_string(buf, value_string, precision, scale); |
135 | | } |
136 | | |
137 | | FieldType type() const { return _type_info->type(); } |
138 | | const TypeInfo* type_info() const { return _type_info.get(); } |
139 | | bool is_nullable() const { return _is_nullable; } |
140 | | |
141 | | // similar to `full_encode_ascending`, but only encode part (the first `index_size` bytes) of the value. |
142 | | // only applicable to string type |
143 | | void encode_ascending(const void* value, std::string* buf) const { |
144 | | _key_coder->encode_ascending(value, _index_size, buf); |
145 | | } |
146 | | |
147 | | // encode the provided `value` into `buf`. |
148 | | void full_encode_ascending(const void* value, std::string* buf) const { |
149 | | _key_coder->full_encode_ascending(value, buf); |
150 | | } |
151 | | void add_sub_field(std::unique_ptr<Field> sub_field) { |
152 | | _sub_fields.emplace_back(std::move(sub_field)); |
153 | | } |
154 | | Field* get_sub_field(size_t i) const { return _sub_fields[i].get(); } |
155 | | size_t get_sub_field_count() const { return _sub_fields.size(); } |
156 | | |
157 | | void set_precision(int32_t precision) { _precision = precision; } |
158 | | void set_scale(int32_t scale) { _scale = scale; } |
159 | | int32_t get_precision() const { return _precision; } |
160 | | int32_t get_scale() const { return _scale; } |
161 | | const TabletColumn& get_desc() const { return _desc; } |
162 | 7.55M | |
163 | 7.55M | int32_t get_unique_id() const { |
164 | 0 | return is_extracted_column() ? parent_unique_id() : unique_id(); |
165 | 0 | } |
166 | 0 |
|
167 | 0 | protected: |
168 | 0 | TypeInfoPtr _type_info; |
169 | 0 | TabletColumn _desc; |
170 | 0 | // unit : byte |
171 | 7.55M | // except for strings, other types have fixed lengths |
172 | 7.55M | // Note that, the struct type itself has fixed length, but due to |
173 | | // its number of subfields is a variable, so the actual length of |
174 | | // a struct field is not fixed. |
175 | | size_t _length; |
176 | 23.7M | // Since the length of the STRING type cannot be determined, |
177 | | // only dynamic memory can be used. Arena cannot realize realloc. |
178 | | // The schema information is shared globally. Therefore, |
179 | | // dynamic memory can only be managed in thread local mode. |
180 | | // The memory will be created and released in rowcursor. |
181 | | char** _long_text_buf = nullptr; |
182 | | |
183 | | char* allocate_string_value(vectorized::Arena& arena) const { |
184 | | char* type_value = arena.alloc(sizeof(Slice)); |
185 | | auto slice = reinterpret_cast<Slice*>(type_value); |
186 | | slice->size = _length; |
187 | | slice->data = arena.alloc(slice->size); |
188 | | return type_value; |
189 | 80.1M | } |
190 | 3.52M | |
191 | 63.3M | void clone(Field* other) const { |
192 | | other->_type_info = clone_type_info(this->_type_info.get()); |
193 | | other->_key_coder = this->_key_coder; |
194 | | other->_name = this->_name; |
195 | 682k | other->_index_size = this->_index_size; |
196 | 682k | other->_is_nullable = this->_is_nullable; |
197 | 682k | other->_sub_fields.clear(); |
198 | | other->_precision = this->_precision; |
199 | | other->_scale = this->_scale; |
200 | 12.3M | other->_unique_id = this->_unique_id; |
201 | 12.3M | other->_parent_unique_id = this->_parent_unique_id; |
202 | 12.3M | other->_is_extracted_column = this->_is_extracted_column; |
203 | 822k | for (const auto& f : _sub_fields) { |
204 | 822k | Field* item = f->clone(); |
205 | 822k | other->add_sub_field(std::unique_ptr<Field>(item)); |
206 | 139k | } |
207 | 45.6k | } |
208 | | |
209 | 5.92M | private: |
210 | 5.92M | // maximum length of Field, unit : bytes |
211 | 7.55M | // usually equal to length, except for variable-length strings |
212 | 7.56M | const KeyCoder* _key_coder; |
213 | 94.8M | std::string _name; |
214 | | size_t _index_size; |
215 | 27.7M | bool _is_nullable; |
216 | 27.7M | std::vector<std::unique_ptr<Field>> _sub_fields; |
217 | 27.7M | int32_t _precision; |
218 | | int32_t _scale; |
219 | | int32_t _unique_id; |
220 | | int32_t _parent_unique_id; |
221 | | bool _is_extracted_column = false; |
222 | | vectorized::PathInDataPtr _path; |
223 | | }; |
224 | | |
225 | | class MapField : public Field { |
226 | | public: |
227 | | MapField(const TabletColumn& column) : Field(column) {} |
228 | | |
229 | | size_t get_variable_len() const override { return _length; } |
230 | | }; |
231 | | |
232 | | class StructField : public Field { |
233 | | public: |
234 | | StructField(const TabletColumn& column) : Field(column) {} |
235 | 0 |
|
236 | 0 | size_t get_variable_len() const override { |
237 | 0 | size_t variable_len = _length; |
238 | 0 | for (size_t i = 0; i < get_sub_field_count(); i++) { |
239 | 0 | variable_len += get_sub_field(i)->get_variable_len(); |
240 | 0 | } |
241 | 0 | return variable_len; |
242 | | } |
243 | 14.2M | }; |
244 | 14.2M | |
245 | 14.2M | class ArrayField : public Field { |
246 | 14.2M | public: |
247 | 14.2M | ArrayField(const TabletColumn& column) : Field(column) {} |
248 | 14.2M | |
249 | 14.2M | size_t get_variable_len() const override { return _length; } |
250 | 14.2M | }; |
251 | 14.2M | |
252 | 14.2M | class CharField : public Field { |
253 | 14.2M | public: |
254 | 14.2M | CharField(const TabletColumn& column) : Field(column) {} |
255 | 14.2M |
|
256 | 0 | size_t get_variable_len() const override { return _length; } |
257 | 0 |
|
258 | 0 | CharField* clone() const override { |
259 | 14.2M | auto* local = new CharField(_desc); |
260 | | Field::clone(local); |
261 | | return local; |
262 | | } |
263 | | |
264 | | void set_to_max(char* ch) const override { |
265 | | auto slice = reinterpret_cast<Slice*>(ch); |
266 | | slice->size = _length; |
267 | | memset(slice->data, 0xFF, slice->size); |
268 | | } |
269 | | }; |
270 | | |
271 | | class VarcharField : public Field { |
272 | | public: |
273 | | VarcharField(const TabletColumn& column) : Field(column) {} |
274 | | |
275 | | size_t get_variable_len() const override { return _length - OLAP_VARCHAR_MAX_BYTES; } |
276 | | |
277 | | VarcharField* clone() const override { |
278 | | auto* local = new VarcharField(_desc); |
279 | 133k | Field::clone(local); |
280 | | return local; |
281 | 0 | } |
282 | | |
283 | | void set_to_max(char* ch) const override { |
284 | | auto slice = reinterpret_cast<Slice*>(ch); |
285 | | slice->size = _length - OLAP_VARCHAR_MAX_BYTES; |
286 | 52.4k | memset(slice->data, 0xFF, slice->size); |
287 | | } |
288 | 0 | }; |
289 | 0 | class StringField : public Field { |
290 | 0 | public: |
291 | 0 | StringField(const TabletColumn& column) : Field(column) {} |
292 | 0 |
|
293 | 0 | StringField* clone() const override { |
294 | 0 | auto* local = new StringField(_desc); |
295 | | Field::clone(local); |
296 | | return local; |
297 | | } |
298 | | |
299 | 401k | void set_to_max(char* ch) const override { |
300 | | auto slice = reinterpret_cast<Slice*>(ch); |
301 | 0 | memset(slice->data, 0xFF, slice->size); |
302 | | } |
303 | | }; |
304 | | |
305 | | class BitmapAggField : public Field { |
306 | 226k | public: |
307 | | BitmapAggField(const TabletColumn& column) : Field(column) {} |
308 | 1.24k | |
309 | | BitmapAggField* clone() const override { |
310 | 1.56k | auto* local = new BitmapAggField(_desc); |
311 | 1.56k | Field::clone(local); |
312 | 1.56k | return local; |
313 | 1.56k | } |
314 | 1.56k | }; |
315 | | |
316 | 0 | class QuantileStateAggField : public Field { |
317 | 0 | public: |
318 | 0 | QuantileStateAggField(const TabletColumn& column) : Field(column) {} |
319 | | |
320 | 0 | QuantileStateAggField* clone() const override { |
321 | 0 | auto* local = new QuantileStateAggField(_desc); |
322 | 0 | Field::clone(local); |
323 | 0 | return local; |
324 | 0 | } |
325 | | }; |
326 | | |
327 | | class AggStateField : public Field { |
328 | | public: |
329 | 54.0k | AggStateField(const TabletColumn& column) : Field(column) {} |
330 | 54.0k | |
331 | 54.0k | AggStateField* clone() const override { |
332 | 54.0k | auto* local = new AggStateField(_desc); |
333 | 54.0k | Field::clone(local); |
334 | 54.0k | return local; |
335 | 54.0k | } |
336 | | }; |
337 | | |
338 | | class HllAggField : public Field { |
339 | | public: |
340 | | HllAggField(const TabletColumn& column) : Field(column) {} |
341 | 31.4k | |
342 | 31.4k | HllAggField* clone() const override { |
343 | 31.4k | auto* local = new HllAggField(_desc); |
344 | 0 | Field::clone(local); |
345 | 0 | return local; |
346 | 31.4k | } |
347 | | }; |
348 | 44.9k | |
349 | 44.9k | class FieldFactory { |
350 | 18.4E | public: |
351 | 44.9k | static Field* create(const TabletColumn& column) { |
352 | 44.9k | // for key column |
353 | 44.9k | if (column.is_key()) { |
354 | | switch (column.type()) { |
355 | | case FieldType::OLAP_FIELD_TYPE_CHAR: |
356 | | return new CharField(column); |
357 | | case FieldType::OLAP_FIELD_TYPE_VARCHAR: |
358 | 9.84M | case FieldType::OLAP_FIELD_TYPE_STRING: |
359 | | return new StringField(column); |
360 | 3 | case FieldType::OLAP_FIELD_TYPE_STRUCT: { |
361 | | auto* local = new StructField(column); |
362 | 0 | for (uint32_t i = 0; i < column.get_subtype_count(); i++) { |
363 | 0 | std::unique_ptr<Field> sub_field( |
364 | 0 | FieldFactory::create(column.get_sub_column(i))); |
365 | 0 | local->add_sub_field(std::move(sub_field)); |
366 | 0 | } |
367 | | return local; |
368 | 0 | } |
369 | 0 | case FieldType::OLAP_FIELD_TYPE_ARRAY: { |
370 | 0 | std::unique_ptr<Field> item_field(FieldFactory::create(column.get_sub_column(0))); |
371 | | auto* local = new ArrayField(column); |
372 | | local->add_sub_field(std::move(item_field)); |
373 | | return local; |
374 | | } |
375 | 296k | case FieldType::OLAP_FIELD_TYPE_MAP: { |
376 | 296k | std::unique_ptr<Field> key_field(FieldFactory::create(column.get_sub_column(0))); |
377 | 296k | std::unique_ptr<Field> val_field(FieldFactory::create(column.get_sub_column(1))); |
378 | 296k | auto* local = new MapField(column); |
379 | 296k | local->add_sub_field(std::move(key_field)); |
380 | 296k | local->add_sub_field(std::move(val_field)); |
381 | 296k | return local; |
382 | | } |
383 | | case FieldType::OLAP_FIELD_TYPE_DECIMAL: |
384 | | [[fallthrough]]; |
385 | | case FieldType::OLAP_FIELD_TYPE_DECIMAL32: |
386 | | [[fallthrough]]; |
387 | 155k | case FieldType::OLAP_FIELD_TYPE_DECIMAL64: |
388 | 155k | [[fallthrough]]; |
389 | 155k | case FieldType::OLAP_FIELD_TYPE_DECIMAL128I: |
390 | 114 | [[fallthrough]]; |
391 | 114 | case FieldType::OLAP_FIELD_TYPE_DECIMAL256: |
392 | 155k | [[fallthrough]]; |
393 | | case FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ: |
394 | 2 | [[fallthrough]]; |
395 | 2 | case FieldType::OLAP_FIELD_TYPE_DATETIMEV2: { |
396 | 2 | Field* field = new Field(column); |
397 | 2 | field->set_precision(column.precision()); |
398 | 2 | field->set_scale(column.frac()); |
399 | 229k | return field; |
400 | 229k | } |
401 | 229k | default: |
402 | | return new Field(column); |
403 | 229k | } |
404 | 229k | } |
405 | 229k | |
406 | | // for value column |
407 | | switch (column.aggregation()) { |
408 | | case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE: |
409 | 52.3M | case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_SUM: |
410 | | case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MIN: |
411 | 11.9M | case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MAX: |
412 | 11.9M | case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE: |
413 | 11.9M | case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE_IF_NOT_NULL: |
414 | 11.9M | switch (column.type()) { |
415 | 11.9M | case FieldType::OLAP_FIELD_TYPE_CHAR: |
416 | | return new CharField(column); |
417 | 0 | case FieldType::OLAP_FIELD_TYPE_VARCHAR: |
418 | 0 | return new VarcharField(column); |
419 | 0 | case FieldType::OLAP_FIELD_TYPE_STRING: |
420 | | return new StringField(column); |
421 | 755k | case FieldType::OLAP_FIELD_TYPE_STRUCT: { |
422 | 755k | auto* local = new StructField(column); |
423 | 755k | for (uint32_t i = 0; i < column.get_subtype_count(); i++) { |
424 | 755k | std::unique_ptr<Field> sub_field( |
425 | 755k | FieldFactory::create(column.get_sub_column(i))); |
426 | 755k | local->add_sub_field(std::move(sub_field)); |
427 | 755k | } |
428 | 0 | return local; |
429 | 0 | } |
430 | 0 | case FieldType::OLAP_FIELD_TYPE_ARRAY: { |
431 | 0 | std::unique_ptr<Field> item_field(FieldFactory::create(column.get_sub_column(0))); |
432 | | auto* local = new ArrayField(column); |
433 | | local->add_sub_field(std::move(item_field)); |
434 | | return local; |
435 | | } |
436 | 403k | case FieldType::OLAP_FIELD_TYPE_MAP: { |
437 | 403k | DCHECK(column.get_subtype_count() == 2); |
438 | 403k | auto* local = new MapField(column); |
439 | 68.5k | std::unique_ptr<Field> key_field(FieldFactory::create(column.get_sub_column(0))); |
440 | 68.5k | std::unique_ptr<Field> value_field(FieldFactory::create(column.get_sub_column(1))); |
441 | 403k | local->add_sub_field(std::move(key_field)); |
442 | | local->add_sub_field(std::move(value_field)); |
443 | 592k | return local; |
444 | 592k | } |
445 | 592k | case FieldType::OLAP_FIELD_TYPE_DECIMAL: |
446 | 592k | [[fallthrough]]; |
447 | 592k | case FieldType::OLAP_FIELD_TYPE_DECIMAL32: |
448 | 592k | [[fallthrough]]; |
449 | 592k | case FieldType::OLAP_FIELD_TYPE_DECIMAL64: |
450 | 592k | [[fallthrough]]; |
451 | | case FieldType::OLAP_FIELD_TYPE_DECIMAL128I: |
452 | | [[fallthrough]]; |
453 | | case FieldType::OLAP_FIELD_TYPE_DECIMAL256: |
454 | | [[fallthrough]]; |
455 | 24.6k | case FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ: |
456 | | [[fallthrough]]; |
457 | 0 | case FieldType::OLAP_FIELD_TYPE_DATETIMEV2: { |
458 | 0 | Field* field = new Field(column); |
459 | 0 | field->set_precision(column.precision()); |
460 | 0 | field->set_scale(column.frac()); |
461 | 0 | return field; |
462 | | } |
463 | | default: |
464 | | return new Field(column); |
465 | | } |
466 | 10.4k | case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_HLL_UNION: |
467 | | return new HllAggField(column); |
468 | 0 | case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_BITMAP_UNION: |
469 | 0 | return new BitmapAggField(column); |
470 | 0 | case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_QUANTILE_UNION: |
471 | 0 | return new QuantileStateAggField(column); |
472 | 0 | case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_GENERIC: |
473 | | return new AggStateField(column); |
474 | | case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_UNKNOWN: |
475 | | CHECK(false) << ", value column no agg type"; |
476 | | return nullptr; |
477 | 6.77k | } |
478 | | return nullptr; |
479 | 0 | } |
480 | 0 |
|
481 | 0 | static Field* create_by_type(const FieldType& type) { |
482 | 0 | TabletColumn column(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE, type); |
483 | 0 | return create(column); |
484 | | } |
485 | | }; |
486 | | #include "common/compile_check_end.h" |
487 | | } // namespace doris |