Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <cstddef> |
21 | | #include <sstream> |
22 | | #include <string> |
23 | | |
24 | | #include "core/arena.h" |
25 | | #include "core/value/map_value.h" |
26 | | #include "runtime/collection_value.h" |
27 | | #include "storage/key_coder.h" |
28 | | #include "storage/olap_common.h" |
29 | | #include "storage/olap_define.h" |
30 | | #include "storage/tablet/tablet_schema.h" |
31 | | #include "storage/types.h" |
32 | | #include "storage/utils.h" |
33 | | #include "util/hash_util.hpp" |
34 | | #include "util/json/path_in_data.h" |
35 | | #include "util/slice.h" |
36 | | |
37 | | namespace doris { |
38 | | // A StorageField represents one column in its in-memory format. |
39 | | // Callers use this class to access or manipulate column data in memory. |
// It is constructed from a TabletColumn: the column itself is copied into _desc, and
// its type info, key coder, name, lengths, nullability, ids and path are cached.
// NOTE(review): the single-argument constructor is not `explicit`, so a TabletColumn
// converts implicitly to a StorageField — confirm this is intended.
// NOTE(review): _precision and _scale have no default initializer and are not set by
// the constructor; they stay indeterminate until set_precision()/set_scale() are
// called, yet clone(StorageField*) copies them unconditionally.
40 | | class StorageField { |
41 | | public: |
42 | | StorageField(const TabletColumn& column) |
43 | 82.5M | : _type_info(get_type_info(&column)), |
44 | 82.5M | _desc(column), |
45 | 82.5M | _length(column.length()), |
46 | 82.5M | _key_coder(get_key_coder(column.type())), |
47 | 82.5M | _name(column.name()), |
48 | 82.5M | _index_size(column.index_length()), |
49 | 82.5M | _is_nullable(column.is_nullable()), |
50 | 82.5M | _unique_id(column.unique_id()), |
51 | 82.5M | _parent_unique_id(column.parent_unique_id()), |
52 | 82.5M | _is_extracted_column(column.is_extracted_column()), |
53 | 82.5M | _path(column.path_info_ptr()) {} |
54 | | |
55 | 82.9M | virtual ~StorageField() = default; |
56 | | |
// Plain accessors. size() is forwarded to the type info; field_size() is size() + 1
// (NOTE(review): the extra byte is presumably a null indicator — confirm).
57 | 5.75M | size_t size() const { return _type_info->size(); } |
58 | 1.11k | size_t length() const { return _length; } |
59 | 0 | size_t field_size() const { return size() + 1; } |
60 | 0 | size_t index_size() const { return _index_size; } |
61 | 38.3M | int32_t unique_id() const { return _unique_id; } |
62 | 94.7k | int32_t parent_unique_id() const { return _parent_unique_id; } |
63 | 37.2M | bool is_extracted_column() const { return _is_extracted_column; } |
64 | 57.8M | const std::string& name() const { return _name; } |
65 | 0 | const PathInDataPtr& path() const { return _path; } |
66 | | |
// Both setters delegate to the type info; subclasses in this file override set_to_max.
67 | 0 | virtual void set_to_max(char* buf) const { return _type_info->set_to_max(buf); } |
68 | | |
69 | 2 | virtual void set_to_min(char* buf) const { return _type_info->set_to_min(buf); } |
70 | | |
// Allocates a copy with `new` and returns it as a raw pointer; the protected
// clone(StorageField*) overload below wraps such results in std::unique_ptr.
// NOTE(review): MapField, StructField and ArrayField do not override clone(), so
// cloning one of them through this implementation yields a plain StorageField.
71 | 2.53M | virtual StorageField* clone() const { |
72 | 2.53M | auto* local = new StorageField(_desc); |
73 | 2.53M | this->clone(local); |
74 | 2.53M | return local; |
75 | 2.53M | } |
76 | | |
77 | 53.2M | FieldType type() const { return _type_info->type(); } |
78 | 48.0k | const TypeInfo* type_info() const { return _type_info.get(); } |
79 | 43.3M | bool is_nullable() const { return _is_nullable; } |
80 | | |
81 | | // Similar to `full_encode_ascending`, but encodes only part of the value (the first `index_size` bytes). |
82 | | // Only applicable to string types. |
83 | 636k | void encode_ascending(const void* value, std::string* buf) const { |
84 | 636k | _key_coder->encode_ascending(value, _index_size, buf); |
85 | 636k | } |
86 | | |
87 | | // Encodes the provided `value` into `buf` via the key coder's full_encode_ascending. |
88 | 8.32M | void full_encode_ascending(const void* value, std::string* buf) const { |
89 | 8.32M | _key_coder->full_encode_ascending(value, buf); |
90 | 8.32M | } |
// Takes ownership of `sub_field` and appends it to the list of sub-fields.
91 | 727k | void add_sub_field(std::unique_ptr<StorageField> sub_field) { |
92 | 727k | _sub_fields.emplace_back(std::move(sub_field)); |
93 | 727k | } |
// Returns a non-owning pointer. No bounds check: `i` must be < get_sub_field_count().
94 | 138k | StorageField* get_sub_field(size_t i) const { return _sub_fields[i].get(); } |
95 | 42.5k | size_t get_sub_field_count() const { return _sub_fields.size(); } |
96 | | |
97 | 5.05M | void set_precision(int32_t precision) { _precision = precision; } |
98 | 5.05M | void set_scale(int32_t scale) { _scale = scale; } |
99 | 0 | int32_t get_precision() const { return _precision; } |
100 | 18.7M | int32_t get_scale() const { return _scale; } |
101 | 69.0M | const TabletColumn& get_desc() const { return _desc; } |
102 | | |
// For an extracted column this returns the parent's unique id; otherwise the
// column's own unique id.
103 | 18.6M | int32_t get_unique_id() const { |
104 | 18.6M | return is_extracted_column() ? parent_unique_id() : unique_id(); |
105 | 18.6M | } |
106 | | |
107 | | protected: |
108 | | TypeInfoPtr _type_info; |
109 | | TabletColumn _desc; |
110 | | // Unit: bytes. |
111 | | // All types except strings have a fixed length. |
112 | | // Note that the struct type itself has a fixed length, but because |
113 | | // its number of subfields varies, the actual length of |
114 | | // a struct field is not fixed. |
115 | | size_t _length; |
116 | | |
// Copies this field's cached attributes into `other` and clones every sub-field
// into it (any sub-fields `other` already had are cleared first). _type_info is
// re-created through clone_type_info(). _desc, _length and _path are NOT assigned
// here; every caller in this file constructs `other` from the same _desc beforehand.
117 | 5.81M | void clone(StorageField* other) const { |
118 | 5.81M | other->_type_info = clone_type_info(this->_type_info.get()); |
119 | 5.81M | other->_key_coder = this->_key_coder; |
120 | 5.81M | other->_name = this->_name; |
121 | 5.81M | other->_index_size = this->_index_size; |
122 | 5.81M | other->_is_nullable = this->_is_nullable; |
123 | 5.81M | other->_sub_fields.clear(); |
124 | 5.81M | other->_precision = this->_precision; |
125 | 5.81M | other->_scale = this->_scale; |
126 | 5.81M | other->_unique_id = this->_unique_id; |
127 | 5.81M | other->_parent_unique_id = this->_parent_unique_id; |
128 | 5.81M | other->_is_extracted_column = this->_is_extracted_column; |
129 | 5.81M | for (const auto& f : _sub_fields) { |
130 | 0 | StorageField* item = f->clone(); |
131 | 0 | other->add_sub_field(std::unique_ptr<StorageField>(item)); |
132 | 0 | } |
133 | 5.81M | } |
134 | | |
135 | | private: |
136 | | // Maximum length of the field, unit: bytes; |
137 | | // usually equal to the length, except for variable-length strings. |
// NOTE(review): the comment above does not appear to describe _key_coder, which
// directly follows it — possibly left over from a removed member; confirm.
138 | | const KeyCoder* _key_coder; |
139 | | std::string _name; |
140 | | size_t _index_size; |
141 | | bool _is_nullable; |
142 | | std::vector<std::unique_ptr<StorageField>> _sub_fields; |
143 | | int32_t _precision; |
144 | | int32_t _scale; |
145 | | int32_t _unique_id; |
146 | | int32_t _parent_unique_id; |
147 | | bool _is_extracted_column = false; |
148 | | PathInDataPtr _path; |
149 | | }; |
150 | | |
// StorageField subclass used for map columns. It adds no members or overrides;
// StorageFieldFactory::create() attaches the key sub-field and then the value
// sub-field after construction.
// NOTE(review): clone() is inherited from StorageField, so a clone of a MapField
// is a plain StorageField — confirm this is acceptable.
151 | | class MapField : public StorageField { |
152 | | public: |
153 | 106k | MapField(const TabletColumn& column) : StorageField(column) {} |
154 | | }; |
155 | | |
// StorageField subclass used for struct columns. It adds no members or overrides;
// StorageFieldFactory::create() attaches one sub-field per sub-column after
// construction.
// NOTE(review): clone() is inherited from StorageField, so a clone of a StructField
// is a plain StorageField — confirm this is acceptable.
156 | | class StructField : public StorageField { |
157 | | public: |
158 | 39.9k | StructField(const TabletColumn& column) : StorageField(column) {} |
159 | | }; |
160 | | |
// StorageField subclass used for array columns. It adds no members or overrides;
// StorageFieldFactory::create() attaches a single item sub-field (sub-column 0)
// after construction.
// NOTE(review): clone() is inherited from StorageField, so a clone of an ArrayField
// is a plain StorageField — confirm this is acceptable.
161 | | class ArrayField : public StorageField { |
162 | | public: |
163 | 390k | ArrayField(const TabletColumn& column) : StorageField(column) {} |
164 | | }; |
165 | | |
// StorageField subclass for CHAR columns; overrides clone() and set_to_max().
166 | | class CharField : public StorageField { |
167 | | public: |
168 | 232k | CharField(const TabletColumn& column) : StorageField(column) {} |
169 | | |
// Returns a newly allocated CharField built from the same _desc, with the cached
// attributes copied over by StorageField::clone(StorageField*).
170 | 1.62k | CharField* clone() const override { |
171 | 1.62k | auto* local = new CharField(_desc); |
172 | 1.62k | StorageField::clone(local); |
173 | 1.62k | return local; |
174 | 1.62k | } |
175 | | |
// Reinterprets `ch` as a Slice, sets its size to _length and fills slice->data
// with 0xFF bytes.
// Assumes slice->data already points at a buffer of at least _length bytes —
// TODO confirm against callers.
// NOTE(review): memset is used here, but <cstring> is not among this header's
// direct includes; it is presumably pulled in transitively.
176 | 0 | void set_to_max(char* ch) const override { |
177 | 0 | auto slice = reinterpret_cast<Slice*>(ch); |
178 | 0 | slice->size = _length; |
179 | 0 | memset(slice->data, 0xFF, slice->size); |
180 | 0 | } |
181 | | }; |
182 | | |
// StorageField subclass for VARCHAR columns; overrides clone() and set_to_max().
// In this file the factory only creates it for value (non-key) columns.
183 | | class VarcharField : public StorageField { |
184 | | public: |
185 | 8.25M | VarcharField(const TabletColumn& column) : StorageField(column) {} |
186 | | |
// Returns a newly allocated VarcharField built from the same _desc, with the
// cached attributes copied over by StorageField::clone(StorageField*).
187 | 0 | VarcharField* clone() const override { |
188 | 0 | auto* local = new VarcharField(_desc); |
189 | 0 | StorageField::clone(local); |
190 | 0 | return local; |
191 | 0 | } |
192 | | |
// Reinterprets `ch` as a Slice, sets its size to _length - OLAP_VARCHAR_MAX_BYTES
// and fills slice->data with that many 0xFF bytes.
// OLAP_VARCHAR_MAX_BYTES is defined outside this file; presumably it is the size
// of the length prefix included in _length — confirm.
// NOTE(review): _length is a size_t, so the subtraction would wrap around if
// _length were smaller than OLAP_VARCHAR_MAX_BYTES — confirm callers rule that out.
193 | 2 | void set_to_max(char* ch) const override { |
194 | 2 | auto slice = reinterpret_cast<Slice*>(ch); |
195 | 2 | slice->size = _length - OLAP_VARCHAR_MAX_BYTES; |
196 | 2 | memset(slice->data, 0xFF, slice->size); |
197 | 2 | } |
198 | | }; |
// StorageField subclass for STRING columns; the factory also uses it for VARCHAR
// key columns. Overrides clone() and set_to_max().
199 | | class StringField : public StorageField { |
200 | | public: |
201 | 41.2M | StringField(const TabletColumn& column) : StorageField(column) {} |
202 | | |
// Returns a newly allocated StringField built from the same _desc, with the
// cached attributes copied over by StorageField::clone(StorageField*).
203 | 3.28M | StringField* clone() const override { |
204 | 3.28M | auto* local = new StringField(_desc); |
205 | 3.28M | StorageField::clone(local); |
206 | 3.28M | return local; |
207 | 3.28M | } |
208 | | |
// Reinterprets `ch` as a Slice and fills slice->data with 0xFF bytes. Unlike the
// CharField/VarcharField overrides it does not assign slice->size: it fills
// whatever size the Slice already carries, so the caller must set it beforehand.
209 | 0 | void set_to_max(char* ch) const override { |
210 | 0 | auto slice = reinterpret_cast<Slice*>(ch); |
211 | 0 | memset(slice->data, 0xFF, slice->size); |
212 | 0 | } |
213 | | }; |
214 | | |
// StorageField subclass that StorageFieldFactory::create() returns for value
// columns aggregated with OLAP_FIELD_AGGREGATION_BITMAP_UNION. It only overrides
// clone() so that a copy keeps the BitmapAggField type.
215 | | class BitmapAggField : public StorageField { |
216 | | public: |
217 | 25.5k | BitmapAggField(const TabletColumn& column) : StorageField(column) {} |
218 | | |
// Returns a newly allocated BitmapAggField built from the same _desc.
219 | 0 | BitmapAggField* clone() const override { |
220 | 0 | auto* local = new BitmapAggField(_desc); |
221 | 0 | StorageField::clone(local); |
222 | 0 | return local; |
223 | 0 | } |
224 | | }; |
225 | | |
// StorageField subclass that StorageFieldFactory::create() returns for value
// columns aggregated with OLAP_FIELD_AGGREGATION_QUANTILE_UNION. It only overrides
// clone() so that a copy keeps the QuantileStateAggField type.
226 | | class QuantileStateAggField : public StorageField { |
227 | | public: |
228 | 10.6k | QuantileStateAggField(const TabletColumn& column) : StorageField(column) {} |
229 | | |
// Returns a newly allocated QuantileStateAggField built from the same _desc.
230 | 0 | QuantileStateAggField* clone() const override { |
231 | 0 | auto* local = new QuantileStateAggField(_desc); |
232 | 0 | StorageField::clone(local); |
233 | 0 | return local; |
234 | 0 | } |
235 | | }; |
236 | | |
// StorageField subclass that StorageFieldFactory::create() returns for value
// columns aggregated with OLAP_FIELD_AGGREGATION_GENERIC. It only overrides
// clone() so that a copy keeps the AggStateField type.
237 | | class AggStateField : public StorageField { |
238 | | public: |
239 | 6.30k | AggStateField(const TabletColumn& column) : StorageField(column) {} |
240 | | |
// Returns a newly allocated AggStateField built from the same _desc.
241 | 0 | AggStateField* clone() const override { |
242 | 0 | auto* local = new AggStateField(_desc); |
243 | 0 | StorageField::clone(local); |
244 | 0 | return local; |
245 | 0 | } |
246 | | }; |
247 | | |
// StorageField subclass that StorageFieldFactory::create() returns for value
// columns aggregated with OLAP_FIELD_AGGREGATION_HLL_UNION. It only overrides
// clone() so that a copy keeps the HllAggField type.
248 | | class HllAggField : public StorageField { |
249 | | public: |
250 | 15.2k | HllAggField(const TabletColumn& column) : StorageField(column) {} |
251 | | |
// Returns a newly allocated HllAggField built from the same _desc.
252 | 0 | HllAggField* clone() const override { |
253 | 0 | auto* local = new HllAggField(_desc); |
254 | 0 | StorageField::clone(local); |
255 | 0 | return local; |
256 | 0 | } |
257 | | }; |
258 | | |
// Factory that picks the StorageField subclass matching a TabletColumn.
// Every object is allocated with `new` and handed back as a raw pointer; the
// recursive calls in create() wrap nested results in std::unique_ptr.
259 | | class StorageFieldFactory { |
260 | | public: |
// Builds the field for `column`, recursively creating sub-fields for
// struct/array/map columns. Returns nullptr only on the fall-through paths at
// the end (unknown or unmatched aggregation method).
261 | 77.2M | static StorageField* create(const TabletColumn& column) { |
262 | | // Key columns: dispatch on the column type only. |
263 | 77.2M | if (column.is_key()) { |
264 | 36.5M | switch (column.type()) { |
265 | 35.7k | case FieldType::OLAP_FIELD_TYPE_CHAR: |
266 | 35.7k | return new CharField(column); |
// For key columns VARCHAR and STRING both map to StringField (the value-column
// branch below uses VarcharField for VARCHAR instead).
267 | 33.8M | case FieldType::OLAP_FIELD_TYPE_VARCHAR: |
268 | 33.8M | case FieldType::OLAP_FIELD_TYPE_STRING: |
269 | 33.8M | return new StringField(column); |
270 | 0 | case FieldType::OLAP_FIELD_TYPE_STRUCT: { |
271 | 0 | auto* local = new StructField(column); |
272 | 0 | for (uint32_t i = 0; i < column.get_subtype_count(); i++) { |
273 | 0 | std::unique_ptr<StorageField> sub_field( |
274 | 0 | StorageFieldFactory::create(column.get_sub_column(i))); |
275 | 0 | local->add_sub_field(std::move(sub_field)); |
276 | 0 | } |
277 | 0 | return local; |
278 | 33.8M | } |
279 | 0 | case FieldType::OLAP_FIELD_TYPE_ARRAY: { |
280 | 0 | std::unique_ptr<StorageField> item_field( |
281 | 0 | StorageFieldFactory::create(column.get_sub_column(0))); |
282 | 0 | auto* local = new ArrayField(column); |
283 | 0 | local->add_sub_field(std::move(item_field)); |
284 | 0 | return local; |
285 | 33.8M | } |
// NOTE(review): unlike the value-column MAP case below, this one has no
// DCHECK that the column has exactly two sub-columns.
286 | 0 | case FieldType::OLAP_FIELD_TYPE_MAP: { |
287 | 0 | std::unique_ptr<StorageField> key_field( |
288 | 0 | StorageFieldFactory::create(column.get_sub_column(0))); |
289 | 0 | std::unique_ptr<StorageField> val_field( |
290 | 0 | StorageFieldFactory::create(column.get_sub_column(1))); |
291 | 0 | auto* local = new MapField(column); |
292 | 0 | local->add_sub_field(std::move(key_field)); |
293 | 0 | local->add_sub_field(std::move(val_field)); |
294 | 0 | return local; |
295 | 33.8M | } |
// All decimal variants, TIMESTAMPTZ and DATETIMEV2 use a plain StorageField
// with precision and scale copied from the column (scale comes from frac()).
296 | 4.10k | case FieldType::OLAP_FIELD_TYPE_DECIMAL: |
297 | 4.10k | [[fallthrough]]; |
298 | 38.0k | case FieldType::OLAP_FIELD_TYPE_DECIMAL32: |
299 | 38.0k | [[fallthrough]]; |
300 | 46.4k | case FieldType::OLAP_FIELD_TYPE_DECIMAL64: |
301 | 46.4k | [[fallthrough]]; |
302 | 92.6k | case FieldType::OLAP_FIELD_TYPE_DECIMAL128I: |
303 | 92.6k | [[fallthrough]]; |
304 | 99.4k | case FieldType::OLAP_FIELD_TYPE_DECIMAL256: |
305 | 99.4k | [[fallthrough]]; |
306 | 176k | case FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ: |
307 | 176k | [[fallthrough]]; |
308 | 281k | case FieldType::OLAP_FIELD_TYPE_DATETIMEV2: { |
309 | 281k | StorageField* field = new StorageField(column); |
310 | 281k | field->set_precision(column.precision()); |
311 | 281k | field->set_scale(column.frac()); |
312 | 281k | return field; |
313 | 176k | } |
314 | 2.43M | default: |
315 | 2.43M | return new StorageField(column); |
316 | 36.5M | } |
317 | 36.5M | } |
318 | | |
319 | | // Value columns: dispatch on the aggregation method first, then on the type. |
320 | 40.6M | switch (column.aggregation()) { |
321 | 39.7M | case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE: |
322 | 39.9M | case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_SUM: |
323 | 39.9M | case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MIN: |
324 | 40.0M | case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MAX: |
325 | 40.4M | case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE: |
326 | 40.5M | case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE_IF_NOT_NULL: |
327 | 40.5M | switch (column.type()) { |
328 | 195k | case FieldType::OLAP_FIELD_TYPE_CHAR: |
329 | 195k | return new CharField(column); |
330 | 8.27M | case FieldType::OLAP_FIELD_TYPE_VARCHAR: |
331 | 8.27M | return new VarcharField(column); |
332 | 4.39M | case FieldType::OLAP_FIELD_TYPE_STRING: |
333 | 4.39M | return new StringField(column); |
334 | 40.0k | case FieldType::OLAP_FIELD_TYPE_STRUCT: { |
335 | 40.0k | auto* local = new StructField(column); |
336 | 160k | for (uint32_t i = 0; i < column.get_subtype_count(); i++) { |
337 | 120k | std::unique_ptr<StorageField> sub_field( |
338 | 120k | StorageFieldFactory::create(column.get_sub_column(i))); |
339 | 120k | local->add_sub_field(std::move(sub_field)); |
340 | 120k | } |
341 | 40.0k | return local; |
342 | 0 | } |
343 | 392k | case FieldType::OLAP_FIELD_TYPE_ARRAY: { |
344 | 392k | std::unique_ptr<StorageField> item_field( |
345 | 392k | StorageFieldFactory::create(column.get_sub_column(0))); |
346 | 392k | auto* local = new ArrayField(column); |
347 | 392k | local->add_sub_field(std::move(item_field)); |
348 | 392k | return local; |
349 | 0 | } |
// Sub-column 0 is the map key and sub-column 1 the map value.
// NOTE(review): `local` is a raw owning pointer created before the recursive
// create() calls; if one of them throws, `local` is leaked.
350 | 107k | case FieldType::OLAP_FIELD_TYPE_MAP: { |
351 | 107k | DCHECK(column.get_subtype_count() == 2); |
352 | 107k | auto* local = new MapField(column); |
353 | 107k | std::unique_ptr<StorageField> key_field( |
354 | 107k | StorageFieldFactory::create(column.get_sub_column(0))); |
355 | 107k | std::unique_ptr<StorageField> value_field( |
356 | 107k | StorageFieldFactory::create(column.get_sub_column(1))); |
357 | 107k | local->add_sub_field(std::move(key_field)); |
358 | 107k | local->add_sub_field(std::move(value_field)); |
359 | 107k | return local; |
360 | 0 | } |
// Same handling as in the key-column branch: a plain StorageField carrying
// the column's precision and scale.
361 | 6.02k | case FieldType::OLAP_FIELD_TYPE_DECIMAL: |
362 | 6.02k | [[fallthrough]]; |
363 | 80.0k | case FieldType::OLAP_FIELD_TYPE_DECIMAL32: |
364 | 80.0k | [[fallthrough]]; |
365 | 230k | case FieldType::OLAP_FIELD_TYPE_DECIMAL64: |
366 | 230k | [[fallthrough]]; |
367 | 401k | case FieldType::OLAP_FIELD_TYPE_DECIMAL128I: |
368 | 401k | [[fallthrough]]; |
369 | 411k | case FieldType::OLAP_FIELD_TYPE_DECIMAL256: |
370 | 411k | [[fallthrough]]; |
371 | 553k | case FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ: |
372 | 553k | [[fallthrough]]; |
373 | 4.77M | case FieldType::OLAP_FIELD_TYPE_DATETIMEV2: { |
374 | 4.77M | StorageField* field = new StorageField(column); |
375 | 4.77M | field->set_precision(column.precision()); |
376 | 4.77M | field->set_scale(column.frac()); |
377 | 4.77M | return field; |
378 | 553k | } |
379 | 22.4M | default: |
380 | 22.4M | return new StorageField(column); |
381 | 40.5M | } |
// The remaining aggregation methods each map to a dedicated subclass,
// regardless of the column type.
382 | 15.2k | case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_HLL_UNION: |
383 | 15.2k | return new HllAggField(column); |
384 | 25.5k | case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_BITMAP_UNION: |
385 | 25.5k | return new BitmapAggField(column); |
386 | 10.7k | case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_QUANTILE_UNION: |
387 | 10.7k | return new QuantileStateAggField(column); |
388 | 6.33k | case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_GENERIC: |
389 | 6.33k | return new AggStateField(column); |
// A value column without an aggregation type trips CHECK(false); the
// `return nullptr` after it is only reached if that CHECK does not abort.
390 | 0 | case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_UNKNOWN: |
391 | 0 | CHECK(false) << ", value column no agg type"; |
392 | 0 | return nullptr; |
393 | 40.6M | } |
// Reached only when the aggregation value matches none of the cases above.
394 | 0 | return nullptr; |
395 | 40.6M | } |
396 | | |
// Convenience wrapper: builds a TabletColumn from OLAP_FIELD_AGGREGATION_NONE and
// `type`, then delegates to create(). The TabletColumn is a local temporary; the
// created field keeps its own copy in _desc.
397 | | static StorageField* create_by_type(const FieldType& type) { |
398 | | TabletColumn column(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE, type); |
399 | | return create(column); |
400 | | } |
401 | | }; |
402 | | } // namespace doris |