Coverage Report

Created: 2026-03-13 10:59

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/field.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <cstddef>
21
#include <sstream>
22
#include <string>
23
24
#include "core/arena.h"
25
#include "core/value/map_value.h"
26
#include "runtime/collection_value.h"
27
#include "storage/key_coder.h"
28
#include "storage/olap_common.h"
29
#include "storage/olap_define.h"
30
#include "storage/row_cursor_cell.h"
31
#include "storage/tablet/tablet_schema.h"
32
#include "storage/types.h"
33
#include "storage/utils.h"
34
#include "util/hash_util.hpp"
35
#include "util/json/path_in_data.h"
36
#include "util/slice.h"
37
38
namespace doris {
39
#include "common/compile_check_begin.h"
40
// A Field is used to represent a column in memory format.
41
// User can use this class to access or deal with column data in memory.
42
class StorageField {
43
public:
44
    StorageField(const TabletColumn& column)
45
100M
            : _type_info(get_type_info(&column)),
46
100M
              _desc(column),
47
100M
              _length(column.length()),
48
100M
              _key_coder(get_key_coder(column.type())),
49
100M
              _name(column.name()),
50
100M
              _index_size(column.index_length()),
51
100M
              _is_nullable(column.is_nullable()),
52
100M
              _unique_id(column.unique_id()),
53
100M
              _parent_unique_id(column.parent_unique_id()),
54
100M
              _is_extracted_column(column.is_extracted_column()),
55
100M
              _path(column.path_info_ptr()) {}
56
57
101M
    virtual ~StorageField() = default;
58
59
97.7M
    size_t size() const { return _type_info->size(); }
60
0
    size_t length() const { return _length; }
61
0
    size_t field_size() const { return size() + 1; }
62
0
    size_t index_size() const { return _index_size; }
63
64.6M
    int32_t unique_id() const { return _unique_id; }
64
66.6k
    int32_t parent_unique_id() const { return _parent_unique_id; }
65
56.2M
    bool is_extracted_column() const { return _is_extracted_column; }
66
100M
    const std::string& name() const { return _name; }
67
0
    const PathInDataPtr& path() const { return _path; }
68
69
0
    virtual void set_to_max(char* buf) const { return _type_info->set_to_max(buf); }
70
71
2
    virtual void set_to_min(char* buf) const { return _type_info->set_to_min(buf); }
72
73
0
    void set_long_text_buf(char** buf) { _long_text_buf = buf; }
74
75
7.73M
    virtual size_t get_variable_len() const { return 0; }
76
77
2.15M
    virtual StorageField* clone() const {
78
2.15M
        auto* local = new StorageField(_desc);
79
2.15M
        this->clone(local);
80
2.15M
        return local;
81
2.15M
    }
82
83
    // Only compare column content, without considering nullptr condition.
84
    // RETURNS:
85
    //      0 means equal,
86
    //      -1 means left less than right,
87
    //      1 means left bigger than right
88
0
    int compare(const void* left, const void* right) const { return _type_info->cmp(left, right); }
89
90
    // Compare two types of cell.
91
    // This function differs compare in that this function compare cell which
92
    // will consider the condition which cell may be nullptr. While compare only
93
    // compare column content without considering nullptr condition.
94
    // Only compare column content, without considering nullptr condition.
95
    // RETURNS:
96
    //      0 means equal,
97
    //      -1 means left less than right,
98
    //      1 means left bigger than right
99
    template <typename LhsCellType, typename RhsCellType>
100
3.85M
    int compare_cell(const LhsCellType& lhs, const RhsCellType& rhs) const {
101
3.85M
        bool l_null = lhs.is_null();
102
3.85M
        bool r_null = rhs.is_null();
103
3.85M
        if (l_null != r_null) {
104
18.4E
            return l_null ? -1 : 1;
105
219k
        }
106
3.63M
        return l_null ? 0 : _type_info->cmp(lhs.cell_ptr(), rhs.cell_ptr());
107
3.85M
    }
108
109
    // deep copy source cell' content to destination cell.
110
    // For string type, this will allocate data form arena,
111
    // and copy source's content.
112
    template <typename DstCellType, typename SrcCellType>
113
    void deep_copy(DstCellType* dst, const SrcCellType& src, Arena& arena) const {
114
        bool is_null = src.is_null();
115
        dst->set_is_null(is_null);
116
        if (is_null) {
117
            return;
118
        }
119
        _type_info->deep_copy(dst->mutable_cell_ptr(), src.cell_ptr(), arena);
120
    }
121
122
    // used by init scan key stored in string format
123
    // value_string should end with '\0'
124
    Status from_string(char* buf, const std::string& value_string, const int precision = 0,
125
7.49M
                       const int scale = 0) const {
126
7.49M
        if (type() == FieldType::OLAP_FIELD_TYPE_STRING && !value_string.empty()) {
127
0
            auto slice = reinterpret_cast<Slice*>(buf);
128
0
            if (slice->size < value_string.size()) {
129
0
                *_long_text_buf = static_cast<char*>(realloc(*_long_text_buf, value_string.size()));
130
0
                slice->data = *_long_text_buf;
131
0
                slice->size = value_string.size();
132
0
            }
133
0
        }
134
7.49M
        return _type_info->from_string(buf, value_string, precision, scale);
135
7.49M
    }
136
137
81.0M
    FieldType type() const { return _type_info->type(); }
138
52.6k
    const TypeInfo* type_info() const { return _type_info.get(); }
139
64.5M
    bool is_nullable() const { return _is_nullable; }
140
141
    // similar to `full_encode_ascending`, but only encode part (the first `index_size` bytes) of the value.
142
    // only applicable to string type
143
675k
    void encode_ascending(const void* value, std::string* buf) const {
144
675k
        _key_coder->encode_ascending(value, _index_size, buf);
145
675k
    }
146
147
    // encode the provided `value` into `buf`.
148
12.6M
    void full_encode_ascending(const void* value, std::string* buf) const {
149
12.6M
        _key_coder->full_encode_ascending(value, buf);
150
12.6M
    }
151
869k
    void add_sub_field(std::unique_ptr<StorageField> sub_field) {
152
869k
        _sub_fields.emplace_back(std::move(sub_field));
153
869k
    }
154
144k
    StorageField* get_sub_field(size_t i) const { return _sub_fields[i].get(); }
155
45.2k
    size_t get_sub_field_count() const { return _sub_fields.size(); }
156
157
5.95M
    void set_precision(int32_t precision) { _precision = precision; }
158
5.95M
    void set_scale(int32_t scale) { _scale = scale; }
159
7.49M
    int32_t get_precision() const { return _precision; }
160
7.49M
    int32_t get_scale() const { return _scale; }
161
96.5M
    const TabletColumn& get_desc() const { return _desc; }
162
163
28.2M
    int32_t get_unique_id() const {
164
28.2M
        return is_extracted_column() ? parent_unique_id() : unique_id();
165
28.2M
    }
166
167
protected:
168
    TypeInfoPtr _type_info;
169
    TabletColumn _desc;
170
    // unit : byte
171
    // except for strings, other types have fixed lengths
172
    // Note that, the struct type itself has fixed length, but due to
173
    // its number of subfields is a variable, so the actual length of
174
    // a struct field is not fixed.
175
    size_t _length;
176
    // Since the length of the STRING type cannot be determined,
177
    // only dynamic memory can be used. Arena cannot realize realloc.
178
    // The schema information is shared globally. Therefore,
179
    // dynamic memory can only be managed in thread local mode.
180
    // The memory will be created and released in rowcursor.
181
    char** _long_text_buf = nullptr;
182
183
0
    char* allocate_string_value(Arena& arena) const {
184
0
        char* type_value = arena.alloc(sizeof(Slice));
185
0
        auto slice = reinterpret_cast<Slice*>(type_value);
186
0
        slice->size = _length;
187
0
        slice->data = arena.alloc(slice->size);
188
0
        return type_value;
189
0
    }
190
191
14.3M
    void clone(StorageField* other) const {
192
14.3M
        other->_type_info = clone_type_info(this->_type_info.get());
193
14.3M
        other->_key_coder = this->_key_coder;
194
14.3M
        other->_name = this->_name;
195
14.3M
        other->_index_size = this->_index_size;
196
14.3M
        other->_is_nullable = this->_is_nullable;
197
14.3M
        other->_sub_fields.clear();
198
14.3M
        other->_precision = this->_precision;
199
14.3M
        other->_scale = this->_scale;
200
14.3M
        other->_unique_id = this->_unique_id;
201
14.3M
        other->_parent_unique_id = this->_parent_unique_id;
202
14.3M
        other->_is_extracted_column = this->_is_extracted_column;
203
14.3M
        for (const auto& f : _sub_fields) {
204
0
            StorageField* item = f->clone();
205
0
            other->add_sub_field(std::unique_ptr<StorageField>(item));
206
0
        }
207
14.3M
    }
208
209
private:
210
    // maximum length of Field, unit : bytes
211
    // usually equal to length, except for variable-length strings
212
    const KeyCoder* _key_coder;
213
    std::string _name;
214
    size_t _index_size;
215
    bool _is_nullable;
216
    std::vector<std::unique_ptr<StorageField>> _sub_fields;
217
    int32_t _precision;
218
    int32_t _scale;
219
    int32_t _unique_id;
220
    int32_t _parent_unique_id;
221
    bool _is_extracted_column = false;
222
    PathInDataPtr _path;
223
};
224
225
class MapField : public StorageField {
226
public:
227
134k
    MapField(const TabletColumn& column) : StorageField(column) {}
228
229
0
    size_t get_variable_len() const override { return _length; }
230
};
231
232
class StructField : public StorageField {
233
public:
234
51.5k
    StructField(const TabletColumn& column) : StorageField(column) {}
235
236
0
    size_t get_variable_len() const override {
237
0
        size_t variable_len = _length;
238
0
        for (size_t i = 0; i < get_sub_field_count(); i++) {
239
0
            variable_len += get_sub_field(i)->get_variable_len();
240
0
        }
241
0
        return variable_len;
242
0
    }
243
};
244
245
class ArrayField : public StorageField {
246
public:
247
461k
    ArrayField(const TabletColumn& column) : StorageField(column) {}
248
249
0
    size_t get_variable_len() const override { return _length; }
250
};
251
252
class CharField : public StorageField {
253
public:
254
231k
    CharField(const TabletColumn& column) : StorageField(column) {}
255
256
1.28k
    size_t get_variable_len() const override { return _length; }
257
258
1.57k
    CharField* clone() const override {
259
1.57k
        auto* local = new CharField(_desc);
260
1.57k
        StorageField::clone(local);
261
1.57k
        return local;
262
1.57k
    }
263
264
0
    void set_to_max(char* ch) const override {
265
0
        auto slice = reinterpret_cast<Slice*>(ch);
266
0
        slice->size = _length;
267
0
        memset(slice->data, 0xFF, slice->size);
268
0
    }
269
};
270
271
class VarcharField : public StorageField {
272
public:
273
9.79M
    VarcharField(const TabletColumn& column) : StorageField(column) {}
274
275
2
    size_t get_variable_len() const override { return _length - OLAP_VARCHAR_MAX_BYTES; }
276
277
0
    VarcharField* clone() const override {
278
0
        auto* local = new VarcharField(_desc);
279
0
        StorageField::clone(local);
280
0
        return local;
281
0
    }
282
283
2
    void set_to_max(char* ch) const override {
284
2
        auto slice = reinterpret_cast<Slice*>(ch);
285
2
        slice->size = _length - OLAP_VARCHAR_MAX_BYTES;
286
2
        memset(slice->data, 0xFF, slice->size);
287
2
    }
288
};
289
class StringField : public StorageField {
290
public:
291
52.3M
    StringField(const TabletColumn& column) : StorageField(column) {}
292
293
12.2M
    StringField* clone() const override {
294
12.2M
        auto* local = new StringField(_desc);
295
12.2M
        StorageField::clone(local);
296
12.2M
        return local;
297
12.2M
    }
298
299
0
    void set_to_max(char* ch) const override {
300
0
        auto slice = reinterpret_cast<Slice*>(ch);
301
0
        memset(slice->data, 0xFF, slice->size);
302
0
    }
303
};
304
305
class BitmapAggField : public StorageField {
306
public:
307
25.2k
    BitmapAggField(const TabletColumn& column) : StorageField(column) {}
308
309
0
    BitmapAggField* clone() const override {
310
0
        auto* local = new BitmapAggField(_desc);
311
0
        StorageField::clone(local);
312
0
        return local;
313
0
    }
314
};
315
316
class QuantileStateAggField : public StorageField {
317
public:
318
10.3k
    QuantileStateAggField(const TabletColumn& column) : StorageField(column) {}
319
320
0
    QuantileStateAggField* clone() const override {
321
0
        auto* local = new QuantileStateAggField(_desc);
322
0
        StorageField::clone(local);
323
0
        return local;
324
0
    }
325
};
326
327
class AggStateField : public StorageField {
328
public:
329
6.60k
    AggStateField(const TabletColumn& column) : StorageField(column) {}
330
331
0
    AggStateField* clone() const override {
332
0
        auto* local = new AggStateField(_desc);
333
0
        StorageField::clone(local);
334
0
        return local;
335
0
    }
336
};
337
338
class HllAggField : public StorageField {
339
public:
340
15.3k
    HllAggField(const TabletColumn& column) : StorageField(column) {}
341
342
0
    HllAggField* clone() const override {
343
0
        auto* local = new HllAggField(_desc);
344
0
        StorageField::clone(local);
345
0
        return local;
346
0
    }
347
};
348
349
class StorageFieldFactory {
350
public:
351
87.1M
    static StorageField* create(const TabletColumn& column) {
352
        // for key column
353
87.1M
        if (column.is_key()) {
354
38.7M
            switch (column.type()) {
355
41.0k
            case FieldType::OLAP_FIELD_TYPE_CHAR:
356
41.0k
                return new CharField(column);
357
35.4M
            case FieldType::OLAP_FIELD_TYPE_VARCHAR:
358
35.4M
            case FieldType::OLAP_FIELD_TYPE_STRING:
359
35.4M
                return new StringField(column);
360
0
            case FieldType::OLAP_FIELD_TYPE_STRUCT: {
361
0
                auto* local = new StructField(column);
362
0
                for (uint32_t i = 0; i < column.get_subtype_count(); i++) {
363
0
                    std::unique_ptr<StorageField> sub_field(
364
0
                            StorageFieldFactory::create(column.get_sub_column(i)));
365
0
                    local->add_sub_field(std::move(sub_field));
366
0
                }
367
0
                return local;
368
35.4M
            }
369
0
            case FieldType::OLAP_FIELD_TYPE_ARRAY: {
370
0
                std::unique_ptr<StorageField> item_field(
371
0
                        StorageFieldFactory::create(column.get_sub_column(0)));
372
0
                auto* local = new ArrayField(column);
373
0
                local->add_sub_field(std::move(item_field));
374
0
                return local;
375
35.4M
            }
376
0
            case FieldType::OLAP_FIELD_TYPE_MAP: {
377
0
                std::unique_ptr<StorageField> key_field(
378
0
                        StorageFieldFactory::create(column.get_sub_column(0)));
379
0
                std::unique_ptr<StorageField> val_field(
380
0
                        StorageFieldFactory::create(column.get_sub_column(1)));
381
0
                auto* local = new MapField(column);
382
0
                local->add_sub_field(std::move(key_field));
383
0
                local->add_sub_field(std::move(val_field));
384
0
                return local;
385
35.4M
            }
386
458
            case FieldType::OLAP_FIELD_TYPE_DECIMAL:
387
458
                [[fallthrough]];
388
37.8k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL32:
389
37.8k
                [[fallthrough]];
390
46.3k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL64:
391
46.3k
                [[fallthrough]];
392
97.7k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL128I:
393
97.7k
                [[fallthrough]];
394
103k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL256:
395
103k
                [[fallthrough]];
396
165k
            case FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ:
397
165k
                [[fallthrough]];
398
268k
            case FieldType::OLAP_FIELD_TYPE_DATETIMEV2: {
399
268k
                StorageField* field = new StorageField(column);
400
268k
                field->set_precision(column.precision());
401
268k
                field->set_scale(column.frac());
402
268k
                return field;
403
165k
            }
404
2.93M
            default:
405
2.93M
                return new StorageField(column);
406
38.7M
            }
407
38.7M
        }
408
409
        // for value column
410
48.4M
        switch (column.aggregation()) {
411
47.4M
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE:
412
47.6M
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_SUM:
413
47.7M
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MIN:
414
47.7M
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MAX:
415
48.1M
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE:
416
48.3M
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE_IF_NOT_NULL:
417
48.3M
            switch (column.type()) {
418
189k
            case FieldType::OLAP_FIELD_TYPE_CHAR:
419
189k
                return new CharField(column);
420
9.82M
            case FieldType::OLAP_FIELD_TYPE_VARCHAR:
421
9.82M
                return new VarcharField(column);
422
5.23M
            case FieldType::OLAP_FIELD_TYPE_STRING:
423
5.23M
                return new StringField(column);
424
51.7k
            case FieldType::OLAP_FIELD_TYPE_STRUCT: {
425
51.7k
                auto* local = new StructField(column);
426
187k
                for (uint32_t i = 0; i < column.get_subtype_count(); i++) {
427
136k
                    std::unique_ptr<StorageField> sub_field(
428
136k
                            StorageFieldFactory::create(column.get_sub_column(i)));
429
136k
                    local->add_sub_field(std::move(sub_field));
430
136k
                }
431
51.7k
                return local;
432
0
            }
433
463k
            case FieldType::OLAP_FIELD_TYPE_ARRAY: {
434
463k
                std::unique_ptr<StorageField> item_field(
435
463k
                        StorageFieldFactory::create(column.get_sub_column(0)));
436
463k
                auto* local = new ArrayField(column);
437
463k
                local->add_sub_field(std::move(item_field));
438
463k
                return local;
439
0
            }
440
135k
            case FieldType::OLAP_FIELD_TYPE_MAP: {
441
135k
                DCHECK(column.get_subtype_count() == 2);
442
135k
                auto* local = new MapField(column);
443
135k
                std::unique_ptr<StorageField> key_field(
444
135k
                        StorageFieldFactory::create(column.get_sub_column(0)));
445
135k
                std::unique_ptr<StorageField> value_field(
446
135k
                        StorageFieldFactory::create(column.get_sub_column(1)));
447
135k
                local->add_sub_field(std::move(key_field));
448
135k
                local->add_sub_field(std::move(value_field));
449
135k
                return local;
450
0
            }
451
4.01k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL:
452
4.01k
                [[fallthrough]];
453
97.4k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL32:
454
97.4k
                [[fallthrough]];
455
340k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL64:
456
340k
                [[fallthrough]];
457
527k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL128I:
458
527k
                [[fallthrough]];
459
536k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL256:
460
536k
                [[fallthrough]];
461
669k
            case FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ:
462
669k
                [[fallthrough]];
463
5.68M
            case FieldType::OLAP_FIELD_TYPE_DATETIMEV2: {
464
5.68M
                StorageField* field = new StorageField(column);
465
5.68M
                field->set_precision(column.precision());
466
5.68M
                field->set_scale(column.frac());
467
5.68M
                return field;
468
669k
            }
469
26.8M
            default:
470
26.8M
                return new StorageField(column);
471
48.3M
            }
472
15.4k
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_HLL_UNION:
473
15.4k
            return new HllAggField(column);
474
25.2k
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_BITMAP_UNION:
475
25.2k
            return new BitmapAggField(column);
476
10.3k
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_QUANTILE_UNION:
477
10.3k
            return new QuantileStateAggField(column);
478
6.68k
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_GENERIC:
479
6.68k
            return new AggStateField(column);
480
0
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_UNKNOWN:
481
0
            CHECK(false) << ", value column no agg type";
482
0
            return nullptr;
483
48.4M
        }
484
0
        return nullptr;
485
48.4M
    }
486
487
    static StorageField* create_by_type(const FieldType& type) {
488
        TabletColumn column(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE, type);
489
        return create(column);
490
    }
491
};
492
#include "common/compile_check_end.h"
493
} // namespace doris