Coverage Report

Created: 2026-04-01 07:28

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/field.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <cstddef>
21
#include <sstream>
22
#include <string>
23
24
#include "core/arena.h"
25
#include "core/value/map_value.h"
26
#include "runtime/collection_value.h"
27
#include "storage/key_coder.h"
28
#include "storage/olap_common.h"
29
#include "storage/olap_define.h"
30
#include "storage/tablet/tablet_schema.h"
31
#include "storage/types.h"
32
#include "storage/utils.h"
33
#include "util/hash_util.hpp"
34
#include "util/json/path_in_data.h"
35
#include "util/slice.h"
36
37
namespace doris {
38
#include "common/compile_check_begin.h"
39
// A StorageField represents one column in its in-memory format.
40
// Callers use this class to access or manipulate that column's data in memory.
41
// It keeps its own copy of the TabletColumn descriptor plus values read from it
// at construction time (type info, key coder, name, lengths, ids, path).
class StorageField {
42
public:
43
    // Builds a field from `column`: resolves the type info and key coder for it,
    // copies the descriptor into `_desc`, and caches its length, name, index
    // length, nullability, unique ids, extracted-column flag and path.
    // NOTE(review): `_precision` and `_scale` are not initialized here and have no
    // default member initializer; they only hold a defined value after
    // set_precision()/set_scale() has been called.
    StorageField(const TabletColumn& column)
44
111M
            : _type_info(get_type_info(&column)),
45
111M
              _desc(column),
46
111M
              _length(column.length()),
47
111M
              _key_coder(get_key_coder(column.type())),
48
111M
              _name(column.name()),
49
111M
              _index_size(column.index_length()),
50
111M
              _is_nullable(column.is_nullable()),
51
111M
              _unique_id(column.unique_id()),
52
111M
              _parent_unique_id(column.parent_unique_id()),
53
111M
              _is_extracted_column(column.is_extracted_column()),
54
111M
              _path(column.path_info_ptr()) {}
55
56
113M
    // Virtual: sub-fields are held as unique_ptr<StorageField>, so derived fields
    // are destroyed through a base pointer.
    virtual ~StorageField() = default;
57
58
5.25M
    // Size reported by the type info for this field's type.
    size_t size() const { return _type_info->size(); }
59
1.17k
    // Length taken from column.length() at construction.
    size_t length() const { return _length; }
60
0
    // size() plus one; presumably the extra unit is a null-indicator byte — TODO confirm.
    size_t field_size() const { return size() + 1; }
61
0
    // Index length taken from column.index_length() at construction.
    size_t index_size() const { return _index_size; }
62
57.4M
    // This column's own unique id (see get_unique_id() for the extracted-column aware variant).
    int32_t unique_id() const { return _unique_id; }
63
72.2k
    int32_t parent_unique_id() const { return _parent_unique_id; }
64
55.7M
    bool is_extracted_column() const { return _is_extracted_column; }
65
86.3M
    const std::string& name() const { return _name; }
66
0
    // Path pointer taken from column.path_info_ptr() at construction.
    const PathInDataPtr& path() const { return _path; }
67
68
0
    // Forwards `buf` to TypeInfo::set_to_max. CharField, VarcharField and
    // StringField override this with Slice-based implementations.
    virtual void set_to_max(char* buf) const { return _type_info->set_to_max(buf); }
69
70
2
    // Forwards `buf` to TypeInfo::set_to_min. No subclass in this file overrides it.
    virtual void set_to_min(char* buf) const { return _type_info->set_to_min(buf); }
71
72
2.28M
    // Returns a new heap-allocated StorageField built from `_desc`, with this
    // field's state copied in by the protected clone(StorageField*) overload.
    // The result is a raw pointer from `new`; the one caller visible in this file
    // wraps it in a unique_ptr immediately.
    virtual StorageField* clone() const {
73
2.28M
        auto* local = new StorageField(_desc);
74
2.28M
        this->clone(local);
75
2.28M
        return local;
76
2.28M
    }
77
78
77.3M
    FieldType type() const { return _type_info->type(); }
79
52.2k
    // Non-owning pointer; `_type_info` keeps ownership.
    const TypeInfo* type_info() const { return _type_info.get(); }
80
63.4M
    bool is_nullable() const { return _is_nullable; }
81
82
    // Similar to `full_encode_ascending`, but encodes only part of the value (the first `index_size` bytes).
83
    // Only applicable to string types.
84
596k
    void encode_ascending(const void* value, std::string* buf) const {
85
596k
        _key_coder->encode_ascending(value, _index_size, buf);
86
596k
    }
87
88
    // Encodes the provided `value` into `buf` via the key coder.
89
12.1M
    void full_encode_ascending(const void* value, std::string* buf) const {
90
12.1M
        _key_coder->full_encode_ascending(value, buf);
91
12.1M
    }
92
852k
    // Takes ownership of `sub_field` and appends it to the sub-field list.
    void add_sub_field(std::unique_ptr<StorageField> sub_field) {
93
852k
        _sub_fields.emplace_back(std::move(sub_field));
94
852k
    }
95
144k
    // Non-owning pointer to the i-th sub-field. Uses operator[], so `i` is not
    // bounds-checked; callers must keep it below get_sub_field_count().
    StorageField* get_sub_field(size_t i) const { return _sub_fields[i].get(); }
96
45.2k
    size_t get_sub_field_count() const { return _sub_fields.size(); }
97
98
6.69M
    void set_precision(int32_t precision) { _precision = precision; }
99
6.69M
    void set_scale(int32_t scale) { _scale = scale; }
100
0
    // NOTE(review): returns `_precision` as-is; it is only meaningful if
    // set_precision() was called (or the value was copied from a field where it was).
    int32_t get_precision() const { return _precision; }
101
24.2M
    // NOTE(review): same caveat as get_precision(), for `_scale`.
    int32_t get_scale() const { return _scale; }
102
101M
    // Reference to this field's own copy of the column descriptor.
    const TabletColumn& get_desc() const { return _desc; }
103
104
27.9M
    // For an extracted column returns the parent's unique id, otherwise this
    // column's own unique id.
    int32_t get_unique_id() const {
105
27.9M
        return is_extracted_column() ? parent_unique_id() : unique_id();
106
27.9M
    }
107
108
protected:
109
    TypeInfoPtr _type_info;
110
    TabletColumn _desc;
111
    // Unit: bytes.
112
    // Except for strings, all types have fixed lengths.
113
    // Note that the struct type itself has a fixed length, but because
114
    // its number of subfields varies, the actual length of
115
    // a struct field is not fixed.
116
    size_t _length;
117
118
7.98M
    // Copies this field's state into `other`: a cloned type info, the key coder
    // pointer (copied as a pointer, not cloned), name, index size, nullability,
    // precision, scale, unique ids and the extracted-column flag. `other`'s
    // sub-fields are cleared and replaced with clones of this field's sub-fields.
    // `_desc`, `_length` and `_path` are not assigned here; every caller in this
    // file has just constructed `other` from the same `_desc`.
    // NOTE(review): `_precision`/`_scale` are read even when they were never set.
    void clone(StorageField* other) const {
119
7.98M
        other->_type_info = clone_type_info(this->_type_info.get());
120
7.98M
        other->_key_coder = this->_key_coder;
121
7.98M
        other->_name = this->_name;
122
7.98M
        other->_index_size = this->_index_size;
123
7.98M
        other->_is_nullable = this->_is_nullable;
124
7.98M
        other->_sub_fields.clear();
125
7.98M
        other->_precision = this->_precision;
126
7.98M
        other->_scale = this->_scale;
127
7.98M
        other->_unique_id = this->_unique_id;
128
7.98M
        other->_parent_unique_id = this->_parent_unique_id;
129
7.98M
        other->_is_extracted_column = this->_is_extracted_column;
130
7.98M
        for (const auto& f : _sub_fields) {
131
0
            // Virtual clone(), so each sub-field decides its own concrete type.
            StorageField* item = f->clone();
132
0
            other->add_sub_field(std::unique_ptr<StorageField>(item));
133
0
        }
134
7.98M
    }
135
136
private:
137
    // Maximum length of the field, unit: bytes.
138
    // Usually equal to the length, except for variable-length strings.
    // NOTE(review): this description does not fit `_key_coder` directly below; it
    // looks like it was written for `_index_size` — confirm and move.
139
    const KeyCoder* _key_coder;
140
    std::string _name;
141
    size_t _index_size;
142
    bool _is_nullable;
143
    // Owned child fields, appended through add_sub_field().
    std::vector<std::unique_ptr<StorageField>> _sub_fields;
144
    // Not initialized by the constructor; written only by set_precision()/clone().
    int32_t _precision;
145
    // Not initialized by the constructor; written only by set_scale()/clone().
    int32_t _scale;
146
    int32_t _unique_id;
147
    int32_t _parent_unique_id;
148
    bool _is_extracted_column = false;
149
    PathInDataPtr _path;
150
};
151
152
// Field created by StorageFieldFactory for OLAP_FIELD_TYPE_MAP columns.
// Adds no members or overrides. It does not override clone(), so cloning a
// MapField runs StorageField::clone(), which allocates a plain StorageField.
class MapField : public StorageField {
153
public:
154
127k
    MapField(const TabletColumn& column) : StorageField(column) {}
155
};
156
157
// Field created by StorageFieldFactory for OLAP_FIELD_TYPE_STRUCT columns.
// Adds no members or overrides. It does not override clone(), so cloning a
// StructField runs StorageField::clone(), which allocates a plain StorageField.
class StructField : public StorageField {
158
public:
159
48.1k
    StructField(const TabletColumn& column) : StorageField(column) {}
160
};
161
162
// Field created by StorageFieldFactory for OLAP_FIELD_TYPE_ARRAY columns.
// Adds no members or overrides. It does not override clone(), so cloning an
// ArrayField runs StorageField::clone(), which allocates a plain StorageField.
class ArrayField : public StorageField {
163
public:
164
425k
    ArrayField(const TabletColumn& column) : StorageField(column) {}
165
};
166
167
// Field created by StorageFieldFactory for OLAP_FIELD_TYPE_CHAR columns.
// Overrides clone() to preserve the concrete type and set_to_max() to fill a
// Slice instead of delegating to the type info.
class CharField : public StorageField {
168
public:
169
228k
    CharField(const TabletColumn& column) : StorageField(column) {}
170
171
2.07k
    // Returns a new heap-allocated CharField built from `_desc`, with this
    // field's state copied in by StorageField::clone(StorageField*).
    CharField* clone() const override {
172
2.07k
        auto* local = new CharField(_desc);
173
2.07k
        StorageField::clone(local);
174
2.07k
        return local;
175
2.07k
    }
176
177
0
    // Treats `ch` as a Slice*, sets its size to `_length` and fills that many
    // bytes of its data with 0xFF.
    // Assumes slice->data already points at a buffer of at least `_length`
    // bytes; nothing here allocates or checks it — TODO confirm at call sites.
    void set_to_max(char* ch) const override {
178
0
        auto slice = reinterpret_cast<Slice*>(ch);
179
0
        slice->size = _length;
180
0
        memset(slice->data, 0xFF, slice->size);
181
0
    }
182
};
183
184
// Field created by StorageFieldFactory for value columns of type
// OLAP_FIELD_TYPE_VARCHAR (key VARCHAR columns get a StringField instead).
class VarcharField : public StorageField {
185
public:
186
11.2M
    VarcharField(const TabletColumn& column) : StorageField(column) {}
187
188
0
    // Returns a new heap-allocated VarcharField built from `_desc`, with this
    // field's state copied in by StorageField::clone(StorageField*).
    VarcharField* clone() const override {
189
0
        auto* local = new VarcharField(_desc);
190
0
        StorageField::clone(local);
191
0
        return local;
192
0
    }
193
194
2
    // Treats `ch` as a Slice*, sets its size to `_length - OLAP_VARCHAR_MAX_BYTES`
    // and fills that many bytes of its data with 0xFF.
    // Presumably OLAP_VARCHAR_MAX_BYTES is length-prefix overhead that `_length`
    // includes — TODO confirm. The subtraction is on size_t, so it assumes
    // `_length >= OLAP_VARCHAR_MAX_BYTES`; slice->data must already be large enough.
    void set_to_max(char* ch) const override {
195
2
        auto slice = reinterpret_cast<Slice*>(ch);
196
2
        slice->size = _length - OLAP_VARCHAR_MAX_BYTES;
197
2
        memset(slice->data, 0xFF, slice->size);
198
2
    }
199
};
200
// Field created by StorageFieldFactory for OLAP_FIELD_TYPE_STRING columns and
// for key columns of type OLAP_FIELD_TYPE_VARCHAR.
class StringField : public StorageField {
201
public:
202
58.7M
    StringField(const TabletColumn& column) : StorageField(column) {}
203
204
5.70M
    // Returns a new heap-allocated StringField built from `_desc`, with this
    // field's state copied in by StorageField::clone(StorageField*).
    StringField* clone() const override {
205
5.70M
        auto* local = new StringField(_desc);
206
5.70M
        StorageField::clone(local);
207
5.70M
        return local;
208
5.70M
    }
209
210
0
    // Treats `ch` as a Slice* and fills its existing `size` bytes with 0xFF.
    // Unlike the CHAR/VARCHAR variants this does not assign slice->size, so the
    // caller must have set both size and data beforehand.
    void set_to_max(char* ch) const override {
211
0
        auto slice = reinterpret_cast<Slice*>(ch);
212
0
        memset(slice->data, 0xFF, slice->size);
213
0
    }
214
};
215
216
// Field created by StorageFieldFactory for value columns whose aggregation is
// OLAP_FIELD_AGGREGATION_BITMAP_UNION. Only clone() is overridden.
class BitmapAggField : public StorageField {
217
public:
218
25.4k
    BitmapAggField(const TabletColumn& column) : StorageField(column) {}
219
220
0
    // Returns a new heap-allocated BitmapAggField built from `_desc`, with this
    // field's state copied in by StorageField::clone(StorageField*).
    BitmapAggField* clone() const override {
221
0
        auto* local = new BitmapAggField(_desc);
222
0
        StorageField::clone(local);
223
0
        return local;
224
0
    }
225
};
226
227
// Field created by StorageFieldFactory for value columns whose aggregation is
// OLAP_FIELD_AGGREGATION_QUANTILE_UNION. Only clone() is overridden.
class QuantileStateAggField : public StorageField {
228
public:
229
10.7k
    QuantileStateAggField(const TabletColumn& column) : StorageField(column) {}
230
231
0
    // Returns a new heap-allocated QuantileStateAggField built from `_desc`, with
    // this field's state copied in by StorageField::clone(StorageField*).
    QuantileStateAggField* clone() const override {
232
0
        auto* local = new QuantileStateAggField(_desc);
233
0
        StorageField::clone(local);
234
0
        return local;
235
0
    }
236
};
237
238
// Field created by StorageFieldFactory for value columns whose aggregation is
// OLAP_FIELD_AGGREGATION_GENERIC. Only clone() is overridden.
class AggStateField : public StorageField {
239
public:
240
6.61k
    AggStateField(const TabletColumn& column) : StorageField(column) {}
241
242
0
    // Returns a new heap-allocated AggStateField built from `_desc`, with this
    // field's state copied in by StorageField::clone(StorageField*).
    AggStateField* clone() const override {
243
0
        auto* local = new AggStateField(_desc);
244
0
        StorageField::clone(local);
245
0
        return local;
246
0
    }
247
};
248
249
// Field created by StorageFieldFactory for value columns whose aggregation is
// OLAP_FIELD_AGGREGATION_HLL_UNION. Only clone() is overridden.
class HllAggField : public StorageField {
250
public:
251
16.0k
    HllAggField(const TabletColumn& column) : StorageField(column) {}
252
253
0
    // Returns a new heap-allocated HllAggField built from `_desc`, with this
    // field's state copied in by StorageField::clone(StorageField*).
    HllAggField* clone() const override {
254
0
        auto* local = new HllAggField(_desc);
255
0
        StorageField::clone(local);
256
0
        return local;
257
0
    }
258
};
259
260
// Static factory that picks the concrete StorageField subclass for a column.
class StorageFieldFactory {
261
public:
262
105M
    // Creates a heap-allocated field for `column` and returns it as a raw pointer
    // obtained from `new`.
    //  - Key columns (column.is_key()) are dispatched on column.type() only.
    //  - Value columns are dispatched on column.aggregation() first; for
    //    NONE/SUM/MIN/MAX/REPLACE/REPLACE_IF_NOT_NULL they are then dispatched on
    //    column.type(), while the union/generic aggregations map straight to
    //    their dedicated field class.
    // STRUCT, ARRAY and MAP columns get their sub-fields created recursively from
    // the column's sub-columns.
    // Returns nullptr only on the UNKNOWN-aggregation path or when aggregation()
    // matches none of the listed cases.
    static StorageField* create(const TabletColumn& column) {
263
        // Key columns.
264
105M
        if (column.is_key()) {
265
50.5M
            switch (column.type()) {
266
38.6k
            case FieldType::OLAP_FIELD_TYPE_CHAR:
267
38.6k
                return new CharField(column);
268
47.6M
            // Key VARCHAR and STRING columns both use StringField; value VARCHAR
            // columns use VarcharField in the switch further down.
            case FieldType::OLAP_FIELD_TYPE_VARCHAR:
269
47.6M
            case FieldType::OLAP_FIELD_TYPE_STRING:
270
47.6M
                return new StringField(column);
271
0
            case FieldType::OLAP_FIELD_TYPE_STRUCT: {
272
0
                auto* local = new StructField(column);
273
0
                // One sub-field per sub-column, in sub-column order.
                for (uint32_t i = 0; i < column.get_subtype_count(); i++) {
274
0
                    std::unique_ptr<StorageField> sub_field(
275
0
                            StorageFieldFactory::create(column.get_sub_column(i)));
276
0
                    local->add_sub_field(std::move(sub_field));
277
0
                }
278
0
                return local;
279
47.6M
            }
280
0
            case FieldType::OLAP_FIELD_TYPE_ARRAY: {
281
0
                // Sub-column 0 becomes the array's single sub-field.
                std::unique_ptr<StorageField> item_field(
282
0
                        StorageFieldFactory::create(column.get_sub_column(0)));
283
0
                auto* local = new ArrayField(column);
284
0
                local->add_sub_field(std::move(item_field));
285
0
                return local;
286
47.6M
            }
287
0
            case FieldType::OLAP_FIELD_TYPE_MAP: {
288
0
                // Sub-column 0 is added first (key), sub-column 1 second (value).
                // NOTE(review): unlike the value-column MAP branch below, there is
                // no DCHECK on get_subtype_count() here.
                std::unique_ptr<StorageField> key_field(
289
0
                        StorageFieldFactory::create(column.get_sub_column(0)));
290
0
                std::unique_ptr<StorageField> val_field(
291
0
                        StorageFieldFactory::create(column.get_sub_column(1)));
292
0
                auto* local = new MapField(column);
293
0
                local->add_sub_field(std::move(key_field));
294
0
                local->add_sub_field(std::move(val_field));
295
0
                return local;
296
47.6M
            }
297
2.16k
            // The following types all fall through to one branch that copies
            // precision and scale from the column onto the new field.
            case FieldType::OLAP_FIELD_TYPE_DECIMAL:
298
2.16k
                [[fallthrough]];
299
36.8k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL32:
300
36.8k
                [[fallthrough]];
301
43.8k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL64:
302
43.8k
                [[fallthrough]];
303
82.5k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL128I:
304
82.5k
                [[fallthrough]];
305
92.3k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL256:
306
92.3k
                [[fallthrough]];
307
165k
            case FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ:
308
165k
                [[fallthrough]];
309
277k
            case FieldType::OLAP_FIELD_TYPE_DATETIMEV2: {
310
277k
                StorageField* field = new StorageField(column);
311
277k
                field->set_precision(column.precision());
312
277k
                // Scale comes from column.frac().
                field->set_scale(column.frac());
313
277k
                return field;
314
165k
            }
315
2.45M
            // Every other key type uses the base class; precision/scale stay unset.
            default:
316
2.45M
                return new StorageField(column);
317
50.5M
            }
318
50.5M
        }
319
320
        // Value columns.
321
54.5M
        switch (column.aggregation()) {
322
53.7M
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE:
323
53.9M
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_SUM:
324
53.9M
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MIN:
325
54.0M
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MAX:
326
54.4M
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE:
327
54.5M
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE_IF_NOT_NULL:
328
54.5M
            // For these aggregations the field class depends on the column type.
            switch (column.type()) {
329
189k
            case FieldType::OLAP_FIELD_TYPE_CHAR:
330
189k
                return new CharField(column);
331
11.3M
            case FieldType::OLAP_FIELD_TYPE_VARCHAR:
332
11.3M
                return new VarcharField(column);
333
6.08M
            case FieldType::OLAP_FIELD_TYPE_STRING:
334
6.08M
                return new StringField(column);
335
48.3k
            case FieldType::OLAP_FIELD_TYPE_STRUCT: {
336
48.3k
                auto* local = new StructField(column);
337
216k
                // One sub-field per sub-column, in sub-column order.
                for (uint32_t i = 0; i < column.get_subtype_count(); i++) {
338
168k
                    std::unique_ptr<StorageField> sub_field(
339
168k
                            StorageFieldFactory::create(column.get_sub_column(i)));
340
168k
                    local->add_sub_field(std::move(sub_field));
341
168k
                }
342
48.3k
                return local;
343
0
            }
344
428k
            case FieldType::OLAP_FIELD_TYPE_ARRAY: {
345
428k
                // Sub-column 0 becomes the array's single sub-field.
                std::unique_ptr<StorageField> item_field(
346
428k
                        StorageFieldFactory::create(column.get_sub_column(0)));
347
428k
                auto* local = new ArrayField(column);
348
428k
                local->add_sub_field(std::move(item_field));
349
428k
                return local;
350
0
            }
351
128k
            case FieldType::OLAP_FIELD_TYPE_MAP: {
352
128k
                // Debug-only check that the column has exactly two sub-columns.
                DCHECK(column.get_subtype_count() == 2);
353
128k
                auto* local = new MapField(column);
354
128k
                // Sub-column 0 is added first (key), sub-column 1 second (value).
                std::unique_ptr<StorageField> key_field(
355
128k
                        StorageFieldFactory::create(column.get_sub_column(0)));
356
128k
                std::unique_ptr<StorageField> value_field(
357
128k
                        StorageFieldFactory::create(column.get_sub_column(1)));
358
128k
                local->add_sub_field(std::move(key_field));
359
128k
                local->add_sub_field(std::move(value_field));
360
128k
                return local;
361
0
            }
362
5.76k
            // The following types all fall through to one branch that copies
            // precision and scale from the column onto the new field.
            case FieldType::OLAP_FIELD_TYPE_DECIMAL:
363
5.76k
                [[fallthrough]];
364
80.8k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL32:
365
80.8k
                [[fallthrough]];
366
326k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL64:
367
326k
                [[fallthrough]];
368
513k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL128I:
369
513k
                [[fallthrough]];
370
524k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL256:
371
524k
                [[fallthrough]];
372
666k
            case FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ:
373
666k
                [[fallthrough]];
374
6.41M
            case FieldType::OLAP_FIELD_TYPE_DATETIMEV2: {
375
6.41M
                StorageField* field = new StorageField(column);
376
6.41M
                field->set_precision(column.precision());
377
6.41M
                // Scale comes from column.frac().
                field->set_scale(column.frac());
378
6.41M
                return field;
379
666k
            }
380
30.0M
            // Every other value type uses the base class; precision/scale stay unset.
            default:
381
30.0M
                return new StorageField(column);
382
54.5M
            }
383
16.0k
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_HLL_UNION:
384
16.0k
            return new HllAggField(column);
385
25.4k
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_BITMAP_UNION:
386
25.4k
            return new BitmapAggField(column);
387
10.7k
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_QUANTILE_UNION:
388
10.7k
            return new QuantileStateAggField(column);
389
6.65k
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_GENERIC:
390
6.65k
            return new AggStateField(column);
391
0
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_UNKNOWN:
392
0
            // A value column must carry an aggregation type. CHECK(false) is
            // presumably fatal, leaving the return below only to satisfy the
            // compiler — TODO confirm.
            CHECK(false) << ", value column no agg type";
393
0
            return nullptr;
394
54.5M
        }
395
0
        // Reached only if aggregation() matched none of the cases above.
        return nullptr;
396
54.5M
    }
397
398
    // Convenience wrapper: builds a temporary TabletColumn with aggregation NONE
    // and the given `type`, then delegates to create(). The returned field holds
    // its own copy of that column, so the temporary going out of scope is fine.
    static StorageField* create_by_type(const FieldType& type) {
399
        TabletColumn column(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE, type);
400
        return create(column);
401
    }
402
};
403
#include "common/compile_check_end.h"
404
} // namespace doris