Coverage Report

Created: 2026-05-13 13:59

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/field.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <cstddef>
21
#include <sstream>
22
#include <string>
23
24
#include "core/arena.h"
25
#include "core/value/map_value.h"
26
#include "runtime/collection_value.h"
27
#include "storage/key_coder.h"
28
#include "storage/olap_common.h"
29
#include "storage/olap_define.h"
30
#include "storage/tablet/tablet_schema.h"
31
#include "storage/types.h"
32
#include "storage/utils.h"
33
#include "util/hash_util.hpp"
34
#include "util/json/path_in_data.h"
35
#include "util/slice.h"
36
37
namespace doris {
38
// A Field is used to represent a column in memory format.
39
// User can use this class to access or deal with column data in memory.
40
class StorageField {
41
public:
42
    StorageField(const TabletColumn& column)
43
104M
            : _type(column.type()),
44
104M
              _desc(column),
45
104M
              _length(column.length()),
46
104M
              _key_coder(get_key_coder(column.type())),
47
104M
              _name(column.name()),
48
104M
              _index_size(column.index_length()),
49
104M
              _is_nullable(column.is_nullable()),
50
104M
              _unique_id(column.unique_id()),
51
104M
              _parent_unique_id(column.parent_unique_id()),
52
104M
              _is_extracted_column(column.is_extracted_column()),
53
104M
              _path(column.path_info_ptr()) {}
54
55
105M
    virtual ~StorageField() = default;
56
57
5.72M
    size_t size() const { return field_type_size(_type); }
58
1.07k
    size_t length() const { return _length; }
59
0
    size_t field_size() const { return size() + 1; }
60
0
    size_t index_size() const { return _index_size; }
61
55.3M
    int32_t unique_id() const { return _unique_id; }
62
53.5k
    int32_t parent_unique_id() const { return _parent_unique_id; }
63
53.7M
    bool is_extracted_column() const { return _is_extracted_column; }
64
82.6M
    const std::string& name() const { return _name; }
65
0
    const PathInDataPtr& path() const { return _path; }
66
67
2.65M
    virtual StorageField* clone() const {
68
2.65M
        auto* local = new StorageField(_desc);
69
2.65M
        this->clone(local);
70
2.65M
        return local;
71
2.65M
    }
72
73
75.0M
    FieldType type() const { return _type; }
74
61.1M
    bool is_nullable() const { return _is_nullable; }
75
76
    // similar to `full_encode_ascending`, but only encode part (the first `index_size` bytes) of the value.
77
    // only applicable to string type
78
10.9k
    void encode_ascending(const void* value, std::string* buf) const {
79
10.9k
        _key_coder->encode_ascending(value, _index_size, buf);
80
10.9k
    }
81
82
    // encode the provided `value` into `buf`.
83
11.2M
    void full_encode_ascending(const void* value, std::string* buf) const {
84
11.2M
        _key_coder->full_encode_ascending(value, buf);
85
11.2M
    }
86
87
1.17M
    const KeyCoder* key_coder() const { return _key_coder; }
88
726k
    void add_sub_field(std::unique_ptr<StorageField> sub_field) {
89
726k
        _sub_fields.emplace_back(std::move(sub_field));
90
726k
    }
91
138k
    StorageField* get_sub_field(size_t i) const { return _sub_fields[i].get(); }
92
45.3k
    size_t get_sub_field_count() const { return _sub_fields.size(); }
93
94
6.14M
    void set_precision(int32_t precision) { _precision = precision; }
95
6.14M
    void set_scale(int32_t scale) { _scale = scale; }
96
0
    int32_t get_precision() const { return _precision; }
97
22.7M
    int32_t get_scale() const { return _scale; }
98
97.7M
    const TabletColumn& get_desc() const { return _desc; }
99
100
26.9M
    int32_t get_unique_id() const {
101
26.9M
        return is_extracted_column() ? parent_unique_id() : unique_id();
102
26.9M
    }
103
104
protected:
105
    FieldType _type;
106
    TabletColumn _desc;
107
    // unit : byte
108
    // except for strings, other types have fixed lengths
109
    // Note that, the struct type itself has fixed length, but due to
110
    // its number of subfields is a variable, so the actual length of
111
    // a struct field is not fixed.
112
    size_t _length;
113
114
8.32M
    void clone(StorageField* other) const {
115
8.32M
        other->_type = this->_type;
116
8.32M
        other->_key_coder = this->_key_coder;
117
8.32M
        other->_name = this->_name;
118
8.32M
        other->_index_size = this->_index_size;
119
8.32M
        other->_is_nullable = this->_is_nullable;
120
8.32M
        other->_sub_fields.clear();
121
8.32M
        other->_precision = this->_precision;
122
8.32M
        other->_scale = this->_scale;
123
8.32M
        other->_unique_id = this->_unique_id;
124
8.32M
        other->_parent_unique_id = this->_parent_unique_id;
125
8.32M
        other->_is_extracted_column = this->_is_extracted_column;
126
8.32M
        for (const auto& f : _sub_fields) {
127
0
            StorageField* item = f->clone();
128
0
            other->add_sub_field(std::unique_ptr<StorageField>(item));
129
0
        }
130
8.32M
    }
131
132
private:
133
    // maximum length of Field, unit : bytes
134
    // usually equal to length, except for variable-length strings
135
    const KeyCoder* _key_coder;
136
    std::string _name;
137
    size_t _index_size;
138
    bool _is_nullable;
139
    std::vector<std::unique_ptr<StorageField>> _sub_fields;
140
    int32_t _precision;
141
    int32_t _scale;
142
    int32_t _unique_id;
143
    int32_t _parent_unique_id;
144
    bool _is_extracted_column = false;
145
    PathInDataPtr _path;
146
};
147
148
class MapField : public StorageField {
149
public:
150
116k
    MapField(const TabletColumn& column) : StorageField(column) {}
151
};
152
153
class StructField : public StorageField {
154
public:
155
56.9k
    StructField(const TabletColumn& column) : StorageField(column) {}
156
};
157
158
class ArrayField : public StorageField {
159
public:
160
312k
    ArrayField(const TabletColumn& column) : StorageField(column) {}
161
};
162
163
class CharField : public StorageField {
164
public:
165
201k
    CharField(const TabletColumn& column) : StorageField(column) {}
166
167
1.88k
    CharField* clone() const override {
168
1.88k
        auto* local = new CharField(_desc);
169
1.88k
        StorageField::clone(local);
170
1.88k
        return local;
171
1.88k
    }
172
};
173
174
class VarcharField : public StorageField {
175
public:
176
10.3M
    VarcharField(const TabletColumn& column) : StorageField(column) {}
177
178
0
    VarcharField* clone() const override {
179
0
        auto* local = new VarcharField(_desc);
180
0
        StorageField::clone(local);
181
0
        return local;
182
0
    }
183
};
184
class StringField : public StorageField {
185
public:
186
55.2M
    StringField(const TabletColumn& column) : StorageField(column) {}
187
188
5.67M
    StringField* clone() const override {
189
5.67M
        auto* local = new StringField(_desc);
190
5.67M
        StorageField::clone(local);
191
5.67M
        return local;
192
5.67M
    }
193
};
194
195
class BitmapAggField : public StorageField {
196
public:
197
25.2k
    BitmapAggField(const TabletColumn& column) : StorageField(column) {}
198
199
0
    BitmapAggField* clone() const override {
200
0
        auto* local = new BitmapAggField(_desc);
201
0
        StorageField::clone(local);
202
0
        return local;
203
0
    }
204
};
205
206
class QuantileStateAggField : public StorageField {
207
public:
208
10.7k
    QuantileStateAggField(const TabletColumn& column) : StorageField(column) {}
209
210
0
    QuantileStateAggField* clone() const override {
211
0
        auto* local = new QuantileStateAggField(_desc);
212
0
        StorageField::clone(local);
213
0
        return local;
214
0
    }
215
};
216
217
class AggStateField : public StorageField {
218
public:
219
7.06k
    AggStateField(const TabletColumn& column) : StorageField(column) {}
220
221
0
    AggStateField* clone() const override {
222
0
        auto* local = new AggStateField(_desc);
223
0
        StorageField::clone(local);
224
0
        return local;
225
0
    }
226
};
227
228
class HllAggField : public StorageField {
229
public:
230
15.5k
    HllAggField(const TabletColumn& column) : StorageField(column) {}
231
232
0
    HllAggField* clone() const override {
233
0
        auto* local = new HllAggField(_desc);
234
0
        StorageField::clone(local);
235
0
        return local;
236
0
    }
237
};
238
239
class StorageFieldFactory {
240
public:
241
97.0M
    static StorageField* create(const TabletColumn& column) {
242
        // for key column
243
97.0M
        if (column.is_key()) {
244
47.0M
            switch (column.type()) {
245
38.0k
            case FieldType::OLAP_FIELD_TYPE_CHAR:
246
38.0k
                return new CharField(column);
247
44.2M
            case FieldType::OLAP_FIELD_TYPE_VARCHAR:
248
44.2M
            case FieldType::OLAP_FIELD_TYPE_STRING:
249
44.2M
                return new StringField(column);
250
0
            case FieldType::OLAP_FIELD_TYPE_STRUCT: {
251
0
                auto* local = new StructField(column);
252
0
                for (uint32_t i = 0; i < column.get_subtype_count(); i++) {
253
0
                    std::unique_ptr<StorageField> sub_field(
254
0
                            StorageFieldFactory::create(column.get_sub_column(i)));
255
0
                    local->add_sub_field(std::move(sub_field));
256
0
                }
257
0
                return local;
258
44.2M
            }
259
0
            case FieldType::OLAP_FIELD_TYPE_ARRAY: {
260
0
                std::unique_ptr<StorageField> item_field(
261
0
                        StorageFieldFactory::create(column.get_sub_column(0)));
262
0
                auto* local = new ArrayField(column);
263
0
                local->add_sub_field(std::move(item_field));
264
0
                return local;
265
44.2M
            }
266
0
            case FieldType::OLAP_FIELD_TYPE_MAP: {
267
0
                std::unique_ptr<StorageField> key_field(
268
0
                        StorageFieldFactory::create(column.get_sub_column(0)));
269
0
                std::unique_ptr<StorageField> val_field(
270
0
                        StorageFieldFactory::create(column.get_sub_column(1)));
271
0
                auto* local = new MapField(column);
272
0
                local->add_sub_field(std::move(key_field));
273
0
                local->add_sub_field(std::move(val_field));
274
0
                return local;
275
44.2M
            }
276
2.37k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL:
277
2.37k
                [[fallthrough]];
278
38.4k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL32:
279
38.4k
                [[fallthrough]];
280
45.5k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL64:
281
45.5k
                [[fallthrough]];
282
88.1k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL128I:
283
88.1k
                [[fallthrough]];
284
94.1k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL256:
285
94.1k
                [[fallthrough]];
286
173k
            case FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ:
287
173k
                [[fallthrough]];
288
286k
            case FieldType::OLAP_FIELD_TYPE_DATETIMEV2: {
289
286k
                StorageField* field = new StorageField(column);
290
286k
                field->set_precision(column.precision());
291
286k
                field->set_scale(column.frac());
292
286k
                return field;
293
173k
            }
294
2.45M
            default:
295
2.45M
                return new StorageField(column);
296
47.0M
            }
297
47.0M
        }
298
299
        // for value column
300
50.0M
        switch (column.aggregation()) {
301
49.1M
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE:
302
49.3M
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_SUM:
303
49.3M
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MIN:
304
49.4M
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MAX:
305
49.8M
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE:
306
49.9M
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE_IF_NOT_NULL:
307
49.9M
            switch (column.type()) {
308
162k
            case FieldType::OLAP_FIELD_TYPE_CHAR:
309
162k
                return new CharField(column);
310
10.3M
            case FieldType::OLAP_FIELD_TYPE_VARCHAR:
311
10.3M
                return new VarcharField(column);
312
5.59M
            case FieldType::OLAP_FIELD_TYPE_STRING:
313
5.59M
                return new StringField(column);
314
57.0k
            case FieldType::OLAP_FIELD_TYPE_STRUCT: {
315
57.0k
                auto* local = new StructField(column);
316
236k
                for (uint32_t i = 0; i < column.get_subtype_count(); i++) {
317
179k
                    std::unique_ptr<StorageField> sub_field(
318
179k
                            StorageFieldFactory::create(column.get_sub_column(i)));
319
179k
                    local->add_sub_field(std::move(sub_field));
320
179k
                }
321
57.0k
                return local;
322
0
            }
323
314k
            case FieldType::OLAP_FIELD_TYPE_ARRAY: {
324
314k
                std::unique_ptr<StorageField> item_field(
325
314k
                        StorageFieldFactory::create(column.get_sub_column(0)));
326
314k
                auto* local = new ArrayField(column);
327
314k
                local->add_sub_field(std::move(item_field));
328
314k
                return local;
329
0
            }
330
116k
            case FieldType::OLAP_FIELD_TYPE_MAP: {
331
116k
                DCHECK(column.get_subtype_count() == 2);
332
116k
                auto* local = new MapField(column);
333
116k
                std::unique_ptr<StorageField> key_field(
334
116k
                        StorageFieldFactory::create(column.get_sub_column(0)));
335
116k
                std::unique_ptr<StorageField> value_field(
336
116k
                        StorageFieldFactory::create(column.get_sub_column(1)));
337
116k
                local->add_sub_field(std::move(key_field));
338
116k
                local->add_sub_field(std::move(value_field));
339
116k
                return local;
340
0
            }
341
4.56k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL:
342
4.56k
                [[fallthrough]];
343
68.2k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL32:
344
68.2k
                [[fallthrough]];
345
301k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL64:
346
301k
                [[fallthrough]];
347
464k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL128I:
348
464k
                [[fallthrough]];
349
471k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL256:
350
471k
                [[fallthrough]];
351
614k
            case FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ:
352
614k
                [[fallthrough]];
353
5.85M
            case FieldType::OLAP_FIELD_TYPE_DATETIMEV2: {
354
5.85M
                StorageField* field = new StorageField(column);
355
5.85M
                field->set_precision(column.precision());
356
5.85M
                field->set_scale(column.frac());
357
5.85M
                return field;
358
614k
            }
359
27.5M
            default:
360
27.5M
                return new StorageField(column);
361
49.9M
            }
362
15.5k
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_HLL_UNION:
363
15.5k
            return new HllAggField(column);
364
25.3k
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_BITMAP_UNION:
365
25.3k
            return new BitmapAggField(column);
366
10.7k
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_QUANTILE_UNION:
367
10.7k
            return new QuantileStateAggField(column);
368
7.12k
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_GENERIC:
369
7.12k
            return new AggStateField(column);
370
0
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_UNKNOWN:
371
0
            CHECK(false) << ", value column no agg type";
372
0
            return nullptr;
373
50.0M
        }
374
0
        return nullptr;
375
50.0M
    }
376
377
    static StorageField* create_by_type(const FieldType& type) {
378
        TabletColumn column(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE, type);
379
        return create(column);
380
    }
381
};
382
} // namespace doris