Coverage Report

Created: 2026-05-09 08:31

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/field.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <cstddef>
21
#include <sstream>
22
#include <string>
23
24
#include "core/arena.h"
25
#include "core/value/map_value.h"
26
#include "runtime/collection_value.h"
27
#include "storage/key_coder.h"
28
#include "storage/olap_common.h"
29
#include "storage/olap_define.h"
30
#include "storage/tablet/tablet_schema.h"
31
#include "storage/types.h"
32
#include "storage/utils.h"
33
#include "util/hash_util.hpp"
34
#include "util/json/path_in_data.h"
35
#include "util/slice.h"
36
37
namespace doris {
38
// A Field is used to represent a column in memory format.
39
// User can use this class to access or deal with column data in memory.
40
class StorageField {
41
public:
42
    StorageField(const TabletColumn& column)
43
108M
            : _type_info(get_type_info(&column)),
44
108M
              _desc(column),
45
108M
              _length(column.length()),
46
108M
              _key_coder(get_key_coder(column.type())),
47
108M
              _name(column.name()),
48
108M
              _index_size(column.index_length()),
49
108M
              _is_nullable(column.is_nullable()),
50
108M
              _unique_id(column.unique_id()),
51
108M
              _parent_unique_id(column.parent_unique_id()),
52
108M
              _is_extracted_column(column.is_extracted_column()),
53
108M
              _path(column.path_info_ptr()) {}
54
55
110M
    virtual ~StorageField() = default;
56
57
5.40M
    size_t size() const { return _type_info->size(); }
58
1.51k
    size_t length() const { return _length; }
59
0
    size_t field_size() const { return size() + 1; }
60
666k
    size_t index_size() const { return _index_size; }
61
54.5M
    int32_t unique_id() const { return _unique_id; }
62
69.8k
    int32_t parent_unique_id() const { return _parent_unique_id; }
63
52.9M
    bool is_extracted_column() const { return _is_extracted_column; }
64
82.1M
    const std::string& name() const { return _name; }
65
0
    const PathInDataPtr& path() const { return _path; }
66
67
2.49M
    virtual StorageField* clone() const {
68
2.49M
        auto* local = new StorageField(_desc);
69
2.49M
        this->clone(local);
70
2.49M
        return local;
71
2.49M
    }
72
73
73.8M
    FieldType type() const { return _type_info->type(); }
74
49.5k
    const TypeInfo* type_info() const { return _type_info.get(); }
75
60.2M
    bool is_nullable() const { return _is_nullable; }
76
77
    // similar to `full_encode_ascending`, but only encode part (the first `index_size` bytes) of the value.
78
    // only applicable to string type
79
10.5k
    void encode_ascending(const void* value, std::string* buf) const {
80
10.5k
        _key_coder->encode_ascending(value, _index_size, buf);
81
10.5k
    }
82
83
    // encode the provided `value` into `buf`.
84
11.1M
    void full_encode_ascending(const void* value, std::string* buf) const {
85
11.1M
        _key_coder->full_encode_ascending(value, buf);
86
11.1M
    }
87
88
1.17M
    const KeyCoder* key_coder() const { return _key_coder; }
89
807k
    void add_sub_field(std::unique_ptr<StorageField> sub_field) {
90
807k
        _sub_fields.emplace_back(std::move(sub_field));
91
807k
    }
92
145k
    StorageField* get_sub_field(size_t i) const { return _sub_fields[i].get(); }
93
45.3k
    size_t get_sub_field_count() const { return _sub_fields.size(); }
94
95
6.50M
    void set_precision(int32_t precision) { _precision = precision; }
96
6.50M
    void set_scale(int32_t scale) { _scale = scale; }
97
0
    int32_t get_precision() const { return _precision; }
98
23.5M
    int32_t get_scale() const { return _scale; }
99
96.3M
    const TabletColumn& get_desc() const { return _desc; }
100
101
26.5M
    int32_t get_unique_id() const {
102
26.5M
        return is_extracted_column() ? parent_unique_id() : unique_id();
103
26.5M
    }
104
105
protected:
106
    TypeInfoPtr _type_info;
107
    TabletColumn _desc;
108
    // unit : byte
109
    // except for strings, other types have fixed lengths
110
    // Note that, the struct type itself has fixed length, but due to
111
    // its number of subfields is a variable, so the actual length of
112
    // a struct field is not fixed.
113
    size_t _length;
114
115
8.22M
    void clone(StorageField* other) const {
116
8.22M
        other->_type_info = clone_type_info(this->_type_info.get());
117
8.22M
        other->_key_coder = this->_key_coder;
118
8.22M
        other->_name = this->_name;
119
8.22M
        other->_index_size = this->_index_size;
120
8.22M
        other->_is_nullable = this->_is_nullable;
121
8.22M
        other->_sub_fields.clear();
122
8.22M
        other->_precision = this->_precision;
123
8.22M
        other->_scale = this->_scale;
124
8.22M
        other->_unique_id = this->_unique_id;
125
8.22M
        other->_parent_unique_id = this->_parent_unique_id;
126
8.22M
        other->_is_extracted_column = this->_is_extracted_column;
127
8.22M
        for (const auto& f : _sub_fields) {
128
0
            StorageField* item = f->clone();
129
0
            other->add_sub_field(std::unique_ptr<StorageField>(item));
130
0
        }
131
8.22M
    }
132
133
private:
134
    // maximum length of Field, unit : bytes
135
    // usually equal to length, except for variable-length strings
136
    const KeyCoder* _key_coder;
137
    std::string _name;
138
    size_t _index_size;
139
    bool _is_nullable;
140
    std::vector<std::unique_ptr<StorageField>> _sub_fields;
141
    int32_t _precision;
142
    int32_t _scale;
143
    int32_t _unique_id;
144
    int32_t _parent_unique_id;
145
    bool _is_extracted_column = false;
146
    PathInDataPtr _path;
147
};
148
149
class MapField : public StorageField {
150
public:
151
128k
    MapField(const TabletColumn& column) : StorageField(column) {}
152
};
153
154
class StructField : public StorageField {
155
public:
156
43.6k
    StructField(const TabletColumn& column) : StorageField(column) {}
157
};
158
159
class ArrayField : public StorageField {
160
public:
161
425k
    ArrayField(const TabletColumn& column) : StorageField(column) {}
162
};
163
164
class CharField : public StorageField {
165
public:
166
231k
    CharField(const TabletColumn& column) : StorageField(column) {}
167
168
2.22k
    CharField* clone() const override {
169
2.22k
        auto* local = new CharField(_desc);
170
2.22k
        StorageField::clone(local);
171
2.22k
        return local;
172
2.22k
    }
173
};
174
175
class VarcharField : public StorageField {
176
public:
177
10.8M
    VarcharField(const TabletColumn& column) : StorageField(column) {}
178
179
0
    VarcharField* clone() const override {
180
0
        auto* local = new VarcharField(_desc);
181
0
        StorageField::clone(local);
182
0
        return local;
183
0
    }
184
};
185
class StringField : public StorageField {
186
public:
187
57.0M
    StringField(const TabletColumn& column) : StorageField(column) {}
188
189
5.73M
    StringField* clone() const override {
190
5.73M
        auto* local = new StringField(_desc);
191
5.73M
        StorageField::clone(local);
192
5.73M
        return local;
193
5.73M
    }
194
};
195
196
class BitmapAggField : public StorageField {
197
public:
198
25.8k
    BitmapAggField(const TabletColumn& column) : StorageField(column) {}
199
200
0
    BitmapAggField* clone() const override {
201
0
        auto* local = new BitmapAggField(_desc);
202
0
        StorageField::clone(local);
203
0
        return local;
204
0
    }
205
};
206
207
class QuantileStateAggField : public StorageField {
208
public:
209
10.7k
    QuantileStateAggField(const TabletColumn& column) : StorageField(column) {}
210
211
0
    QuantileStateAggField* clone() const override {
212
0
        auto* local = new QuantileStateAggField(_desc);
213
0
        StorageField::clone(local);
214
0
        return local;
215
0
    }
216
};
217
218
class AggStateField : public StorageField {
219
public:
220
7.15k
    AggStateField(const TabletColumn& column) : StorageField(column) {}
221
222
0
    AggStateField* clone() const override {
223
0
        auto* local = new AggStateField(_desc);
224
0
        StorageField::clone(local);
225
0
        return local;
226
0
    }
227
};
228
229
class HllAggField : public StorageField {
230
public:
231
15.5k
    HllAggField(const TabletColumn& column) : StorageField(column) {}
232
233
0
    HllAggField* clone() const override {
234
0
        auto* local = new HllAggField(_desc);
235
0
        StorageField::clone(local);
236
0
        return local;
237
0
    }
238
};
239
240
class StorageFieldFactory {
241
public:
242
101M
    static StorageField* create(const TabletColumn& column) {
243
        // for key column
244
101M
        if (column.is_key()) {
245
48.9M
            switch (column.type()) {
246
39.4k
            case FieldType::OLAP_FIELD_TYPE_CHAR:
247
39.4k
                return new CharField(column);
248
45.9M
            case FieldType::OLAP_FIELD_TYPE_VARCHAR:
249
45.9M
            case FieldType::OLAP_FIELD_TYPE_STRING:
250
45.9M
                return new StringField(column);
251
0
            case FieldType::OLAP_FIELD_TYPE_STRUCT: {
252
0
                auto* local = new StructField(column);
253
0
                for (uint32_t i = 0; i < column.get_subtype_count(); i++) {
254
0
                    std::unique_ptr<StorageField> sub_field(
255
0
                            StorageFieldFactory::create(column.get_sub_column(i)));
256
0
                    local->add_sub_field(std::move(sub_field));
257
0
                }
258
0
                return local;
259
45.9M
            }
260
0
            case FieldType::OLAP_FIELD_TYPE_ARRAY: {
261
0
                std::unique_ptr<StorageField> item_field(
262
0
                        StorageFieldFactory::create(column.get_sub_column(0)));
263
0
                auto* local = new ArrayField(column);
264
0
                local->add_sub_field(std::move(item_field));
265
0
                return local;
266
45.9M
            }
267
0
            case FieldType::OLAP_FIELD_TYPE_MAP: {
268
0
                std::unique_ptr<StorageField> key_field(
269
0
                        StorageFieldFactory::create(column.get_sub_column(0)));
270
0
                std::unique_ptr<StorageField> val_field(
271
0
                        StorageFieldFactory::create(column.get_sub_column(1)));
272
0
                auto* local = new MapField(column);
273
0
                local->add_sub_field(std::move(key_field));
274
0
                local->add_sub_field(std::move(val_field));
275
0
                return local;
276
45.9M
            }
277
1.16k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL:
278
1.16k
                [[fallthrough]];
279
38.3k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL32:
280
38.3k
                [[fallthrough]];
281
46.0k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL64:
282
46.0k
                [[fallthrough]];
283
90.0k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL128I:
284
90.0k
                [[fallthrough]];
285
96.4k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL256:
286
96.4k
                [[fallthrough]];
287
191k
            case FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ:
288
191k
                [[fallthrough]];
289
293k
            case FieldType::OLAP_FIELD_TYPE_DATETIMEV2: {
290
293k
                StorageField* field = new StorageField(column);
291
293k
                field->set_precision(column.precision());
292
293k
                field->set_scale(column.frac());
293
293k
                return field;
294
191k
            }
295
2.53M
            default:
296
2.53M
                return new StorageField(column);
297
48.9M
            }
298
48.9M
        }
299
300
        // for value column
301
52.9M
        switch (column.aggregation()) {
302
51.9M
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE:
303
52.1M
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_SUM:
304
52.1M
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MIN:
305
52.2M
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MAX:
306
52.5M
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE:
307
52.7M
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE_IF_NOT_NULL:
308
52.7M
            switch (column.type()) {
309
190k
            case FieldType::OLAP_FIELD_TYPE_CHAR:
310
190k
                return new CharField(column);
311
10.8M
            case FieldType::OLAP_FIELD_TYPE_VARCHAR:
312
10.8M
                return new VarcharField(column);
313
5.77M
            case FieldType::OLAP_FIELD_TYPE_STRING:
314
5.77M
                return new StringField(column);
315
43.6k
            case FieldType::OLAP_FIELD_TYPE_STRUCT: {
316
43.6k
                auto* local = new StructField(column);
317
167k
                for (uint32_t i = 0; i < column.get_subtype_count(); i++) {
318
123k
                    std::unique_ptr<StorageField> sub_field(
319
123k
                            StorageFieldFactory::create(column.get_sub_column(i)));
320
123k
                    local->add_sub_field(std::move(sub_field));
321
123k
                }
322
43.6k
                return local;
323
0
            }
324
427k
            case FieldType::OLAP_FIELD_TYPE_ARRAY: {
325
427k
                std::unique_ptr<StorageField> item_field(
326
427k
                        StorageFieldFactory::create(column.get_sub_column(0)));
327
427k
                auto* local = new ArrayField(column);
328
427k
                local->add_sub_field(std::move(item_field));
329
427k
                return local;
330
0
            }
331
128k
            case FieldType::OLAP_FIELD_TYPE_MAP: {
332
128k
                DCHECK(column.get_subtype_count() == 2);
333
128k
                auto* local = new MapField(column);
334
128k
                std::unique_ptr<StorageField> key_field(
335
128k
                        StorageFieldFactory::create(column.get_sub_column(0)));
336
128k
                std::unique_ptr<StorageField> value_field(
337
128k
                        StorageFieldFactory::create(column.get_sub_column(1)));
338
128k
                local->add_sub_field(std::move(key_field));
339
128k
                local->add_sub_field(std::move(value_field));
340
128k
                return local;
341
0
            }
342
3.67k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL:
343
3.67k
                [[fallthrough]];
344
95.8k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL32:
345
95.8k
                [[fallthrough]];
346
247k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL64:
347
247k
                [[fallthrough]];
348
413k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL128I:
349
413k
                [[fallthrough]];
350
422k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL256:
351
422k
                [[fallthrough]];
352
573k
            case FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ:
353
573k
                [[fallthrough]];
354
6.21M
            case FieldType::OLAP_FIELD_TYPE_DATETIMEV2: {
355
6.21M
                StorageField* field = new StorageField(column);
356
6.21M
                field->set_precision(column.precision());
357
6.21M
                field->set_scale(column.frac());
358
6.21M
                return field;
359
573k
            }
360
29.1M
            default:
361
29.1M
                return new StorageField(column);
362
52.7M
            }
363
15.5k
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_HLL_UNION:
364
15.5k
            return new HllAggField(column);
365
25.8k
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_BITMAP_UNION:
366
25.8k
            return new BitmapAggField(column);
367
10.7k
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_QUANTILE_UNION:
368
10.7k
            return new QuantileStateAggField(column);
369
7.18k
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_GENERIC:
370
7.18k
            return new AggStateField(column);
371
0
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_UNKNOWN:
372
0
            CHECK(false) << ", value column no agg type";
373
0
            return nullptr;
374
52.9M
        }
375
0
        return nullptr;
376
52.9M
    }
377
378
    static StorageField* create_by_type(const FieldType& type) {
379
        TabletColumn column(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE, type);
380
        return create(column);
381
    }
382
};
383
} // namespace doris