Coverage Report

Created: 2026-04-10 04:10

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/field.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <memory>
#include <sstream>
#include <string>
#include <vector>

#include "core/arena.h"
#include "core/value/map_value.h"
#include "runtime/collection_value.h"
#include "storage/key_coder.h"
#include "storage/olap_common.h"
#include "storage/olap_define.h"
#include "storage/tablet/tablet_schema.h"
#include "storage/types.h"
#include "storage/utils.h"
#include "util/hash_util.hpp"
#include "util/json/path_in_data.h"
#include "util/slice.h"
36
37
namespace doris {
38
// A Field is used to represent a column in memory format.
39
// User can use this class to access or deal with column data in memory.
40
class StorageField {
41
public:
42
    StorageField(const TabletColumn& column)
43
82.5M
            : _type_info(get_type_info(&column)),
44
82.5M
              _desc(column),
45
82.5M
              _length(column.length()),
46
82.5M
              _key_coder(get_key_coder(column.type())),
47
82.5M
              _name(column.name()),
48
82.5M
              _index_size(column.index_length()),
49
82.5M
              _is_nullable(column.is_nullable()),
50
82.5M
              _unique_id(column.unique_id()),
51
82.5M
              _parent_unique_id(column.parent_unique_id()),
52
82.5M
              _is_extracted_column(column.is_extracted_column()),
53
82.5M
              _path(column.path_info_ptr()) {}
54
55
82.9M
    virtual ~StorageField() = default;
56
57
5.75M
    size_t size() const { return _type_info->size(); }
58
1.11k
    size_t length() const { return _length; }
59
0
    size_t field_size() const { return size() + 1; }
60
0
    size_t index_size() const { return _index_size; }
61
38.3M
    int32_t unique_id() const { return _unique_id; }
62
94.7k
    int32_t parent_unique_id() const { return _parent_unique_id; }
63
37.2M
    bool is_extracted_column() const { return _is_extracted_column; }
64
57.8M
    const std::string& name() const { return _name; }
65
0
    const PathInDataPtr& path() const { return _path; }
66
67
0
    virtual void set_to_max(char* buf) const { return _type_info->set_to_max(buf); }
68
69
2
    virtual void set_to_min(char* buf) const { return _type_info->set_to_min(buf); }
70
71
2.53M
    virtual StorageField* clone() const {
72
2.53M
        auto* local = new StorageField(_desc);
73
2.53M
        this->clone(local);
74
2.53M
        return local;
75
2.53M
    }
76
77
53.2M
    FieldType type() const { return _type_info->type(); }
78
48.0k
    const TypeInfo* type_info() const { return _type_info.get(); }
79
43.3M
    bool is_nullable() const { return _is_nullable; }
80
81
    // similar to `full_encode_ascending`, but only encode part (the first `index_size` bytes) of the value.
82
    // only applicable to string type
83
636k
    void encode_ascending(const void* value, std::string* buf) const {
84
636k
        _key_coder->encode_ascending(value, _index_size, buf);
85
636k
    }
86
87
    // encode the provided `value` into `buf`.
88
8.32M
    void full_encode_ascending(const void* value, std::string* buf) const {
89
8.32M
        _key_coder->full_encode_ascending(value, buf);
90
8.32M
    }
91
727k
    void add_sub_field(std::unique_ptr<StorageField> sub_field) {
92
727k
        _sub_fields.emplace_back(std::move(sub_field));
93
727k
    }
94
138k
    StorageField* get_sub_field(size_t i) const { return _sub_fields[i].get(); }
95
42.5k
    size_t get_sub_field_count() const { return _sub_fields.size(); }
96
97
5.05M
    void set_precision(int32_t precision) { _precision = precision; }
98
5.05M
    void set_scale(int32_t scale) { _scale = scale; }
99
0
    int32_t get_precision() const { return _precision; }
100
18.7M
    int32_t get_scale() const { return _scale; }
101
69.0M
    const TabletColumn& get_desc() const { return _desc; }
102
103
18.6M
    int32_t get_unique_id() const {
104
18.6M
        return is_extracted_column() ? parent_unique_id() : unique_id();
105
18.6M
    }
106
107
protected:
108
    TypeInfoPtr _type_info;
109
    TabletColumn _desc;
110
    // unit : byte
111
    // except for strings, other types have fixed lengths
112
    // Note that, the struct type itself has fixed length, but due to
113
    // its number of subfields is a variable, so the actual length of
114
    // a struct field is not fixed.
115
    size_t _length;
116
117
5.81M
    void clone(StorageField* other) const {
118
5.81M
        other->_type_info = clone_type_info(this->_type_info.get());
119
5.81M
        other->_key_coder = this->_key_coder;
120
5.81M
        other->_name = this->_name;
121
5.81M
        other->_index_size = this->_index_size;
122
5.81M
        other->_is_nullable = this->_is_nullable;
123
5.81M
        other->_sub_fields.clear();
124
5.81M
        other->_precision = this->_precision;
125
5.81M
        other->_scale = this->_scale;
126
5.81M
        other->_unique_id = this->_unique_id;
127
5.81M
        other->_parent_unique_id = this->_parent_unique_id;
128
5.81M
        other->_is_extracted_column = this->_is_extracted_column;
129
5.81M
        for (const auto& f : _sub_fields) {
130
0
            StorageField* item = f->clone();
131
0
            other->add_sub_field(std::unique_ptr<StorageField>(item));
132
0
        }
133
5.81M
    }
134
135
private:
136
    // maximum length of Field, unit : bytes
137
    // usually equal to length, except for variable-length strings
138
    const KeyCoder* _key_coder;
139
    std::string _name;
140
    size_t _index_size;
141
    bool _is_nullable;
142
    std::vector<std::unique_ptr<StorageField>> _sub_fields;
143
    int32_t _precision;
144
    int32_t _scale;
145
    int32_t _unique_id;
146
    int32_t _parent_unique_id;
147
    bool _is_extracted_column = false;
148
    PathInDataPtr _path;
149
};
150
151
class MapField : public StorageField {
152
public:
153
106k
    MapField(const TabletColumn& column) : StorageField(column) {}
154
};
155
156
class StructField : public StorageField {
157
public:
158
39.9k
    StructField(const TabletColumn& column) : StorageField(column) {}
159
};
160
161
class ArrayField : public StorageField {
162
public:
163
390k
    ArrayField(const TabletColumn& column) : StorageField(column) {}
164
};
165
166
class CharField : public StorageField {
167
public:
168
232k
    CharField(const TabletColumn& column) : StorageField(column) {}
169
170
1.62k
    CharField* clone() const override {
171
1.62k
        auto* local = new CharField(_desc);
172
1.62k
        StorageField::clone(local);
173
1.62k
        return local;
174
1.62k
    }
175
176
0
    void set_to_max(char* ch) const override {
177
0
        auto slice = reinterpret_cast<Slice*>(ch);
178
0
        slice->size = _length;
179
0
        memset(slice->data, 0xFF, slice->size);
180
0
    }
181
};
182
183
class VarcharField : public StorageField {
184
public:
185
8.25M
    VarcharField(const TabletColumn& column) : StorageField(column) {}
186
187
0
    VarcharField* clone() const override {
188
0
        auto* local = new VarcharField(_desc);
189
0
        StorageField::clone(local);
190
0
        return local;
191
0
    }
192
193
2
    void set_to_max(char* ch) const override {
194
2
        auto slice = reinterpret_cast<Slice*>(ch);
195
2
        slice->size = _length - OLAP_VARCHAR_MAX_BYTES;
196
2
        memset(slice->data, 0xFF, slice->size);
197
2
    }
198
};
199
class StringField : public StorageField {
200
public:
201
41.2M
    StringField(const TabletColumn& column) : StorageField(column) {}
202
203
3.28M
    StringField* clone() const override {
204
3.28M
        auto* local = new StringField(_desc);
205
3.28M
        StorageField::clone(local);
206
3.28M
        return local;
207
3.28M
    }
208
209
0
    void set_to_max(char* ch) const override {
210
0
        auto slice = reinterpret_cast<Slice*>(ch);
211
0
        memset(slice->data, 0xFF, slice->size);
212
0
    }
213
};
214
215
class BitmapAggField : public StorageField {
216
public:
217
25.5k
    BitmapAggField(const TabletColumn& column) : StorageField(column) {}
218
219
0
    BitmapAggField* clone() const override {
220
0
        auto* local = new BitmapAggField(_desc);
221
0
        StorageField::clone(local);
222
0
        return local;
223
0
    }
224
};
225
226
class QuantileStateAggField : public StorageField {
227
public:
228
10.6k
    QuantileStateAggField(const TabletColumn& column) : StorageField(column) {}
229
230
0
    QuantileStateAggField* clone() const override {
231
0
        auto* local = new QuantileStateAggField(_desc);
232
0
        StorageField::clone(local);
233
0
        return local;
234
0
    }
235
};
236
237
class AggStateField : public StorageField {
238
public:
239
6.30k
    AggStateField(const TabletColumn& column) : StorageField(column) {}
240
241
0
    AggStateField* clone() const override {
242
0
        auto* local = new AggStateField(_desc);
243
0
        StorageField::clone(local);
244
0
        return local;
245
0
    }
246
};
247
248
class HllAggField : public StorageField {
249
public:
250
15.2k
    HllAggField(const TabletColumn& column) : StorageField(column) {}
251
252
0
    HllAggField* clone() const override {
253
0
        auto* local = new HllAggField(_desc);
254
0
        StorageField::clone(local);
255
0
        return local;
256
0
    }
257
};
258
259
class StorageFieldFactory {
260
public:
261
77.2M
    static StorageField* create(const TabletColumn& column) {
262
        // for key column
263
77.2M
        if (column.is_key()) {
264
36.5M
            switch (column.type()) {
265
35.7k
            case FieldType::OLAP_FIELD_TYPE_CHAR:
266
35.7k
                return new CharField(column);
267
33.8M
            case FieldType::OLAP_FIELD_TYPE_VARCHAR:
268
33.8M
            case FieldType::OLAP_FIELD_TYPE_STRING:
269
33.8M
                return new StringField(column);
270
0
            case FieldType::OLAP_FIELD_TYPE_STRUCT: {
271
0
                auto* local = new StructField(column);
272
0
                for (uint32_t i = 0; i < column.get_subtype_count(); i++) {
273
0
                    std::unique_ptr<StorageField> sub_field(
274
0
                            StorageFieldFactory::create(column.get_sub_column(i)));
275
0
                    local->add_sub_field(std::move(sub_field));
276
0
                }
277
0
                return local;
278
33.8M
            }
279
0
            case FieldType::OLAP_FIELD_TYPE_ARRAY: {
280
0
                std::unique_ptr<StorageField> item_field(
281
0
                        StorageFieldFactory::create(column.get_sub_column(0)));
282
0
                auto* local = new ArrayField(column);
283
0
                local->add_sub_field(std::move(item_field));
284
0
                return local;
285
33.8M
            }
286
0
            case FieldType::OLAP_FIELD_TYPE_MAP: {
287
0
                std::unique_ptr<StorageField> key_field(
288
0
                        StorageFieldFactory::create(column.get_sub_column(0)));
289
0
                std::unique_ptr<StorageField> val_field(
290
0
                        StorageFieldFactory::create(column.get_sub_column(1)));
291
0
                auto* local = new MapField(column);
292
0
                local->add_sub_field(std::move(key_field));
293
0
                local->add_sub_field(std::move(val_field));
294
0
                return local;
295
33.8M
            }
296
4.10k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL:
297
4.10k
                [[fallthrough]];
298
38.0k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL32:
299
38.0k
                [[fallthrough]];
300
46.4k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL64:
301
46.4k
                [[fallthrough]];
302
92.6k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL128I:
303
92.6k
                [[fallthrough]];
304
99.4k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL256:
305
99.4k
                [[fallthrough]];
306
176k
            case FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ:
307
176k
                [[fallthrough]];
308
281k
            case FieldType::OLAP_FIELD_TYPE_DATETIMEV2: {
309
281k
                StorageField* field = new StorageField(column);
310
281k
                field->set_precision(column.precision());
311
281k
                field->set_scale(column.frac());
312
281k
                return field;
313
176k
            }
314
2.43M
            default:
315
2.43M
                return new StorageField(column);
316
36.5M
            }
317
36.5M
        }
318
319
        // for value column
320
40.6M
        switch (column.aggregation()) {
321
39.7M
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE:
322
39.9M
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_SUM:
323
39.9M
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MIN:
324
40.0M
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MAX:
325
40.4M
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE:
326
40.5M
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE_IF_NOT_NULL:
327
40.5M
            switch (column.type()) {
328
195k
            case FieldType::OLAP_FIELD_TYPE_CHAR:
329
195k
                return new CharField(column);
330
8.27M
            case FieldType::OLAP_FIELD_TYPE_VARCHAR:
331
8.27M
                return new VarcharField(column);
332
4.39M
            case FieldType::OLAP_FIELD_TYPE_STRING:
333
4.39M
                return new StringField(column);
334
40.0k
            case FieldType::OLAP_FIELD_TYPE_STRUCT: {
335
40.0k
                auto* local = new StructField(column);
336
160k
                for (uint32_t i = 0; i < column.get_subtype_count(); i++) {
337
120k
                    std::unique_ptr<StorageField> sub_field(
338
120k
                            StorageFieldFactory::create(column.get_sub_column(i)));
339
120k
                    local->add_sub_field(std::move(sub_field));
340
120k
                }
341
40.0k
                return local;
342
0
            }
343
392k
            case FieldType::OLAP_FIELD_TYPE_ARRAY: {
344
392k
                std::unique_ptr<StorageField> item_field(
345
392k
                        StorageFieldFactory::create(column.get_sub_column(0)));
346
392k
                auto* local = new ArrayField(column);
347
392k
                local->add_sub_field(std::move(item_field));
348
392k
                return local;
349
0
            }
350
107k
            case FieldType::OLAP_FIELD_TYPE_MAP: {
351
107k
                DCHECK(column.get_subtype_count() == 2);
352
107k
                auto* local = new MapField(column);
353
107k
                std::unique_ptr<StorageField> key_field(
354
107k
                        StorageFieldFactory::create(column.get_sub_column(0)));
355
107k
                std::unique_ptr<StorageField> value_field(
356
107k
                        StorageFieldFactory::create(column.get_sub_column(1)));
357
107k
                local->add_sub_field(std::move(key_field));
358
107k
                local->add_sub_field(std::move(value_field));
359
107k
                return local;
360
0
            }
361
6.02k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL:
362
6.02k
                [[fallthrough]];
363
80.0k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL32:
364
80.0k
                [[fallthrough]];
365
230k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL64:
366
230k
                [[fallthrough]];
367
401k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL128I:
368
401k
                [[fallthrough]];
369
411k
            case FieldType::OLAP_FIELD_TYPE_DECIMAL256:
370
411k
                [[fallthrough]];
371
553k
            case FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ:
372
553k
                [[fallthrough]];
373
4.77M
            case FieldType::OLAP_FIELD_TYPE_DATETIMEV2: {
374
4.77M
                StorageField* field = new StorageField(column);
375
4.77M
                field->set_precision(column.precision());
376
4.77M
                field->set_scale(column.frac());
377
4.77M
                return field;
378
553k
            }
379
22.4M
            default:
380
22.4M
                return new StorageField(column);
381
40.5M
            }
382
15.2k
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_HLL_UNION:
383
15.2k
            return new HllAggField(column);
384
25.5k
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_BITMAP_UNION:
385
25.5k
            return new BitmapAggField(column);
386
10.7k
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_QUANTILE_UNION:
387
10.7k
            return new QuantileStateAggField(column);
388
6.33k
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_GENERIC:
389
6.33k
            return new AggStateField(column);
390
0
        case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_UNKNOWN:
391
0
            CHECK(false) << ", value column no agg type";
392
0
            return nullptr;
393
40.6M
        }
394
0
        return nullptr;
395
40.6M
    }
396
397
    static StorageField* create_by_type(const FieldType& type) {
398
        TabletColumn column(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE, type);
399
        return create(column);
400
    }
401
};
402
} // namespace doris