Coverage Report

Created: 2026-04-09 18:34

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exec/common/variant_util.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "exec/common/variant_util.h"
19
20
#include <assert.h>
21
#include <fmt/format.h>
22
#include <gen_cpp/FrontendService.h>
23
#include <gen_cpp/FrontendService_types.h>
24
#include <gen_cpp/HeartbeatService_types.h>
25
#include <gen_cpp/MasterService_types.h>
26
#include <gen_cpp/Status_types.h>
27
#include <gen_cpp/Types_types.h>
28
#include <glog/logging.h>
29
#include <rapidjson/document.h>
30
#include <rapidjson/stringbuffer.h>
31
#include <rapidjson/writer.h>
32
#include <simdjson/simdjson.h> // IWYU pragma: keep
33
#include <unicode/uchar.h>
34
35
#include <algorithm>
36
#include <cassert>
37
#include <cstddef>
38
#include <cstdint>
39
#include <cstring>
40
#include <list>
41
#include <memory>
42
#include <mutex>
43
#include <optional>
44
#include <ostream>
45
#include <ranges>
46
#include <set>
47
#include <stack>
48
#include <string>
49
#include <string_view>
50
#include <unordered_map>
51
#include <utility>
52
#include <vector>
53
54
#include "common/config.h"
55
#include "common/status.h"
56
#include "core/assert_cast.h"
57
#include "core/block/block.h"
58
#include "core/block/column_numbers.h"
59
#include "core/block/column_with_type_and_name.h"
60
#include "core/column/column.h"
61
#include "core/column/column_array.h"
62
#include "core/column/column_map.h"
63
#include "core/column/column_nullable.h"
64
#include "core/column/column_string.h"
65
#include "core/column/column_variant.h"
66
#include "core/data_type/data_type.h"
67
#include "core/data_type/data_type_array.h"
68
#include "core/data_type/data_type_factory.hpp"
69
#include "core/data_type/data_type_jsonb.h"
70
#include "core/data_type/data_type_nullable.h"
71
#include "core/data_type/data_type_string.h"
72
#include "core/data_type/data_type_variant.h"
73
#include "core/data_type/define_primitive_type.h"
74
#include "core/data_type/get_least_supertype.h"
75
#include "core/data_type/primitive_type.h"
76
#include "core/field.h"
77
#include "core/typeid_cast.h"
78
#include "core/types.h"
79
#include "exec/common/field_visitors.h"
80
#include "exec/common/sip_hash.h"
81
#include "exprs/function/function.h"
82
#include "exprs/function/simple_function_factory.h"
83
#include "exprs/function_context.h"
84
#include "exprs/json_functions.h"
85
#include "re2/re2.h"
86
#include "runtime/exec_env.h"
87
#include "runtime/runtime_state.h"
88
#include "storage/olap_common.h"
89
#include "storage/rowset/beta_rowset.h"
90
#include "storage/rowset/rowset.h"
91
#include "storage/rowset/rowset_fwd.h"
92
#include "storage/segment/segment_loader.h"
93
#include "storage/segment/variant/variant_column_reader.h"
94
#include "storage/segment/variant/variant_column_writer_impl.h"
95
#include "storage/tablet/tablet.h"
96
#include "storage/tablet/tablet_fwd.h"
97
#include "storage/tablet/tablet_schema.h"
98
#include "util/client_cache.h"
99
#include "util/defer_op.h"
100
#include "util/json/json_parser.h"
101
#include "util/json/path_in_data.h"
102
#include "util/json/simd_json_parser.h"
103
104
namespace doris::variant_util {
105
#include "common/compile_check_begin.h"
106
107
2.70k
// Appends `ch` to `*regex_output` so that it is matched literally by a regex.
// When `ch` is one of the metacharacters . ^ $ + * ? ( ) | { } [ ] \ a backslash is
// written in front of it; every other character is appended unchanged.
inline void append_escaped_regex_char(std::string* regex_output, char ch) {
    static constexpr std::string_view kRegexMetaChars = R"(.^$+*?()|{}[]\)";
    const bool needs_escape = kRegexMetaChars.find(ch) != std::string_view::npos;
    if (needs_escape) {
        regex_output->push_back('\\');
    }
    regex_output->push_back(ch);
}
131
132
// Small LRU to cap compiled glob patterns.
// kGlobRegexCacheCapacity is the size threshold checked by get_or_build_re2 after an
// insert: once the map holds more entries than this, the entry at the back of the LRU
// list is dropped.
133
constexpr size_t kGlobRegexCacheCapacity = 256;
134
135
// Value stored in g_glob_regex_cache for one glob pattern.
struct GlobRegexCacheEntry {
136
    // Compiled regex built from the glob pattern.
    std::shared_ptr<RE2> re2;
137
    // Position of this pattern inside g_glob_regex_cache_lru, used to splice it to the front.
    std::list<std::string>::iterator lru_it;
138
};
139
140
// Guards both g_glob_regex_cache_lru and g_glob_regex_cache.
static std::mutex g_glob_regex_cache_mutex;
141
// Glob patterns in recency order: front is the most recently used, back is evicted first.
static std::list<std::string> g_glob_regex_cache_lru;
142
// Glob pattern -> compiled regex plus its LRU list position.
static std::unordered_map<std::string, GlobRegexCacheEntry> g_glob_regex_cache;
143
144
155k
std::shared_ptr<RE2> get_or_build_re2(const std::string& glob_pattern) {
145
155k
    {
146
155k
        std::lock_guard<std::mutex> lock(g_glob_regex_cache_mutex);
147
155k
        auto it = g_glob_regex_cache.find(glob_pattern);
148
155k
        if (it != g_glob_regex_cache.end()) {
149
155k
            g_glob_regex_cache_lru.splice(g_glob_regex_cache_lru.begin(), g_glob_regex_cache_lru,
150
155k
                                          it->second.lru_it);
151
155k
            return it->second.re2;
152
155k
        }
153
155k
    }
154
201
    std::string regex_pattern;
155
201
    Status st = glob_to_regex(glob_pattern, &regex_pattern);
156
201
    if (!st.ok()) {
157
2
        return nullptr;
158
2
    }
159
199
    auto compiled = std::make_shared<RE2>(regex_pattern);
160
199
    if (!compiled->ok()) {
161
3
        return nullptr;
162
3
    }
163
196
    {
164
196
        std::lock_guard<std::mutex> lock(g_glob_regex_cache_mutex);
165
196
        auto it = g_glob_regex_cache.find(glob_pattern);
166
196
        if (it != g_glob_regex_cache.end()) {
167
0
            g_glob_regex_cache_lru.splice(g_glob_regex_cache_lru.begin(), g_glob_regex_cache_lru,
168
0
                                          it->second.lru_it);
169
0
            return it->second.re2;
170
0
        }
171
196
        g_glob_regex_cache_lru.push_front(glob_pattern);
172
196
        g_glob_regex_cache.emplace(glob_pattern,
173
196
                                   GlobRegexCacheEntry {compiled, g_glob_regex_cache_lru.begin()});
174
196
        if (g_glob_regex_cache.size() > kGlobRegexCacheCapacity) {
175
0
            const std::string& evict_key = g_glob_regex_cache_lru.back();
176
0
            g_glob_regex_cache.erase(evict_key);
177
0
            g_glob_regex_cache_lru.pop_back();
178
0
        }
179
196
    }
180
0
    return compiled;
181
196
}
182
183
// Convert a restricted glob pattern into a regex.
184
// Supported: '*', '?', '[...]', '\\' escape. Others are treated as literals.
185
278
Status glob_to_regex(const std::string& glob_pattern, std::string* regex_pattern) {
186
278
    regex_pattern->clear();
187
278
    regex_pattern->append("^");
188
278
    bool is_escaped = false;
189
278
    size_t pattern_length = glob_pattern.size();
190
3.09k
    for (size_t index = 0; index < pattern_length; ++index) {
191
2.82k
        char current_char = glob_pattern[index];
192
2.82k
        if (is_escaped) {
193
10
            append_escaped_regex_char(regex_pattern, current_char);
194
10
            is_escaped = false;
195
10
            continue;
196
10
        }
197
2.81k
        if (current_char == '\\') {
198
14
            is_escaped = true;
199
14
            continue;
200
14
        }
201
2.80k
        if (current_char == '*') {
202
63
            regex_pattern->append(".*");
203
63
            continue;
204
63
        }
205
2.73k
        if (current_char == '?') {
206
15
            regex_pattern->append(".");
207
15
            continue;
208
15
        }
209
2.72k
        if (current_char == '[') {
210
33
            size_t class_index = index + 1;
211
33
            bool class_closed = false;
212
33
            bool is_class_escaped = false;
213
33
            std::string class_buffer;
214
33
            if (class_index < pattern_length &&
215
33
                (glob_pattern[class_index] == '!' || glob_pattern[class_index] == '^')) {
216
9
                class_buffer.push_back('^');
217
9
                ++class_index;
218
9
            }
219
99
            for (; class_index < pattern_length; ++class_index) {
220
95
                char class_char = glob_pattern[class_index];
221
95
                if (is_class_escaped) {
222
10
                    class_buffer.push_back(class_char);
223
10
                    is_class_escaped = false;
224
10
                    continue;
225
10
                }
226
85
                if (class_char == '\\') {
227
10
                    is_class_escaped = true;
228
10
                    continue;
229
10
                }
230
75
                if (class_char == ']') {
231
29
                    class_closed = true;
232
29
                    break;
233
29
                }
234
46
                class_buffer.push_back(class_char);
235
46
            }
236
33
            if (!class_closed) {
237
4
                return Status::InvalidArgument("Unclosed character class in glob pattern: {}",
238
4
                                               glob_pattern);
239
4
            }
240
29
            regex_pattern->append("[");
241
29
            regex_pattern->append(class_buffer);
242
29
            regex_pattern->append("]");
243
29
            index = class_index;
244
29
            continue;
245
33
        }
246
2.68k
        append_escaped_regex_char(regex_pattern, current_char);
247
2.68k
    }
248
274
    if (is_escaped) {
249
4
        append_escaped_regex_char(regex_pattern, '\\');
250
4
    }
251
274
    regex_pattern->append("$");
252
274
    return Status::OK();
253
278
}
254
255
155k
bool glob_match_re2(const std::string& glob_pattern, const std::string& candidate_path) {
256
155k
    auto compiled = get_or_build_re2(glob_pattern);
257
155k
    if (compiled == nullptr) {
258
5
        return false;
259
5
    }
260
155k
    return RE2::FullMatch(candidate_path, *compiled);
261
155k
}
262
263
1.11k
size_t get_number_of_dimensions(const IDataType& type) {
264
1.11k
    if (const auto* type_array = typeid_cast<const DataTypeArray*>(&type)) {
265
4
        return type_array->get_number_of_dimensions();
266
4
    }
267
1.11k
    return 0;
268
1.11k
}
269
3
size_t get_number_of_dimensions(const IColumn& column) {
270
3
    if (const auto* column_array = check_and_get_column<ColumnArray>(column)) {
271
2
        return column_array->get_number_of_dimensions();
272
2
    }
273
1
    return 0;
274
3
}
275
276
86.2k
// Peels Nullable and Array wrappers off `type` and returns the element type of the
// innermost array (as stored by that array, i.e. possibly still Nullable).
// When `type` contains no array at all, `type` itself is returned unchanged.
DataTypePtr get_base_type_of_array(const DataTypePtr& type) {
    // Work on raw pointers to avoid copying shared pointers while walking the nesting.
    const auto skip_nullable = [](const IDataType* wrapped) -> const IDataType* {
        if (const auto* as_nullable = typeid_cast<const DataTypeNullable*>(wrapped)) {
            return as_nullable->get_nested_type().get();
        }
        return wrapped;
    };

    const DataTypeArray* innermost_array = nullptr;
    const IDataType* cursor = skip_nullable(type.get());
    for (const auto* as_array = typeid_cast<const DataTypeArray*>(cursor); as_array != nullptr;
         as_array = typeid_cast<const DataTypeArray*>(cursor)) {
        innermost_array = as_array;
        cursor = skip_nullable(as_array->get_nested_type().get());
    }

    if (innermost_array == nullptr) {
        return type;
    }
    return innermost_array->get_nested_type();
}
292
293
69.9k
// Casts `arg` to `type` and stores the resulting full (non-const) column in `*result`.
//
// - Target is VARIANT: handled here without the function framework (see below).
// - Source type is INVALID_TYPE: the result is a column of the target type's default value.
// - Otherwise the "CAST" function is executed; if it reports failure a warning is logged
//   (rate limited) and the result is again a column of default values.
// Returns InternalError only when no CAST function can be found.
Status cast_column(const ColumnWithTypeAndName& arg, const DataTypePtr& type, ColumnPtr* result) {
    ColumnsWithTypeAndName arguments {arg, {nullptr, type, type->get_name()}};

    // To prevent null info from being lost we must not call the function framework for
    // variant targets: it would wrap nullable around the Variant instead of around the
    // root of the Variant.
    // correct output: Nullable(Array(int)) -> Nullable(Variant(Nullable(Array(int))))
    // incorrect output: Nullable(Array(int)) -> Nullable(Variant(Array(int)))
    const bool target_is_variant = type->get_primitive_type() == TYPE_VARIANT;
    if (target_is_variant && arg.type->get_primitive_type() == TYPE_VARIANT) {
        // Variant -> Variant: only the nullability of the outer column may differ.
        if (type->is_nullable()) {
            *result = make_nullable(arg.column);
        } else {
            *result = remove_nullable(arg.column);
        }
        return Status::OK();
    }
    if (target_is_variant) {
        // Use the source column/type as the root of a fresh variant column.
        CHECK(arg.column->is_nullable());
        auto bare_target_type = remove_nullable(type);
        const auto& variant_type = assert_cast<const DataTypeVariant&>(*bare_target_type);
        auto variant = ColumnVariant::create(variant_type.variant_max_subcolumns_count(),
                                             variant_type.enable_doc_mode());

        variant->create_root(arg.type, arg.column->assume_mutable());
        // The outer nullable reuses the null map of the source column.
        ColumnPtr nullable_variant = ColumnNullable::create(
                variant->get_ptr(),
                check_and_get_column<ColumnNullable>(arg.column.get())->get_null_map_column_ptr());
        if (type->is_nullable()) {
            *result = nullable_variant;
        } else {
            *result = variant->get_ptr();
        }
        return Status::OK();
    }

    auto cast_fn = SimpleFunctionFactory::instance().get_function("CAST", arguments, type);
    if (!cast_fn) {
        return Status::InternalError("Not found cast function {} to {}", arg.type->get_name(),
                                     type->get_name());
    }
    Block tmp_block {arguments};
    const uint32_t result_column = cast_set<uint32_t>(tmp_block.columns());
    RuntimeState state;
    auto ctx = FunctionContext::create_context(&state, {}, {});

    // Column of the target type filled with its default value, one row per input row.
    const auto default_filled_column = [&]() -> ColumnPtr {
        return type->create_column_const_with_default_value(arg.column->size())
                ->convert_to_full_column_if_const();
    };

    if (arg.type->get_primitive_type() == INVALID_TYPE) {
        // cast from nothing to any type should result in nulls
        *result = default_filled_column();
        return Status::OK();
    }

    // Converting a string column to jsonb just adds a string jsonb field to the
    // destination column instead of parsing each row of the original string column.
    ctx->set_string_as_jsonb_string(true);
    ctx->set_jsonb_string_as_string(true);
    tmp_block.insert({nullptr, type, arg.name});
    // TODO(lihangyu): we should handle this error in strict mode
    const bool executed =
            function_execute_ok(cast_fn->execute(ctx.get(), tmp_block, {0}, result_column,
                                                 arg.column->size()));
    if (!executed) {
        LOG_EVERY_N(WARNING, 100) << fmt::format("cast from {} to {}", arg.type->get_name(),
                                                 type->get_name());
        *result = default_filled_column();
        return Status::OK();
    }
    *result = tmp_block.get_by_position(result_column).column->convert_to_full_column_if_const();
    VLOG_DEBUG << fmt::format("{} before convert {}, after convert {}", arg.name,
                              arg.column->get_name(), (*result)->get_name());
    return Status::OK();
}
356
357
void get_column_by_type(const DataTypePtr& data_type, const std::string& name, TabletColumn& column,
358
179k
                        const ExtraInfo& ext_info) {
359
179k
    column.set_name(name);
360
179k
    column.set_type(data_type->get_storage_field_type());
361
179k
    if (ext_info.unique_id >= 0) {
362
4
        column.set_unique_id(ext_info.unique_id);
363
4
    }
364
179k
    if (ext_info.parent_unique_id >= 0) {
365
86.6k
        column.set_parent_unique_id(ext_info.parent_unique_id);
366
86.6k
    }
367
179k
    if (!ext_info.path_info.empty()) {
368
86.6k
        column.set_path_info(ext_info.path_info);
369
86.6k
    }
370
179k
    if (data_type->is_nullable()) {
371
89.8k
        const auto& real_type = static_cast<const DataTypeNullable&>(*data_type);
372
89.8k
        column.set_is_nullable(true);
373
89.8k
        get_column_by_type(real_type.get_nested_type(), name, column, {});
374
89.8k
        return;
375
89.8k
    }
376
89.8k
    if (data_type->get_primitive_type() == PrimitiveType::TYPE_ARRAY) {
377
3.17k
        TabletColumn child;
378
3.17k
        get_column_by_type(assert_cast<const DataTypeArray*>(data_type.get())->get_nested_type(),
379
3.17k
                           "", child, {});
380
3.17k
        column.set_length(TabletColumn::get_field_length_by_type(TPrimitiveType::ARRAY, 0));
381
3.17k
        column.add_sub_column(child);
382
3.17k
        return;
383
3.17k
    }
384
86.6k
    if (data_type->get_primitive_type() == PrimitiveType::TYPE_VARIANT) {
385
0
        const auto* dt_variant = assert_cast<const DataTypeVariant*>(data_type.get());
386
0
        column.set_variant_max_subcolumns_count(dt_variant->variant_max_subcolumns_count());
387
0
        column.set_variant_enable_doc_mode(dt_variant->enable_doc_mode());
388
0
        return;
389
0
    }
390
    // size is not fixed when type is string or json
391
86.6k
    if (is_string_type(data_type->get_primitive_type()) ||
392
86.6k
        data_type->get_primitive_type() == TYPE_JSONB) {
393
28.4k
        column.set_length(INT_MAX);
394
28.4k
        return;
395
28.4k
    }
396
397
58.1k
    PrimitiveType type = data_type->get_primitive_type();
398
58.1k
    if (is_int_or_bool(type) || is_string_type(type) || is_float_or_double(type) || is_ip(type) ||
399
58.1k
        is_date_or_datetime(type) || type == PrimitiveType::TYPE_DATEV2) {
400
58.0k
        column.set_length(cast_set<int32_t>(data_type->get_size_of_value_in_memory()));
401
58.0k
        return;
402
58.0k
    }
403
133
    if (is_decimal(type)) {
404
105
        column.set_precision(data_type->get_precision());
405
105
        column.set_frac(data_type->get_scale());
406
105
        return;
407
105
    }
408
    // datetimev2 needs scale
409
28
    if (type == PrimitiveType::TYPE_DATETIMEV2 || type == PrimitiveType::TYPE_TIMESTAMPTZ) {
410
17
        column.set_precision(-1);
411
17
        column.set_frac(data_type->get_scale());
412
17
        return;
413
17
    }
414
415
11
    throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
416
11
                           "unexcepted data column type: {}, column name is: {}",
417
11
                           data_type->get_name(), name);
418
28
}
419
420
// Convenience overload: builds a fresh TabletColumn for `data_type` via the
// out-parameter overload and returns it by value.
TabletColumn get_column_by_type(const DataTypePtr& data_type, const std::string& name,
                                const ExtraInfo& ext_info) {
    TabletColumn built_column;
    get_column_by_type(data_type, name, built_column, ext_info);
    return built_column;
}
426
427
// check if two paths which same prefix have different structure
428
static bool has_different_structure_in_same_path(const PathInData::Parts& lhs,
429
51.4k
                                                 const PathInData::Parts& rhs) {
430
51.4k
    if (lhs.size() != rhs.size()) {
431
1
        return false; // different size means different structure
432
1
    }
433
    // Since we group by path string, lhs and rhs must have the same size and keys
434
    // We only need to check if they have different nested structure
435
152k
    for (size_t i = 0; i < lhs.size(); ++i) {
436
101k
        if (lhs[i] != rhs[i]) {
437
5
            VLOG_DEBUG << fmt::format(
438
0
                    "Check different structure: {} vs {}, lhs[i].is_nested: {}, rhs[i].is_nested: "
439
0
                    "{}",
440
0
                    lhs[i].key, rhs[i].key, lhs[i].is_nested, rhs[i].is_nested);
441
5
            return true;
442
5
        }
443
101k
    }
444
51.4k
    return false;
445
51.4k
}
446
447
10.1k
// Verifies that paths sharing the same string form also share the same part structure.
// Paths are bucketed by get_path(); every pair inside a bucket is compared and the first
// structurally different pair yields DataQualityError. Returns OK otherwise.
Status check_variant_has_no_ambiguous_paths(const PathsInData& tuple_paths) {
    // Bucket path indices by string form so only potential conflicts are compared.
    std::unordered_map<std::string, std::vector<size_t>> indices_by_path;
    for (size_t idx = 0; idx < tuple_paths.size(); ++idx) {
        const auto& path = tuple_paths[idx];
        indices_by_path[path.get_path()].push_back(idx);
        VLOG_DEBUG << "tuple_paths[i]: " << path.get_path();
    }

    for (const auto& [path_str, group] : indices_by_path) {
        if (group.size() <= 1) {
            continue; // a single path cannot conflict with itself
        }
        // Pairwise comparison: each entry against all entries before it.
        for (size_t later = 0; later < group.size(); ++later) {
            for (size_t earlier = 0; earlier < later; ++earlier) {
                const auto& later_path = tuple_paths[group[later]];
                const auto& earlier_path = tuple_paths[group[earlier]];
                if (!has_different_structure_in_same_path(later_path.get_parts(),
                                                          earlier_path.get_parts())) {
                    continue;
                }
                return Status::DataQualityError(
                        "Ambiguous paths: {} vs {} with different nested part {} vs {}",
                        later_path.get_path(), earlier_path.get_path(),
                        later_path.has_nested_part(), earlier_path.has_nested_part());
            }
        }
    }
    return Status::OK();
}
480
481
// Appends one column per variant sub path to `common_schema`.
//
// For every path in `subcolumns_types` (except the dummy column name) a single type is
// chosen: JSONB when the collected types disagree on their number of array dimensions,
// otherwise the result of get_least_supertype_jsonb. The chosen type is always nullable.
// A path that matches an entry of `typed_columns` (looked up without its root part) and
// has no nested part reuses that predefined column instead of deriving one from the type.
// Each appended path is also recorded in `path_set` when it is non-null.
// Requires `common_schema` to be uniquely owned (CHECK).
Status update_least_schema_internal(const std::map<PathInData, DataTypes>& subcolumns_types,
                                    TabletSchemaSPtr& common_schema, int32_t variant_col_unique_id,
                                    const std::map<std::string, TabletColumnPtr>& typed_columns,
                                    std::set<PathInData>* path_set) {
    PathsInData tuple_paths;
    DataTypes tuple_types;
    CHECK(common_schema.use_count() == 1);

    // Get the least common type for all paths.
    for (const auto& [key, subtypes] : subcolumns_types) {
        assert(!subtypes.empty());
        if (key.get_path() == ColumnVariant::COLUMN_NAME_DUMMY) {
            continue;
        }
        const size_t first_dim = get_number_of_dimensions(*subtypes[0]);
        tuple_paths.emplace_back(key);

        // First type whose dimension count disagrees with the first one, if any.
        const auto conflicting =
                std::find_if(subtypes.begin() + 1, subtypes.end(), [first_dim](const auto& other) {
                    return first_dim != get_number_of_dimensions(*other);
                });
        if (conflicting != subtypes.end()) {
            tuple_types.emplace_back(make_nullable(std::make_shared<DataTypeJsonb>()));
            LOG(INFO) << fmt::format(
                    "Uncompatible types of subcolumn '{}': {} and {}, cast to JSONB",
                    key.get_path(), subtypes[0]->get_name(), (*conflicting)->get_name());
            continue;
        }

        DataTypePtr common_type;
        get_least_supertype_jsonb(subtypes, &common_type);
        tuple_types.emplace_back(common_type->is_nullable() ? common_type
                                                            : make_nullable(common_type));
    }
    CHECK_EQ(tuple_paths.size(), tuple_types.size());

    // Append all common type columns of this variant
    for (size_t pos = 0; pos < tuple_paths.size(); ++pos) {
        const auto& path = tuple_paths[pos];
        TabletColumn common_column;
        // typed path not contains root part
        const auto path_without_root = path.copy_pop_front().get_path();
        const auto typed_it = typed_columns.find(path_without_root);
        if (typed_it != typed_columns.end() && !path.has_nested_part()) {
            common_column = *typed_it->second;
            // parent unique id and path may not be init in write path
            common_column.set_parent_unique_id(variant_col_unique_id);
            common_column.set_path_info(path);
            common_column.set_name(path.get_path());
        } else {
            get_column_by_type(tuple_types[pos], path.get_path(), common_column,
                               ExtraInfo {.unique_id = -1,
                                          .parent_unique_id = variant_col_unique_id,
                                          .path_info = path});
        }
        common_schema->append_column(common_column);
        if (path_set != nullptr) {
            path_set->insert(path);
        }
    }
    return Status::OK();
}
542
543
// Collects, across `schemas`, the types of all sub columns that belong to the variant
// column `variant_col_unique_id`, rejects ambiguous paths, and then appends one merged
// column per path to `common_schema` via update_least_schema_internal.
// The variant's predefined sub columns in `common_schema` are passed along by name.
Status update_least_common_schema(const std::vector<TabletSchemaSPtr>& schemas,
                                  TabletSchemaSPtr& common_schema, int32_t variant_col_unique_id,
                                  std::set<PathInData>* path_set) {
    // Predefined (typed) sub columns of the variant, keyed by column name.
    std::map<std::string, TabletColumnPtr> typed_columns;
    for (const TabletColumnPtr& typed_col :
         common_schema->column_by_uid(variant_col_unique_id).get_sub_columns()) {
        typed_columns[typed_col->name()] = typed_col;
    }

    // Types of subcolumns by path from all tuples.
    std::map<PathInData, DataTypes> subcolumns_types;
    // Every collected path (duplicates included) for the batch ambiguity check.
    std::vector<PathInData> all_paths;

    for (const TabletSchemaSPtr& schema : schemas) {
        for (const TabletColumnPtr& col : schema->columns()) {
            // Only sub columns of this variant are of interest.
            if (!col->has_path_info() || col->parent_unique_id() <= 0 ||
                col->parent_unique_id() != variant_col_unique_id) {
                continue;
            }
            const auto& path = *col->path_info_ptr();
            auto& types_for_path = subcolumns_types[path];
            types_for_path.emplace_back(
                    DataTypeFactory::instance().create_data_type(*col, col->is_nullable()));
            all_paths.push_back(path);
        }
    }

    // Batch check for conflicts
    RETURN_IF_ERROR(check_variant_has_no_ambiguous_paths(all_paths));

    return update_least_schema_internal(subcolumns_types, common_schema, variant_col_unique_id,
                                        typed_columns, path_set);
}
575
576
// Keep variant subcolumn BF support aligned with FE DDL checks.
577
96.0k
bool is_bf_supported_by_fe_for_variant_subcolumn(FieldType type) {
578
96.0k
    switch (type) {
579
91
    case FieldType::OLAP_FIELD_TYPE_SMALLINT:
580
421
    case FieldType::OLAP_FIELD_TYPE_INT:
581
52.7k
    case FieldType::OLAP_FIELD_TYPE_BIGINT:
582
52.9k
    case FieldType::OLAP_FIELD_TYPE_LARGEINT:
583
52.9k
    case FieldType::OLAP_FIELD_TYPE_CHAR:
584
52.9k
    case FieldType::OLAP_FIELD_TYPE_VARCHAR:
585
80.3k
    case FieldType::OLAP_FIELD_TYPE_STRING:
586
80.3k
    case FieldType::OLAP_FIELD_TYPE_DATE:
587
80.3k
    case FieldType::OLAP_FIELD_TYPE_DATETIME:
588
80.5k
    case FieldType::OLAP_FIELD_TYPE_DATEV2:
589
80.8k
    case FieldType::OLAP_FIELD_TYPE_DATETIMEV2:
590
80.8k
    case FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ:
591
80.8k
    case FieldType::OLAP_FIELD_TYPE_DECIMAL:
592
80.9k
    case FieldType::OLAP_FIELD_TYPE_DECIMAL32:
593
81.0k
    case FieldType::OLAP_FIELD_TYPE_DECIMAL64:
594
81.2k
    case FieldType::OLAP_FIELD_TYPE_DECIMAL128I:
595
81.3k
    case FieldType::OLAP_FIELD_TYPE_DECIMAL256:
596
81.5k
    case FieldType::OLAP_FIELD_TYPE_IPV4:
597
81.7k
    case FieldType::OLAP_FIELD_TYPE_IPV6:
598
81.7k
        return true;
599
14.3k
    default:
600
14.3k
        return false;
601
96.0k
    }
602
96.0k
}
603
604
void inherit_column_attributes(const TabletColumn& source, TabletColumn& target,
605
96.0k
                               TabletSchemaSPtr* target_schema) {
606
96.0k
    if (!target.is_extracted_column()) {
607
0
        return;
608
0
    }
609
96.0k
    target.set_aggregation_method(source.aggregation());
610
611
    // 1. bloom filter
612
96.0k
    if (is_bf_supported_by_fe_for_variant_subcolumn(target.type())) {
613
81.7k
        target.set_is_bf_column(source.is_bf_column());
614
81.7k
    }
615
616
96.0k
    if (!target_schema) {
617
88.9k
        return;
618
88.9k
    }
619
620
    // 2. inverted index
621
7.10k
    TabletIndexes indexes_to_add;
622
7.10k
    auto source_indexes = (*target_schema)->inverted_indexs(source.unique_id());
623
    // if target is variant type, we need to inherit all indexes
624
    // because this schema is a read schema from fe
625
7.10k
    if (target.is_variant_type()) {
626
6.07k
        for (auto& index : source_indexes) {
627
300
            auto index_info = std::make_shared<TabletIndex>(*index);
628
300
            index_info->set_escaped_escaped_index_suffix_path(target.path_info_ptr()->get_path());
629
300
            indexes_to_add.emplace_back(std::move(index_info));
630
300
        }
631
6.07k
    } else {
632
1.02k
        inherit_index(source_indexes, indexes_to_add, target);
633
1.02k
    }
634
7.10k
    auto target_indexes = (*target_schema)
635
7.10k
                                  ->inverted_indexs(target.parent_unique_id(),
636
7.10k
                                                    target.path_info_ptr()->get_path());
637
7.10k
    if (target_indexes.empty()) {
638
7.10k
        for (auto& index_info : indexes_to_add) {
639
306
            (*target_schema)->append_index(std::move(*index_info));
640
306
        }
641
7.10k
    }
642
643
    // 3. TODO: gnragm bf index
644
7.10k
}
645
646
8.40k
// Walks every column of `schema` and, for each extracted column whose parent column is still
// present in the schema, inherits the parent's attributes (including missing index meta).
void inherit_column_attributes(TabletSchemaSPtr& schema) {
    for (size_t idx = 0; idx < schema->num_columns(); ++idx) {
        TabletColumn& extracted = schema->mutable_column(idx);
        // Skip regular columns, and extracted columns whose parent is missing (maybe dropped).
        const bool parent_available = extracted.is_extracted_column() &&
                                      schema->field_index(extracted.parent_unique_id()) != -1;
        if (parent_available) {
            const auto& parent = schema->column_by_uid(extracted.parent_unique_id());
            inherit_column_attributes(parent, extracted, &schema);
        }
    }
}
660
661
// Builds `output_schema` as the least common schema over `schemas`.
//
// The base is `base_schema` when it is non-null, otherwise the element of `schemas` with the
// highest schema version. The output starts as the base without its extracted columns; then, for
// every variant column of the base, update_least_common_schema() merges the sub columns found in
// `schemas`, and finally the extracted columns inherit their parents' attributes.
//
// Returns:
// - InternalError when `base_schema` is null and `schemas` is empty (no base can be picked).
// - Any error reported by update_least_common_schema().
// - DataQualityError when `check_schema_size` is set and the merged schema has more columns than
//   config::variant_max_merged_tablet_schema_size.
Status get_least_common_schema(const std::vector<TabletSchemaSPtr>& schemas,
                               const TabletSchemaSPtr& base_schema, TabletSchemaSPtr& output_schema,
                               bool check_schema_size) {
    std::vector<int32_t> variant_column_unique_id;
    // Construct a schema excluding the extracted columns and gather unique identifiers for variants.
    // Ensure that the output schema also excludes these extracted columns. This approach prevents
    // duplicated paths following the update_least_common_schema process.
    auto build_schema_without_extracted_columns = [&](const TabletSchemaSPtr& source_schema) {
        output_schema = std::make_shared<TabletSchema>();
        // Columns are not copied here; only the other attributes are shadow-copied.
        output_schema->shawdow_copy_without_columns(*source_schema);
        // Keep all non-extracted columns and remember the unique id of every variant column.
        for (const TabletColumnPtr& col : source_schema->columns()) {
            if (col->is_variant_type()) {
                variant_column_unique_id.push_back(col->unique_id());
            }
            if (!col->is_extracted_column()) {
                output_schema->append_column(*col);
            }
        }
    };
    if (base_schema == nullptr) {
        // std::max_element returns the end iterator for an empty range; dereferencing it is
        // undefined behaviour, so reject that input explicitly.
        if (schemas.empty()) {
            return Status::InternalError(
                    "Can not pick a base schema for the least common schema: no schemas given");
        }
        // Pick tablet schema with max schema version. The comparator takes its arguments by
        // reference to avoid copying a shared_ptr on every comparison.
        auto max_version_schema =
                *std::max_element(schemas.cbegin(), schemas.cend(),
                                  [](const TabletSchemaSPtr& a, const TabletSchemaSPtr& b) {
                                      return a->schema_version() < b->schema_version();
                                  });
        CHECK(max_version_schema);
        build_schema_without_extracted_columns(max_version_schema);
    } else {
        // use input base_schema schema as base schema
        build_schema_without_extracted_columns(base_schema);
    }

    for (int32_t unique_id : variant_column_unique_id) {
        std::set<PathInData> path_set;
        RETURN_IF_ERROR(update_least_common_schema(schemas, output_schema, unique_id, &path_set));
    }

    inherit_column_attributes(output_schema);
    if (check_schema_size &&
        output_schema->columns().size() > config::variant_max_merged_tablet_schema_size) {
        return Status::DataQualityError("Reached max column size limit {}",
                                        config::variant_max_merged_tablet_schema_size);
    }

    return Status::OK();
}
710
711
// sort by paths in lexicographical order
712
9.04k
ColumnVariant::Subcolumns get_sorted_subcolumns(const ColumnVariant::Subcolumns& subcolumns) {
713
    // sort by paths in lexicographical order
714
9.04k
    ColumnVariant::Subcolumns sorted = subcolumns;
715
467k
    std::sort(sorted.begin(), sorted.end(), [](const auto& lhsItem, const auto& rhsItem) {
716
467k
        return lhsItem->path < rhsItem->path;
717
467k
    });
718
9.04k
    return sorted;
719
9.04k
}
720
721
bool has_schema_index_diff(const TabletSchema* new_schema, const TabletSchema* old_schema,
722
18.6k
                           int32_t new_col_idx, int32_t old_col_idx) {
723
18.6k
    const auto& column_new = new_schema->column(new_col_idx);
724
18.6k
    const auto& column_old = old_schema->column(old_col_idx);
725
726
18.6k
    if (column_new.is_bf_column() != column_old.is_bf_column()) {
727
94
        return true;
728
94
    }
729
730
18.5k
    auto new_schema_inverted_indexs = new_schema->inverted_indexs(column_new);
731
18.5k
    auto old_schema_inverted_indexs = old_schema->inverted_indexs(column_old);
732
733
18.5k
    if (new_schema_inverted_indexs.size() != old_schema_inverted_indexs.size()) {
734
706
        return true;
735
706
    }
736
737
18.1k
    for (size_t i = 0; i < new_schema_inverted_indexs.size(); ++i) {
738
393
        if (!new_schema_inverted_indexs[i]->is_same_except_id(old_schema_inverted_indexs[i])) {
739
20
            return true;
740
20
        }
741
393
    }
742
743
17.7k
    return false;
744
17.8k
}
745
746
1.97k
// Creates the sparse column of the variant column `variant`.
//
// The column is a MAP named "<variant lower-case name>.<SPARSE_COLUMN_PATH>" with two STRING
// sub columns; it takes the variant's aggregation method and uses the variant's unique id as its
// parent unique id.
TabletColumn create_sparse_column(const TabletColumn& variant) {
    const std::string sparse_name = variant.name_lower_case() + "." + SPARSE_COLUMN_PATH;

    TabletColumn sparse;
    sparse.set_name(sparse_name);
    sparse.set_type(FieldType::OLAP_FIELD_TYPE_MAP);
    sparse.set_aggregation_method(variant.aggregation());
    sparse.set_path_info(PathInData {sparse_name});
    sparse.set_parent_unique_id(variant.unique_id());
    // set default value to "NULL" DefaultColumnIterator will call insert_many_defaults
    sparse.set_default_value("NULL");

    // The same STRING child is added twice, giving the MAP its two sub columns.
    TabletColumn string_child;
    string_child.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
    for (int i = 0; i < 2; ++i) {
        sparse.add_sub_column(string_child);
    }
    return sparse;
}
761
762
16.2k
// Creates the sparse column for bucket `bucket_index` of the variant column `variant`.
//
// The column is a MAP named "<variant lower-case name>.<SPARSE_COLUMN_PATH>.b<bucket_index>"
// with two STRING sub columns, default value "NULL", the variant's aggregation method, and the
// variant's unique id as its parent unique id.
TabletColumn create_sparse_shard_column(const TabletColumn& variant, int bucket_index) {
    std::string shard_name = variant.name_lower_case() + "." + SPARSE_COLUMN_PATH + ".b" +
                             std::to_string(bucket_index);

    TabletColumn shard;
    shard.set_name(shard_name);
    shard.set_type(FieldType::OLAP_FIELD_TYPE_MAP);
    shard.set_aggregation_method(variant.aggregation());
    shard.set_parent_unique_id(variant.unique_id());
    shard.set_default_value("NULL");
    PathInData shard_path(shard_name);
    shard.set_path_info(shard_path);

    // The same STRING child is added twice, giving the MAP its two sub columns.
    TabletColumn string_child;
    string_child.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
    for (int i = 0; i < 2; ++i) {
        shard.add_sub_column(string_child);
    }
    return shard;
}
779
780
14.0k
// Creates the doc-value column for bucket `bucket_index` of the variant column `variant`.
//
// The column is a MAP named "<variant lower-case name>.<DOC_VALUE_COLUMN_PATH>.b<bucket_index>"
// with two STRING sub columns, default value "NULL", the variant's aggregation method, and the
// variant's unique id as its parent unique id.
TabletColumn create_doc_value_column(const TabletColumn& variant, int bucket_index) {
    std::string doc_value_name = variant.name_lower_case() + "." + DOC_VALUE_COLUMN_PATH + ".b" +
                                 std::to_string(bucket_index);

    TabletColumn doc_value;
    doc_value.set_name(doc_value_name);
    doc_value.set_type(FieldType::OLAP_FIELD_TYPE_MAP);
    doc_value.set_aggregation_method(variant.aggregation());
    doc_value.set_parent_unique_id(variant.unique_id());
    doc_value.set_default_value("NULL");
    doc_value.set_path_info(PathInData {doc_value_name});

    // The same STRING child is added twice, giving the MAP its two sub columns.
    TabletColumn string_child;
    string_child.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
    for (int i = 0; i < 2; ++i) {
        doc_value.add_sub_column(string_child);
    }
    return doc_value;
}
797
798
211k
uint32_t variant_binary_shard_of(const StringRef& path, uint32_t bucket_num) {
799
211k
    if (bucket_num <= 1) return 0;
800
94.5k
    SipHash hash;
801
94.5k
    hash.update(path.data, path.size);
802
94.5k
    uint64_t h = hash.get64();
803
94.5k
    return static_cast<uint32_t>(h % bucket_num);
804
211k
}
805
806
// Adds the per-path non-null value counts stored in the segments of rowset `rs` to
// `uid_to_path_stats`, keyed by the unique id of the owning variant column.
//
// Only variant columns with a non-negative unique id that pass should_check_variant_path_stats()
// are considered. Both the sparse-column statistics and the sub-column statistics of each segment
// are accumulated. An entry for a uid is created only when at least one path is added for it.
// Errors from loading segments, obtaining a column reader, or loading external meta are returned.
Status VariantCompactionUtil::aggregate_path_to_stats(
        const RowsetSharedPtr& rs,
        std::unordered_map<int32_t, PathToNoneNullValues>* uid_to_path_stats) {
    SegmentCacheHandle segment_cache;
    RETURN_IF_ERROR(SegmentLoader::instance()->load_segments(
            std::static_pointer_cast<BetaRowset>(rs), &segment_cache));

    for (const auto& column : rs->tablet_schema()->columns()) {
        const bool eligible = column->is_variant_type() && column->unique_id() >= 0 &&
                              should_check_variant_path_stats(*column);
        if (!eligible) {
            continue;
        }

        // Adds every (path, size) pair of `path_sizes` to this column's accumulated stats.
        const auto accumulate = [&](const auto& path_sizes) {
            for (const auto& [path, size] : path_sizes) {
                (*uid_to_path_stats)[column->unique_id()][path] += size;
            }
        };

        for (const auto& segment : segment_cache.get_segments()) {
            std::shared_ptr<ColumnReader> reader;
            OlapReaderStatistics reader_stats;
            RETURN_IF_ERROR(
                    segment->get_column_reader(column->unique_id(), &reader, &reader_stats));
            if (!reader) {
                // No reader was produced for this column in this segment.
                continue;
            }

            CHECK(reader->get_meta_type() == FieldType::OLAP_FIELD_TYPE_VARIANT);
            auto* variant_reader = assert_cast<segment_v2::VariantColumnReader*>(reader.get());
            // load external meta before getting stats
            RETURN_IF_ERROR(variant_reader->load_external_meta_once());
            const auto* segment_stats = variant_reader->get_stats();
            CHECK(segment_stats);

            // agg path -> stats
            accumulate(segment_stats->sparse_column_non_null_size);
            accumulate(segment_stats->subcolumns_non_null_size);
        }
    }
    return Status::OK();
}
849
850
// Collects, for every variant column of rowset `rs`, the extended information stored in its
// segments into `uid_to_variant_extended_info` (keyed by column unique id):
//   1. per-path non-null value counts, plus the set of sparse paths,
//   2. per-path data types,
//   3. typed paths,
//   4. nested paths.
// Columns with nested groups enabled are only flagged via `has_nested_group`; their segments are
// not inspected. Errors from loading segments, obtaining a column reader, or loading external
// meta are returned.
Status VariantCompactionUtil::aggregate_variant_extended_info(
        const RowsetSharedPtr& rs,
        std::unordered_map<int32_t, VariantExtendedInfo>* uid_to_variant_extended_info) {
    SegmentCacheHandle segment_cache;
    RETURN_IF_ERROR(SegmentLoader::instance()->load_segments(
            std::static_pointer_cast<BetaRowset>(rs), &segment_cache));

    for (const auto& column : rs->tablet_schema()->columns()) {
        if (!column->is_variant_type()) {
            continue;
        }
        if (column->variant_enable_nested_group()) {
            (*uid_to_variant_extended_info)[column->unique_id()].has_nested_group = true;
            continue;
        }
        for (const auto& segment : segment_cache.get_segments()) {
            std::shared_ptr<ColumnReader> reader;
            OlapReaderStatistics reader_stats;
            RETURN_IF_ERROR(
                    segment->get_column_reader(column->unique_id(), &reader, &reader_stats));
            if (!reader) {
                // No reader was produced for this column in this segment.
                continue;
            }

            CHECK(reader->get_meta_type() == FieldType::OLAP_FIELD_TYPE_VARIANT);
            auto* variant_reader = assert_cast<segment_v2::VariantColumnReader*>(reader.get());
            // load external meta before getting stats
            RETURN_IF_ERROR(variant_reader->load_external_meta_once());
            const auto* segment_stats = variant_reader->get_stats();
            CHECK(segment_stats);

            // Every step below writes into this column's entry, so resolve it once.
            auto& extended_info = (*uid_to_variant_extended_info)[column->unique_id()];

            // 1. agg path -> stats
            for (const auto& [path, size] : segment_stats->sparse_column_non_null_size) {
                extended_info.path_to_none_null_values[path] += size;
                extended_info.sparse_paths.emplace(path);
            }
            for (const auto& [path, size] : segment_stats->subcolumns_non_null_size) {
                extended_info.path_to_none_null_values[path] += size;
            }

            // 2. agg path -> schema
            variant_reader->get_subcolumns_types(&extended_info.path_to_data_types);

            // 3. extract typed paths
            variant_reader->get_typed_paths(&extended_info.typed_paths);

            // 4. extract nested paths
            variant_reader->get_nested_paths(&extended_info.nested_paths);
        }
    }
    return Status::OK();
}
910
911
// get the subpaths and sparse paths for the variant column
912
void VariantCompactionUtil::get_subpaths(int32_t max_subcolumns_count,
913
                                         const PathToNoneNullValues& stats,
914
439
                                         TabletSchema::PathsSetInfo& paths_set_info) {
915
    // max_subcolumns_count is 0 means no limit
916
439
    if (max_subcolumns_count > 0 && stats.size() > max_subcolumns_count) {
917
188
        std::vector<std::pair<size_t, std::string_view>> paths_with_sizes;
918
188
        paths_with_sizes.reserve(stats.size());
919
5.25k
        for (const auto& [path, size] : stats) {
920
5.25k
            paths_with_sizes.emplace_back(size, path);
921
5.25k
        }
922
188
        std::sort(paths_with_sizes.begin(), paths_with_sizes.end(), std::greater());
923
924
        // Select top N paths as subcolumns, remaining paths as sparse columns
925
5.25k
        for (const auto& [size, path] : paths_with_sizes) {
926
5.25k
            if (paths_set_info.sub_path_set.size() < max_subcolumns_count) {
927
964
                paths_set_info.sub_path_set.emplace(path);
928
4.29k
            } else {
929
4.29k
                paths_set_info.sparse_path_set.emplace(path);
930
4.29k
            }
931
5.25k
        }
932
188
        LOG(INFO) << "subpaths " << paths_set_info.sub_path_set.size() << " sparse paths "
933
188
                  << paths_set_info.sparse_path_set.size() << " variant max subcolumns count "
934
188
                  << max_subcolumns_count << " stats size " << paths_with_sizes.size();
935
251
    } else {
936
        // Apply all paths as subcolumns
937
385
        for (const auto& [path, _] : stats) {
938
385
            paths_set_info.sub_path_set.emplace(path);
939
385
        }
940
251
    }
941
439
}
942
943
// Verifies that the per-path statistics of the compaction `output` rowset are consistent with
// the statistics aggregated over the input rowsets `intputs`.
//
// The check is skipped (Status::OK) when:
//   - the output schema has no variant column,
//   - any input or output variant column fails should_check_variant_path_stats(),
//   - any input schema contains an extracted column,
//   - the tablet is not DUP_KEYS (rows may be merged in other models),
//   - any input rowset carries a delete predicate.
// Outside BE_TEST builds, an extracted column in the output schema that is not a doc-value or
// sparse column yields InternalError.
//
// For every uid in the output stats (doc-mode variant columns are skipped):
//   - if the number of output paths exceeds the column's
//     variant_max_sparse_column_statistics_size(), the input stats may be inaccurate, so values
//     are only compared when the output has exactly one segment, and only "input > output" is
//     treated as an error;
//   - otherwise every output path must exist in the input stats with exactly the same value.
Status VariantCompactionUtil::check_path_stats(const std::vector<RowsetSharedPtr>& intputs,
                                               RowsetSharedPtr output, BaseTabletSPtr tablet) {
    if (output->tablet_schema()->num_variant_columns() == 0) {
        return Status::OK();
    }

    // True for a variant column whose path stats must not be checked.
    const auto is_unchecked_variant = [](const auto& column) {
        return column->is_variant_type() && !should_check_variant_path_stats(*column);
    };

    for (const auto& rowset : intputs) {
        if (std::ranges::any_of(rowset->tablet_schema()->columns(), is_unchecked_variant)) {
            return Status::OK();
        }
    }
    // check no extended schema in input rowsets
    for (const auto& rowset : intputs) {
        const bool has_extracted_column = std::ranges::any_of(
                rowset->tablet_schema()->columns(),
                [](const auto& column) { return column->is_extracted_column(); });
        if (has_extracted_column) {
            return Status::OK();
        }
    }
#ifndef BE_TEST
    // check no extended schema in output rowset
    for (const auto& column : output->tablet_schema()->columns()) {
        if (!column->is_extracted_column()) {
            continue;
        }
        // Doc-value and sparse columns are the only extracted columns tolerated in the output.
        const auto& name = column->name();
        const bool is_internal_column =
                name.find("." + DOC_VALUE_COLUMN_PATH + ".") != std::string::npos ||
                name.find("." + SPARSE_COLUMN_PATH + ".") != std::string::npos ||
                name.ends_with("." + SPARSE_COLUMN_PATH);
        if (!is_internal_column) {
            return Status::InternalError("Unexpected extracted column {} in output rowset",
                                         column->name());
        }
    }
#endif
    // only check path stats for dup_keys since the rows may be merged in other models
    if (tablet->keys_type() != KeysType::DUP_KEYS) {
        return Status::OK();
    }
    // if there is a delete predicate in the input rowsets, we skip the path stats check
    const bool any_delete_predicate = std::ranges::any_of(intputs, [](const auto& rowset) {
        return rowset->rowset_meta()->has_delete_predicate();
    });
    if (any_delete_predicate) {
        return Status::OK();
    }
    if (std::ranges::any_of(output->tablet_schema()->columns(), is_unchecked_variant)) {
        return Status::OK();
    }

    std::unordered_map<int32_t, PathToNoneNullValues> input_stats_by_uid;
    for (const auto& rs : intputs) {
        RETURN_IF_ERROR(aggregate_path_to_stats(rs, &input_stats_by_uid));
    }
    std::unordered_map<int32_t, PathToNoneNullValues> output_stats_by_uid;
    RETURN_IF_ERROR(aggregate_path_to_stats(output, &output_stats_by_uid));

    for (const auto& [uid, stats] : output_stats_by_uid) {
        if (output->tablet_schema()->column_by_uid(uid).is_variant_type() &&
            output->tablet_schema()->column_by_uid(uid).variant_enable_doc_mode()) {
            continue;
        }
        const auto input_it = input_stats_by_uid.find(uid);
        if (input_it == input_stats_by_uid.end()) {
            return Status::InternalError("Path stats not found for uid {}, tablet_id {}", uid,
                                         tablet->tablet_id());
        }
        const auto& input_stats = input_it->second;

        // In input rowsets, some rowsets may have statistics values exceeding the maximum limit,
        // which leads to inaccurate statistics
        const bool input_may_be_inaccurate =
                stats.size() > output->tablet_schema()
                                       ->column_by_uid(uid)
                                       .variant_max_sparse_column_statistics_size();
        if (input_may_be_inaccurate) {
            // When there is only one segment, we can ensure that the size of each path in output
            // stats is accurate
            if (output->num_segments() != 1) {
                continue;
            }
            for (const auto& [path, size] : stats) {
                const auto path_it = input_stats.find(path);
                if (path_it == input_stats.end()) {
                    continue;
                }
                if (path_it->second > size) {
                    return Status::InternalError(
                            "Path stats not smaller for uid {} with path `{}`, input size {}, "
                            "output size {}, tablet_id {}",
                            uid, path, path_it->second, size, tablet->tablet_id());
                }
            }
            continue;
        }

        // in this case, input stats is accurate, so we check the stats size and stats value
        for (const auto& [path, size] : stats) {
            const auto path_it = input_stats.find(path);
            if (path_it == input_stats.end()) {
                return Status::InternalError(
                        "Path stats not found for uid {}, path {}, tablet_id {}", uid, path,
                        tablet->tablet_id());
            }
            if (path_it->second != size) {
                return Status::InternalError(
                        "Path stats not match for uid {} with path `{}`, input size {}, "
                        "output size {}, tablet_id {}",
                        uid, path, path_it->second, size, tablet->tablet_id());
            }
        }
    }

    return Status::OK();
}
1056
1057
// Appends one typed sub column per entry of `typed_paths` to `output_schema` and records it in
// `paths_set_info.typed_path_set`.
//
// Does nothing when the parent column has variant_enable_typed_paths_to_sparse() set. Returns
// InternalError as soon as generate_sub_column_info() fails for a path; columns appended for
// earlier paths stay in `output_schema`.
Status VariantCompactionUtil::get_compaction_typed_columns(
        const TabletSchemaSPtr& target, const std::unordered_set<std::string>& typed_paths,
        const TabletColumnPtr parent_column, TabletSchemaSPtr& output_schema,
        TabletSchema::PathsSetInfo& paths_set_info) {
    if (parent_column->variant_enable_typed_paths_to_sparse()) {
        return Status::OK();
    }
    for (const auto& path : typed_paths) {
        TabletSchema::SubColumnInfo typed_info;
        const bool generated =
                generate_sub_column_info(*target, parent_column->unique_id(), path, &typed_info);
        if (!generated) {
            return Status::InternalError("Failed to generate sub column info for path {}", path);
        }
        inherit_column_attributes(*parent_column, typed_info.column);
        output_schema->append_column(typed_info.column);
        paths_set_info.typed_path_set.insert({path, std::move(typed_info)});
        VLOG_DEBUG << "append typed column " << path;
    }
    return Status::OK();
}
1077
1078
// Appends one sub column per entry of `nested_paths` to `output_schema`.
//
// The column type is the least JSONB supertype of the data types recorded for the path in
// `path_to_data_types`. Each new column inherits the parent's attributes, and the indexes derived
// from the parent's inverted indexes are stored in `paths_set_info.subcolumn_indexes` under the
// path. Returns InternalError when a nested path has no recorded data type.
Status VariantCompactionUtil::get_compaction_nested_columns(
        const std::unordered_set<PathInData, PathInData::Hash>& nested_paths,
        const PathToDataTypes& path_to_data_types, const TabletColumnPtr parent_column,
        TabletSchemaSPtr& output_schema, TabletSchema::PathsSetInfo& paths_set_info) {
    const auto& parent_indexes = output_schema->inverted_indexs(parent_column->unique_id());
    for (const auto& path : nested_paths) {
        const auto types_it = path_to_data_types.find(path);
        const bool has_types = types_it != path_to_data_types.end() && !types_it->second.empty();
        if (!has_types) {
            return Status::InternalError("Nested path {} has no data type", path.get_path());
        }
        DataTypePtr merged_type;
        get_least_supertype_jsonb(types_it->second, &merged_type);

        // The column name is "<parent>.<path>"; the path info keeps the individual path parts.
        const std::string column_name = parent_column->name_lower_case() + "." + path.get_path();
        PathInDataBuilder path_builder;
        path_builder.append(parent_column->name_lower_case(), false);
        path_builder.append(path.get_parts(), false);
        auto full_path = path_builder.build();

        TabletColumn nested_column =
                get_column_by_type(merged_type, column_name,
                                   ExtraInfo {.unique_id = -1,
                                              .parent_unique_id = parent_column->unique_id(),
                                              .path_info = full_path});
        inherit_column_attributes(*parent_column, nested_column);

        TabletIndexes nested_indexes;
        inherit_index(parent_indexes, nested_indexes, nested_column);
        paths_set_info.subcolumn_indexes.emplace(path.get_path(), std::move(nested_indexes));

        output_schema->append_column(nested_column);
        VLOG_DEBUG << "append nested column " << path.get_path();
    }
    return Status::OK();
}
1110
1111
void VariantCompactionUtil::get_compaction_subcolumns_from_subpaths(
1112
        TabletSchema::PathsSetInfo& paths_set_info, const TabletColumnPtr parent_column,
1113
        const TabletSchemaSPtr& target, const PathToDataTypes& path_to_data_types,
1114
426
        const std::unordered_set<std::string>& sparse_paths, TabletSchemaSPtr& output_schema) {
1115
426
    auto& path_set = paths_set_info.sub_path_set;
1116
426
    std::vector<StringRef> sorted_subpaths(path_set.begin(), path_set.end());
1117
426
    std::sort(sorted_subpaths.begin(), sorted_subpaths.end());
1118
426
    const auto& parent_indexes = target->inverted_indexs(parent_column->unique_id());
1119
    // append subcolumns
1120
1.25k
    for (const auto& subpath : sorted_subpaths) {
1121
1.25k
        auto column_name = parent_column->name_lower_case() + "." + subpath.to_string();
1122
1.25k
        auto column_path = PathInData(column_name);
1123
1124
1.25k
        const auto& find_data_types = path_to_data_types.find(PathInData(subpath));
1125
1126
        // some cases: the subcolumn type is variant
1127
        // 1. this path has no data type in segments
1128
        // 2. this path is in sparse paths
1129
        // 3. the sparse paths are too much
1130
1.25k
        TabletSchema::SubColumnInfo sub_column_info;
1131
1.25k
        if (parent_column->variant_enable_typed_paths_to_sparse() &&
1132
1.25k
            generate_sub_column_info(*target, parent_column->unique_id(), std::string(subpath),
1133
65
                                     &sub_column_info)) {
1134
55
            inherit_column_attributes(*parent_column, sub_column_info.column);
1135
55
            output_schema->append_column(sub_column_info.column);
1136
55
            paths_set_info.subcolumn_indexes.emplace(subpath, std::move(sub_column_info.indexes));
1137
55
            VLOG_DEBUG << "append typed column " << subpath;
1138
1.20k
        } else if (find_data_types == path_to_data_types.end() || find_data_types->second.empty() ||
1139
1.20k
                   sparse_paths.find(std::string(subpath)) != sparse_paths.end() ||
1140
1.20k
                   sparse_paths.size() >=
1141
1.08k
                           parent_column->variant_max_sparse_column_statistics_size()) {
1142
115
            TabletColumn subcolumn;
1143
115
            subcolumn.set_name(column_name);
1144
115
            subcolumn.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
1145
115
            subcolumn.set_parent_unique_id(parent_column->unique_id());
1146
115
            subcolumn.set_path_info(column_path);
1147
115
            subcolumn.set_aggregation_method(parent_column->aggregation());
1148
115
            subcolumn.set_variant_max_subcolumns_count(
1149
115
                    parent_column->variant_max_subcolumns_count());
1150
115
            subcolumn.set_variant_enable_doc_mode(parent_column->variant_enable_doc_mode());
1151
115
            subcolumn.set_is_nullable(true);
1152
115
            output_schema->append_column(subcolumn);
1153
115
            VLOG_DEBUG << "append sub column " << subpath << " data type "
1154
0
                       << "VARIANT";
1155
115
        }
1156
        // normal case: the subcolumn type can be calculated from the data types in segments
1157
1.08k
        else {
1158
1.08k
            DataTypePtr data_type;
1159
1.08k
            get_least_supertype_jsonb(find_data_types->second, &data_type);
1160
1.08k
            TabletColumn sub_column =
1161
1.08k
                    get_column_by_type(data_type, column_name,
1162
1.08k
                                       ExtraInfo {.unique_id = -1,
1163
1.08k
                                                  .parent_unique_id = parent_column->unique_id(),
1164
1.08k
                                                  .path_info = column_path});
1165
1.08k
            inherit_column_attributes(*parent_column, sub_column);
1166
1.08k
            TabletIndexes sub_column_indexes;
1167
1.08k
            inherit_index(parent_indexes, sub_column_indexes, sub_column);
1168
1.08k
            paths_set_info.subcolumn_indexes.emplace(subpath, std::move(sub_column_indexes));
1169
1.08k
            output_schema->append_column(sub_column);
1170
1.08k
            VLOG_DEBUG << "append sub column " << subpath << " data type " << data_type->get_name();
1171
1.08k
        }
1172
1.25k
    }
1173
426
}
1174
1175
void VariantCompactionUtil::get_compaction_subcolumns_from_data_types(
1176
        TabletSchema::PathsSetInfo& paths_set_info, const TabletColumnPtr parent_column,
1177
        const TabletSchemaSPtr& target, const PathToDataTypes& path_to_data_types,
1178
17
        TabletSchemaSPtr& output_schema) {
1179
17
    const auto& parent_indexes = target->inverted_indexs(parent_column->unique_id());
1180
118
    for (const auto& [path, data_types] : path_to_data_types) {
1181
118
        if (data_types.empty() || path.empty() || path.has_nested_part()) {
1182
15
            continue;
1183
15
        }
1184
103
        DataTypePtr data_type;
1185
103
        get_least_supertype_jsonb(data_types, &data_type);
1186
103
        auto column_name = parent_column->name_lower_case() + "." + path.get_path();
1187
103
        auto column_path = PathInData(column_name);
1188
103
        TabletColumn sub_column =
1189
103
                get_column_by_type(data_type, column_name,
1190
103
                                   ExtraInfo {.unique_id = -1,
1191
103
                                              .parent_unique_id = parent_column->unique_id(),
1192
103
                                              .path_info = column_path});
1193
103
        inherit_column_attributes(*parent_column, sub_column);
1194
103
        TabletIndexes sub_column_indexes;
1195
103
        inherit_index(parent_indexes, sub_column_indexes, sub_column);
1196
103
        paths_set_info.subcolumn_indexes.emplace(path.get_path(), std::move(sub_column_indexes));
1197
103
        output_schema->append_column(sub_column);
1198
103
        VLOG_DEBUG << "append sub column " << path.get_path() << " data type "
1199
0
                   << data_type->get_name();
1200
103
    }
1201
17
}
1202
1203
// Build the temporary schema for compaction
1204
// 1. aggregate path stats and data types from all rowsets
1205
// 2. append typed columns and nested columns to the output schema
1206
// 3. sort the subpaths and sparse paths for each unique id
1207
// 4. append the subpaths and sparse paths to the output schema
1208
// 5. set the path set info for each unique id
1209
// 6. return the output schema
1210
Status VariantCompactionUtil::get_extended_compaction_schema(
1211
7.37k
        const std::vector<RowsetSharedPtr>& rowsets, TabletSchemaSPtr& target) {
1212
7.37k
    std::unordered_map<int32_t, VariantExtendedInfo> uid_to_variant_extended_info;
1213
7.37k
    const bool has_extendable_variant =
1214
80.7k
            std::ranges::any_of(target->columns(), [](const TabletColumnPtr& column) {
1215
80.7k
                return column->is_variant_type() && should_check_variant_path_stats(*column);
1216
80.7k
            });
1217
7.37k
    if (has_extendable_variant) {
1218
        // collect path stats from all rowsets and segments
1219
4.98k
        for (const auto& rs : rowsets) {
1220
4.98k
            RETURN_IF_ERROR(aggregate_variant_extended_info(rs, &uid_to_variant_extended_info));
1221
4.98k
        }
1222
600
    }
1223
1224
    // build the output schema
1225
7.37k
    TabletSchemaSPtr output_schema = std::make_shared<TabletSchema>();
1226
7.37k
    output_schema->shawdow_copy_without_columns(*target);
1227
7.37k
    std::unordered_map<int32_t, TabletSchema::PathsSetInfo> uid_to_paths_set_info;
1228
82.1k
    for (const TabletColumnPtr& column : target->columns()) {
1229
82.1k
        if (!column->is_extracted_column()) {
1230
82.1k
            output_schema->append_column(*column);
1231
82.1k
        }
1232
82.1k
        if (!column->is_variant_type()) {
1233
81.4k
            continue;
1234
81.4k
        }
1235
18.4E
        VLOG_DEBUG << "column " << column->name() << " unique id " << column->unique_id();
1236
1237
699
        const auto info_it = uid_to_variant_extended_info.find(column->unique_id());
1238
699
        const VariantExtendedInfo empty_extended_info;
1239
699
        const VariantExtendedInfo& extended_info = info_it == uid_to_variant_extended_info.end()
1240
699
                                                           ? empty_extended_info
1241
699
                                                           : info_it->second;
1242
699
        if (!should_check_variant_path_stats(*column)) {
1243
0
            VLOG_DEBUG << "skip extended schema compaction for variant uid=" << column->unique_id()
1244
0
                       << " because the column disables variant path stats";
1245
0
            continue;
1246
0
        }
1247
699
        if (extended_info.has_nested_group) {
1248
0
            LOG(INFO) << "Variant column uid=" << column->unique_id()
1249
0
                      << " has nested group, keep original column in compaction schema";
1250
0
            continue;
1251
0
        }
1252
1253
699
        if (column->variant_enable_doc_mode()) {
1254
277
            const int bucket_num = std::max(1, column->variant_doc_hash_shard_count());
1255
1.02k
            for (int b = 0; b < bucket_num; ++b) {
1256
749
                TabletColumn doc_value_bucket_column = create_doc_value_column(*column, b);
1257
749
                doc_value_bucket_column.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
1258
749
                doc_value_bucket_column.set_is_nullable(false);
1259
749
                doc_value_bucket_column.set_variant_enable_doc_mode(true);
1260
749
                output_schema->append_column(doc_value_bucket_column);
1261
749
            }
1262
277
            continue;
1263
277
        }
1264
1265
        // 1. append typed columns
1266
422
        RETURN_IF_ERROR(get_compaction_typed_columns(target, extended_info.typed_paths, column,
1267
422
                                                     output_schema,
1268
422
                                                     uid_to_paths_set_info[column->unique_id()]));
1269
        // 2. append nested columns
1270
422
        RETURN_IF_ERROR(get_compaction_nested_columns(
1271
422
                extended_info.nested_paths, extended_info.path_to_data_types, column, output_schema,
1272
422
                uid_to_paths_set_info[column->unique_id()]));
1273
1274
        // 3. get the subpaths
1275
422
        get_subpaths(column->variant_max_subcolumns_count(), extended_info.path_to_none_null_values,
1276
422
                     uid_to_paths_set_info[column->unique_id()]);
1277
1278
        // 4. append subcolumns
1279
422
        if (column->variant_max_subcolumns_count() > 0 || !column->get_sub_columns().empty()) {
1280
418
            get_compaction_subcolumns_from_subpaths(
1281
418
                    uid_to_paths_set_info[column->unique_id()], column, target,
1282
418
                    extended_info.path_to_data_types, extended_info.sparse_paths, output_schema);
1283
418
        }
1284
        // variant_max_subcolumns_count == 0 and no typed paths materialized
1285
        // it means that all subcolumns are materialized, may be from old data
1286
4
        else {
1287
4
            get_compaction_subcolumns_from_data_types(
1288
4
                    uid_to_paths_set_info[column->unique_id()], column, target,
1289
4
                    extended_info.path_to_data_types, output_schema);
1290
4
        }
1291
1292
        // append sparse column(s)
1293
        // If variant uses bucketized sparse columns, append one sparse bucket column per bucket.
1294
        // Otherwise, append the single sparse column.
1295
422
        int bucket_num = std::max(1, column->variant_sparse_hash_shard_count());
1296
422
        if (bucket_num > 1) {
1297
1.50k
            for (int b = 0; b < bucket_num; ++b) {
1298
1.16k
                TabletColumn sparse_bucket_column = create_sparse_shard_column(*column, b);
1299
1.16k
                output_schema->append_column(sparse_bucket_column);
1300
1.16k
            }
1301
344
        } else {
1302
78
            TabletColumn sparse_column = create_sparse_column(*column);
1303
78
            output_schema->append_column(sparse_column);
1304
78
        }
1305
422
    }
1306
1307
7.37k
    target = output_schema;
1308
    // used to merge & filter path to sparse column during reading in compaction
1309
7.37k
    target->set_path_set_info(std::move(uid_to_paths_set_info));
1310
7.37k
    VLOG_DEBUG << "dump schema " << target->dump_full_schema();
1311
7.37k
    return Status::OK();
1312
7.37k
}
1313
1314
// Calculate statistics about variant data paths from the encoded sparse column
1315
void VariantCompactionUtil::calculate_variant_stats(const IColumn& encoded_sparse_column,
1316
                                                    segment_v2::VariantStatisticsPB* stats,
1317
                                                    size_t max_sparse_column_statistics_size,
1318
1.24k
                                                    size_t row_pos, size_t num_rows) {
1319
    // Cast input column to ColumnMap type since sparse column is stored as a map
1320
1.24k
    const auto& map_column = assert_cast<const ColumnMap&>(encoded_sparse_column);
1321
1322
    // Get the keys column which contains the paths as strings
1323
1.24k
    const auto& sparse_data_paths =
1324
1.24k
            assert_cast<const ColumnString*>(map_column.get_keys_ptr().get());
1325
1.24k
    const auto& serialized_sparse_column_offsets =
1326
1.24k
            assert_cast<const ColumnArray::Offsets64&>(map_column.get_offsets());
1327
1.24k
    auto& count_map = *stats->mutable_sparse_column_non_null_size();
1328
    // Iterate through all paths in the sparse column
1329
422k
    for (size_t i = row_pos; i != row_pos + num_rows; ++i) {
1330
421k
        size_t offset = serialized_sparse_column_offsets[i - 1];
1331
421k
        size_t end = serialized_sparse_column_offsets[i];
1332
2.09M
        for (size_t j = offset; j != end; ++j) {
1333
1.67M
            auto path = sparse_data_paths->get_data_at(j);
1334
1335
1.67M
            const auto& sparse_path = path.to_string();
1336
            // If path already exists in statistics, increment its count
1337
1.67M
            if (auto it = count_map.find(sparse_path); it != count_map.end()) {
1338
1.66M
                ++it->second;
1339
1.66M
            }
1340
            // If path doesn't exist and we haven't hit the max statistics size limit,
1341
            // add it with count 1
1342
4.24k
            else if (count_map.size() < max_sparse_column_statistics_size) {
1343
4.24k
                count_map.emplace(sparse_path, 1);
1344
4.24k
            }
1345
1.67M
        }
1346
421k
    }
1347
1348
1.24k
    if (stats->sparse_column_non_null_size().size() > max_sparse_column_statistics_size) {
1349
0
        throw doris::Exception(
1350
0
                ErrorCode::INTERNAL_ERROR,
1351
0
                "Sparse column non null size: {} is greater than max statistics size: {}",
1352
0
                stats->sparse_column_non_null_size().size(), max_sparse_column_statistics_size);
1353
0
    }
1354
1.24k
}
1355
1356
/// Calculates number of dimensions in array field.
1357
/// Returns 0 for scalar fields.
1358
class FieldVisitorToNumberOfDimensions : public StaticVisitor<size_t> {
1359
public:
1360
    FieldVisitorToNumberOfDimensions() = default;
1361
    template <PrimitiveType T>
1362
23.4M
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1363
23.4M
        if constexpr (T == TYPE_ARRAY) {
1364
2.26M
            const size_t size = x.size();
1365
2.26M
            size_t dimensions = 0;
1366
5.42M
            for (size_t i = 0; i < size; ++i) {
1367
3.15M
                size_t element_dimensions = apply_visitor(*this, x[i]);
1368
3.15M
                dimensions = std::max(dimensions, element_dimensions);
1369
3.15M
            }
1370
2.26M
            return 1 + dimensions;
1371
21.1M
        } else {
1372
21.1M
            return 0;
1373
21.1M
        }
1374
23.4M
    }
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE1EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1362
122k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1363
        if constexpr (T == TYPE_ARRAY) {
1364
            const size_t size = x.size();
1365
            size_t dimensions = 0;
1366
            for (size_t i = 0; i < size; ++i) {
1367
                size_t element_dimensions = apply_visitor(*this, x[i]);
1368
                dimensions = std::max(dimensions, element_dimensions);
1369
            }
1370
            return 1 + dimensions;
1371
122k
        } else {
1372
122k
            return 0;
1373
122k
        }
1374
122k
    }
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE26EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1362
511
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1363
        if constexpr (T == TYPE_ARRAY) {
1364
            const size_t size = x.size();
1365
            size_t dimensions = 0;
1366
            for (size_t i = 0; i < size; ++i) {
1367
                size_t element_dimensions = apply_visitor(*this, x[i]);
1368
                dimensions = std::max(dimensions, element_dimensions);
1369
            }
1370
            return 1 + dimensions;
1371
511
        } else {
1372
511
            return 0;
1373
511
        }
1374
511
    }
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE42EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE7EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1362
41.9k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1363
        if constexpr (T == TYPE_ARRAY) {
1364
            const size_t size = x.size();
1365
            size_t dimensions = 0;
1366
            for (size_t i = 0; i < size; ++i) {
1367
                size_t element_dimensions = apply_visitor(*this, x[i]);
1368
                dimensions = std::max(dimensions, element_dimensions);
1369
            }
1370
            return 1 + dimensions;
1371
41.9k
        } else {
1372
41.9k
            return 0;
1373
41.9k
        }
1374
41.9k
    }
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE12EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE11EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE25EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1362
428
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1363
        if constexpr (T == TYPE_ARRAY) {
1364
            const size_t size = x.size();
1365
            size_t dimensions = 0;
1366
            for (size_t i = 0; i < size; ++i) {
1367
                size_t element_dimensions = apply_visitor(*this, x[i]);
1368
                dimensions = std::max(dimensions, element_dimensions);
1369
            }
1370
            return 1 + dimensions;
1371
428
        } else {
1372
428
            return 0;
1373
428
        }
1374
428
    }
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE2EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1362
332k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1363
        if constexpr (T == TYPE_ARRAY) {
1364
            const size_t size = x.size();
1365
            size_t dimensions = 0;
1366
            for (size_t i = 0; i < size; ++i) {
1367
                size_t element_dimensions = apply_visitor(*this, x[i]);
1368
                dimensions = std::max(dimensions, element_dimensions);
1369
            }
1370
            return 1 + dimensions;
1371
332k
        } else {
1372
332k
            return 0;
1373
332k
        }
1374
332k
    }
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE3EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1362
1.05k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1363
        if constexpr (T == TYPE_ARRAY) {
1364
            const size_t size = x.size();
1365
            size_t dimensions = 0;
1366
            for (size_t i = 0; i < size; ++i) {
1367
                size_t element_dimensions = apply_visitor(*this, x[i]);
1368
                dimensions = std::max(dimensions, element_dimensions);
1369
            }
1370
            return 1 + dimensions;
1371
1.05k
        } else {
1372
1.05k
            return 0;
1373
1.05k
        }
1374
1.05k
    }
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE4EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1362
1.04k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1363
        if constexpr (T == TYPE_ARRAY) {
1364
            const size_t size = x.size();
1365
            size_t dimensions = 0;
1366
            for (size_t i = 0; i < size; ++i) {
1367
                size_t element_dimensions = apply_visitor(*this, x[i]);
1368
                dimensions = std::max(dimensions, element_dimensions);
1369
            }
1370
            return 1 + dimensions;
1371
1.04k
        } else {
1372
1.04k
            return 0;
1373
1.04k
        }
1374
1.04k
    }
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE5EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1362
1.97k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1363
        if constexpr (T == TYPE_ARRAY) {
1364
            const size_t size = x.size();
1365
            size_t dimensions = 0;
1366
            for (size_t i = 0; i < size; ++i) {
1367
                size_t element_dimensions = apply_visitor(*this, x[i]);
1368
                dimensions = std::max(dimensions, element_dimensions);
1369
            }
1370
            return 1 + dimensions;
1371
1.97k
        } else {
1372
1.97k
            return 0;
1373
1.97k
        }
1374
1.97k
    }
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE6EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1362
6.33M
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1363
        if constexpr (T == TYPE_ARRAY) {
1364
            const size_t size = x.size();
1365
            size_t dimensions = 0;
1366
            for (size_t i = 0; i < size; ++i) {
1367
                size_t element_dimensions = apply_visitor(*this, x[i]);
1368
                dimensions = std::max(dimensions, element_dimensions);
1369
            }
1370
            return 1 + dimensions;
1371
6.33M
        } else {
1372
6.33M
            return 0;
1373
6.33M
        }
1374
6.33M
    }
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE38EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE39EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE8EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1362
884
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1363
        if constexpr (T == TYPE_ARRAY) {
1364
            const size_t size = x.size();
1365
            size_t dimensions = 0;
1366
            for (size_t i = 0; i < size; ++i) {
1367
                size_t element_dimensions = apply_visitor(*this, x[i]);
1368
                dimensions = std::max(dimensions, element_dimensions);
1369
            }
1370
            return 1 + dimensions;
1371
884
        } else {
1372
884
            return 0;
1373
884
        }
1374
884
    }
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE27EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE9EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1362
2.95M
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1363
        if constexpr (T == TYPE_ARRAY) {
1364
            const size_t size = x.size();
1365
            size_t dimensions = 0;
1366
            for (size_t i = 0; i < size; ++i) {
1367
                size_t element_dimensions = apply_visitor(*this, x[i]);
1368
                dimensions = std::max(dimensions, element_dimensions);
1369
            }
1370
            return 1 + dimensions;
1371
2.95M
        } else {
1372
2.95M
            return 0;
1373
2.95M
        }
1374
2.95M
    }
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE36EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1362
339
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1363
        if constexpr (T == TYPE_ARRAY) {
1364
            const size_t size = x.size();
1365
            size_t dimensions = 0;
1366
            for (size_t i = 0; i < size; ++i) {
1367
                size_t element_dimensions = apply_visitor(*this, x[i]);
1368
                dimensions = std::max(dimensions, element_dimensions);
1369
            }
1370
            return 1 + dimensions;
1371
339
        } else {
1372
339
            return 0;
1373
339
        }
1374
339
    }
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE37EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1362
338
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1363
        if constexpr (T == TYPE_ARRAY) {
1364
            const size_t size = x.size();
1365
            size_t dimensions = 0;
1366
            for (size_t i = 0; i < size; ++i) {
1367
                size_t element_dimensions = apply_visitor(*this, x[i]);
1368
                dimensions = std::max(dimensions, element_dimensions);
1369
            }
1370
            return 1 + dimensions;
1371
338
        } else {
1372
338
            return 0;
1373
338
        }
1374
338
    }
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE23EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1362
11.2M
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1363
        if constexpr (T == TYPE_ARRAY) {
1364
            const size_t size = x.size();
1365
            size_t dimensions = 0;
1366
            for (size_t i = 0; i < size; ++i) {
1367
                size_t element_dimensions = apply_visitor(*this, x[i]);
1368
                dimensions = std::max(dimensions, element_dimensions);
1369
            }
1370
            return 1 + dimensions;
1371
11.2M
        } else {
1372
11.2M
            return 0;
1373
11.2M
        }
1374
11.2M
    }
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE15EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE10EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE41EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE17EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1362
2.26M
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1363
2.26M
        if constexpr (T == TYPE_ARRAY) {
1364
2.26M
            const size_t size = x.size();
1365
2.26M
            size_t dimensions = 0;
1366
5.42M
            for (size_t i = 0; i < size; ++i) {
1367
3.15M
                size_t element_dimensions = apply_visitor(*this, x[i]);
1368
3.15M
                dimensions = std::max(dimensions, element_dimensions);
1369
3.15M
            }
1370
2.26M
            return 1 + dimensions;
1371
        } else {
1372
            return 0;
1373
        }
1374
2.26M
    }
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE16EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1362
1
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1363
        if constexpr (T == TYPE_ARRAY) {
1364
            const size_t size = x.size();
1365
            size_t dimensions = 0;
1366
            for (size_t i = 0; i < size; ++i) {
1367
                size_t element_dimensions = apply_visitor(*this, x[i]);
1368
                dimensions = std::max(dimensions, element_dimensions);
1369
            }
1370
            return 1 + dimensions;
1371
1
        } else {
1372
1
            return 0;
1373
1
        }
1374
1
    }
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE18EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE32EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1362
1
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1363
        if constexpr (T == TYPE_ARRAY) {
1364
            const size_t size = x.size();
1365
            size_t dimensions = 0;
1366
            for (size_t i = 0; i < size; ++i) {
1367
                size_t element_dimensions = apply_visitor(*this, x[i]);
1368
                dimensions = std::max(dimensions, element_dimensions);
1369
            }
1370
            return 1 + dimensions;
1371
1
        } else {
1372
1
            return 0;
1373
1
        }
1374
1
    }
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE28EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1362
783
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1363
        if constexpr (T == TYPE_ARRAY) {
1364
            const size_t size = x.size();
1365
            size_t dimensions = 0;
1366
            for (size_t i = 0; i < size; ++i) {
1367
                size_t element_dimensions = apply_visitor(*this, x[i]);
1368
                dimensions = std::max(dimensions, element_dimensions);
1369
            }
1370
            return 1 + dimensions;
1371
783
        } else {
1372
783
            return 0;
1373
783
        }
1374
783
    }
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE29EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1362
724
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1363
        if constexpr (T == TYPE_ARRAY) {
1364
            const size_t size = x.size();
1365
            size_t dimensions = 0;
1366
            for (size_t i = 0; i < size; ++i) {
1367
                size_t element_dimensions = apply_visitor(*this, x[i]);
1368
                dimensions = std::max(dimensions, element_dimensions);
1369
            }
1370
            return 1 + dimensions;
1371
724
        } else {
1372
724
            return 0;
1373
724
        }
1374
724
    }
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE20EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE30EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1362
759
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1363
        if constexpr (T == TYPE_ARRAY) {
1364
            const size_t size = x.size();
1365
            size_t dimensions = 0;
1366
            for (size_t i = 0; i < size; ++i) {
1367
                size_t element_dimensions = apply_visitor(*this, x[i]);
1368
                dimensions = std::max(dimensions, element_dimensions);
1369
            }
1370
            return 1 + dimensions;
1371
759
        } else {
1372
759
            return 0;
1373
759
        }
1374
759
    }
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE35EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1362
588
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1363
        if constexpr (T == TYPE_ARRAY) {
1364
            const size_t size = x.size();
1365
            size_t dimensions = 0;
1366
            for (size_t i = 0; i < size; ++i) {
1367
                size_t element_dimensions = apply_visitor(*this, x[i]);
1368
                dimensions = std::max(dimensions, element_dimensions);
1369
            }
1370
            return 1 + dimensions;
1371
588
        } else {
1372
588
            return 0;
1373
588
        }
1374
588
    }
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE22EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE19EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE24EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE31EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1362
46.8k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1363
        if constexpr (T == TYPE_ARRAY) {
1364
            const size_t size = x.size();
1365
            size_t dimensions = 0;
1366
            for (size_t i = 0; i < size; ++i) {
1367
                size_t element_dimensions = apply_visitor(*this, x[i]);
1368
                dimensions = std::max(dimensions, element_dimensions);
1369
            }
1370
            return 1 + dimensions;
1371
46.8k
        } else {
1372
46.8k
            return 0;
1373
46.8k
        }
1374
46.8k
    }
1375
};
1376
1377
// Visitor that allows to get type of scalar field
1378
// but exclude fields contain complex field.This is a faster version
1379
// for FieldVisitorToScalarType which does not support complex field.
1380
class SimpleFieldVisitorToScalarType : public StaticVisitor<size_t> {
1381
public:
1382
    template <PrimitiveType T>
1383
18.6M
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1384
18.6M
        if constexpr (T == TYPE_ARRAY) {
1385
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1386
109k
        } else if constexpr (T == TYPE_NULL) {
1387
109k
            have_nulls = true;
1388
109k
            return 1;
1389
18.5M
        } else {
1390
18.5M
            type = T;
1391
18.5M
            return 1;
1392
18.5M
        }
1393
18.6M
    }
_ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE1EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1383
109k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1384
        if constexpr (T == TYPE_ARRAY) {
1385
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1386
109k
        } else if constexpr (T == TYPE_NULL) {
1387
109k
            have_nulls = true;
1388
109k
            return 1;
1389
        } else {
1390
            type = T;
1391
            return 1;
1392
        }
1393
109k
    }
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE26EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE42EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE7EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1383
12.3k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1384
        if constexpr (T == TYPE_ARRAY) {
1385
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1386
        } else if constexpr (T == TYPE_NULL) {
1387
            have_nulls = true;
1388
            return 1;
1389
12.3k
        } else {
1390
12.3k
            type = T;
1391
12.3k
            return 1;
1392
12.3k
        }
1393
12.3k
    }
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE12EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE11EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE25EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE2EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1383
273k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1384
        if constexpr (T == TYPE_ARRAY) {
1385
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1386
        } else if constexpr (T == TYPE_NULL) {
1387
            have_nulls = true;
1388
            return 1;
1389
273k
        } else {
1390
273k
            type = T;
1391
273k
            return 1;
1392
273k
        }
1393
273k
    }
_ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE3EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1383
2
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1384
        if constexpr (T == TYPE_ARRAY) {
1385
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1386
        } else if constexpr (T == TYPE_NULL) {
1387
            have_nulls = true;
1388
            return 1;
1389
2
        } else {
1390
2
            type = T;
1391
2
            return 1;
1392
2
        }
1393
2
    }
_ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE4EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1383
7
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1384
        if constexpr (T == TYPE_ARRAY) {
1385
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1386
        } else if constexpr (T == TYPE_NULL) {
1387
            have_nulls = true;
1388
            return 1;
1389
7
        } else {
1390
7
            type = T;
1391
7
            return 1;
1392
7
        }
1393
7
    }
_ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE5EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1383
570
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1384
        if constexpr (T == TYPE_ARRAY) {
1385
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1386
        } else if constexpr (T == TYPE_NULL) {
1387
            have_nulls = true;
1388
            return 1;
1389
570
        } else {
1390
570
            type = T;
1391
570
            return 1;
1392
570
        }
1393
570
    }
_ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE6EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1383
4.93M
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1384
        if constexpr (T == TYPE_ARRAY) {
1385
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1386
        } else if constexpr (T == TYPE_NULL) {
1387
            have_nulls = true;
1388
            return 1;
1389
4.93M
        } else {
1390
4.93M
            type = T;
1391
4.93M
            return 1;
1392
4.93M
        }
1393
4.93M
    }
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE38EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE39EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE8EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1383
1
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1384
        if constexpr (T == TYPE_ARRAY) {
1385
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1386
        } else if constexpr (T == TYPE_NULL) {
1387
            have_nulls = true;
1388
            return 1;
1389
1
        } else {
1390
1
            type = T;
1391
1
            return 1;
1392
1
        }
1393
1
    }
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE27EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE9EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1383
2.76M
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1384
        if constexpr (T == TYPE_ARRAY) {
1385
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1386
        } else if constexpr (T == TYPE_NULL) {
1387
            have_nulls = true;
1388
            return 1;
1389
2.76M
        } else {
1390
2.76M
            type = T;
1391
2.76M
            return 1;
1392
2.76M
        }
1393
2.76M
    }
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE36EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE37EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE23EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1383
10.5M
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1384
        if constexpr (T == TYPE_ARRAY) {
1385
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1386
        } else if constexpr (T == TYPE_NULL) {
1387
            have_nulls = true;
1388
            return 1;
1389
10.5M
        } else {
1390
10.5M
            type = T;
1391
10.5M
            return 1;
1392
10.5M
        }
1393
10.5M
    }
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE15EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE10EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE41EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE17EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE16EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE18EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE32EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE28EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE29EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE20EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE30EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE35EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE22EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE19EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE24EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE31EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1383
46.8k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1384
        if constexpr (T == TYPE_ARRAY) {
1385
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1386
        } else if constexpr (T == TYPE_NULL) {
1387
            have_nulls = true;
1388
            return 1;
1389
46.8k
        } else {
1390
46.8k
            type = T;
1391
46.8k
            return 1;
1392
46.8k
        }
1393
46.8k
    }
1394
18.4M
    void get_scalar_type(PrimitiveType* data_type) const { *data_type = type; }
1395
18.4M
    bool contain_nulls() const { return have_nulls; }
1396
1397
18.4M
    bool need_convert_field() const { return false; }
1398
1399
private:
1400
    PrimitiveType type = PrimitiveType::INVALID_TYPE;
1401
    bool have_nulls = false;
1402
};
1403
1404
/// Visitor that allows to get type of scalar field
1405
/// or least common type of scalars in array.
1406
/// More optimized version of FieldToDataType.
1407
class FieldVisitorToScalarType : public StaticVisitor<size_t> {
1408
public:
1409
    template <PrimitiveType T>
1410
4.77M
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1411
4.77M
        if constexpr (T == TYPE_ARRAY) {
1412
2.26M
            size_t size = x.size();
1413
5.42M
            for (size_t i = 0; i < size; ++i) {
1414
3.15M
                apply_visitor(*this, x[i]);
1415
3.15M
            }
1416
2.26M
            return 0;
1417
2.26M
        } else if constexpr (T == TYPE_NULL) {
1418
13.4k
            have_nulls = true;
1419
13.4k
            return 0;
1420
2.49M
        } else {
1421
2.49M
            field_types.insert(T);
1422
2.49M
            type_indexes.insert(T);
1423
2.49M
            return 0;
1424
2.49M
        }
1425
4.77M
    }
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE1EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1410
13.4k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1411
        if constexpr (T == TYPE_ARRAY) {
1412
            size_t size = x.size();
1413
            for (size_t i = 0; i < size; ++i) {
1414
                apply_visitor(*this, x[i]);
1415
            }
1416
            return 0;
1417
13.4k
        } else if constexpr (T == TYPE_NULL) {
1418
13.4k
            have_nulls = true;
1419
13.4k
            return 0;
1420
        } else {
1421
            field_types.insert(T);
1422
            type_indexes.insert(T);
1423
            return 0;
1424
        }
1425
13.4k
    }
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE26EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1410
511
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1411
        if constexpr (T == TYPE_ARRAY) {
1412
            size_t size = x.size();
1413
            for (size_t i = 0; i < size; ++i) {
1414
                apply_visitor(*this, x[i]);
1415
            }
1416
            return 0;
1417
        } else if constexpr (T == TYPE_NULL) {
1418
            have_nulls = true;
1419
            return 0;
1420
511
        } else {
1421
511
            field_types.insert(T);
1422
511
            type_indexes.insert(T);
1423
511
            return 0;
1424
511
        }
1425
511
    }
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE42EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE7EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1410
29.6k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1411
        if constexpr (T == TYPE_ARRAY) {
1412
            size_t size = x.size();
1413
            for (size_t i = 0; i < size; ++i) {
1414
                apply_visitor(*this, x[i]);
1415
            }
1416
            return 0;
1417
        } else if constexpr (T == TYPE_NULL) {
1418
            have_nulls = true;
1419
            return 0;
1420
29.6k
        } else {
1421
29.6k
            field_types.insert(T);
1422
29.6k
            type_indexes.insert(T);
1423
29.6k
            return 0;
1424
29.6k
        }
1425
29.6k
    }
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE12EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE11EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE25EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1410
428
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1411
        if constexpr (T == TYPE_ARRAY) {
1412
            size_t size = x.size();
1413
            for (size_t i = 0; i < size; ++i) {
1414
                apply_visitor(*this, x[i]);
1415
            }
1416
            return 0;
1417
        } else if constexpr (T == TYPE_NULL) {
1418
            have_nulls = true;
1419
            return 0;
1420
428
        } else {
1421
428
            field_types.insert(T);
1422
428
            type_indexes.insert(T);
1423
428
            return 0;
1424
428
        }
1425
428
    }
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE2EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1410
58.3k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1411
        if constexpr (T == TYPE_ARRAY) {
1412
            size_t size = x.size();
1413
            for (size_t i = 0; i < size; ++i) {
1414
                apply_visitor(*this, x[i]);
1415
            }
1416
            return 0;
1417
        } else if constexpr (T == TYPE_NULL) {
1418
            have_nulls = true;
1419
            return 0;
1420
58.3k
        } else {
1421
58.3k
            field_types.insert(T);
1422
58.3k
            type_indexes.insert(T);
1423
58.3k
            return 0;
1424
58.3k
        }
1425
58.3k
    }
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE3EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1410
1.04k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1411
        if constexpr (T == TYPE_ARRAY) {
1412
            size_t size = x.size();
1413
            for (size_t i = 0; i < size; ++i) {
1414
                apply_visitor(*this, x[i]);
1415
            }
1416
            return 0;
1417
        } else if constexpr (T == TYPE_NULL) {
1418
            have_nulls = true;
1419
            return 0;
1420
1.04k
        } else {
1421
1.04k
            field_types.insert(T);
1422
1.04k
            type_indexes.insert(T);
1423
1.04k
            return 0;
1424
1.04k
        }
1425
1.04k
    }
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE4EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1410
1.03k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1411
        if constexpr (T == TYPE_ARRAY) {
1412
            size_t size = x.size();
1413
            for (size_t i = 0; i < size; ++i) {
1414
                apply_visitor(*this, x[i]);
1415
            }
1416
            return 0;
1417
        } else if constexpr (T == TYPE_NULL) {
1418
            have_nulls = true;
1419
            return 0;
1420
1.03k
        } else {
1421
1.03k
            field_types.insert(T);
1422
1.03k
            type_indexes.insert(T);
1423
1.03k
            return 0;
1424
1.03k
        }
1425
1.03k
    }
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE5EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1410
1.40k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1411
        if constexpr (T == TYPE_ARRAY) {
1412
            size_t size = x.size();
1413
            for (size_t i = 0; i < size; ++i) {
1414
                apply_visitor(*this, x[i]);
1415
            }
1416
            return 0;
1417
        } else if constexpr (T == TYPE_NULL) {
1418
            have_nulls = true;
1419
            return 0;
1420
1.40k
        } else {
1421
1.40k
            field_types.insert(T);
1422
1.40k
            type_indexes.insert(T);
1423
1.40k
            return 0;
1424
1.40k
        }
1425
1.40k
    }
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE6EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1410
1.41M
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1411
        if constexpr (T == TYPE_ARRAY) {
1412
            size_t size = x.size();
1413
            for (size_t i = 0; i < size; ++i) {
1414
                apply_visitor(*this, x[i]);
1415
            }
1416
            return 0;
1417
        } else if constexpr (T == TYPE_NULL) {
1418
            have_nulls = true;
1419
            return 0;
1420
1.41M
        } else {
1421
1.41M
            field_types.insert(T);
1422
1.41M
            type_indexes.insert(T);
1423
1.41M
            return 0;
1424
1.41M
        }
1425
1.41M
    }
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE38EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE39EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE8EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1410
883
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1411
        if constexpr (T == TYPE_ARRAY) {
1412
            size_t size = x.size();
1413
            for (size_t i = 0; i < size; ++i) {
1414
                apply_visitor(*this, x[i]);
1415
            }
1416
            return 0;
1417
        } else if constexpr (T == TYPE_NULL) {
1418
            have_nulls = true;
1419
            return 0;
1420
883
        } else {
1421
883
            field_types.insert(T);
1422
883
            type_indexes.insert(T);
1423
883
            return 0;
1424
883
        }
1425
883
    }
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE27EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE9EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1410
203k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1411
        if constexpr (T == TYPE_ARRAY) {
1412
            size_t size = x.size();
1413
            for (size_t i = 0; i < size; ++i) {
1414
                apply_visitor(*this, x[i]);
1415
            }
1416
            return 0;
1417
        } else if constexpr (T == TYPE_NULL) {
1418
            have_nulls = true;
1419
            return 0;
1420
203k
        } else {
1421
203k
            field_types.insert(T);
1422
203k
            type_indexes.insert(T);
1423
203k
            return 0;
1424
203k
        }
1425
203k
    }
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE36EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1410
339
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1411
        if constexpr (T == TYPE_ARRAY) {
1412
            size_t size = x.size();
1413
            for (size_t i = 0; i < size; ++i) {
1414
                apply_visitor(*this, x[i]);
1415
            }
1416
            return 0;
1417
        } else if constexpr (T == TYPE_NULL) {
1418
            have_nulls = true;
1419
            return 0;
1420
339
        } else {
1421
339
            field_types.insert(T);
1422
339
            type_indexes.insert(T);
1423
339
            return 0;
1424
339
        }
1425
339
    }
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE37EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1410
338
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1411
        if constexpr (T == TYPE_ARRAY) {
1412
            size_t size = x.size();
1413
            for (size_t i = 0; i < size; ++i) {
1414
                apply_visitor(*this, x[i]);
1415
            }
1416
            return 0;
1417
        } else if constexpr (T == TYPE_NULL) {
1418
            have_nulls = true;
1419
            return 0;
1420
338
        } else {
1421
338
            field_types.insert(T);
1422
338
            type_indexes.insert(T);
1423
338
            return 0;
1424
338
        }
1425
338
    }
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE23EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1410
784k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1411
        if constexpr (T == TYPE_ARRAY) {
1412
            size_t size = x.size();
1413
            for (size_t i = 0; i < size; ++i) {
1414
                apply_visitor(*this, x[i]);
1415
            }
1416
            return 0;
1417
        } else if constexpr (T == TYPE_NULL) {
1418
            have_nulls = true;
1419
            return 0;
1420
784k
        } else {
1421
784k
            field_types.insert(T);
1422
784k
            type_indexes.insert(T);
1423
784k
            return 0;
1424
784k
        }
1425
784k
    }
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE15EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE10EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE41EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE17EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1410
2.26M
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1411
2.26M
        if constexpr (T == TYPE_ARRAY) {
1412
2.26M
            size_t size = x.size();
1413
5.42M
            for (size_t i = 0; i < size; ++i) {
1414
3.15M
                apply_visitor(*this, x[i]);
1415
3.15M
            }
1416
2.26M
            return 0;
1417
        } else if constexpr (T == TYPE_NULL) {
1418
            have_nulls = true;
1419
            return 0;
1420
        } else {
1421
            field_types.insert(T);
1422
            type_indexes.insert(T);
1423
            return 0;
1424
        }
1425
2.26M
    }
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE16EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1410
1
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1411
        if constexpr (T == TYPE_ARRAY) {
1412
            size_t size = x.size();
1413
            for (size_t i = 0; i < size; ++i) {
1414
                apply_visitor(*this, x[i]);
1415
            }
1416
            return 0;
1417
        } else if constexpr (T == TYPE_NULL) {
1418
            have_nulls = true;
1419
            return 0;
1420
1
        } else {
1421
1
            field_types.insert(T);
1422
1
            type_indexes.insert(T);
1423
1
            return 0;
1424
1
        }
1425
1
    }
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE18EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE32EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1410
1
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1411
        if constexpr (T == TYPE_ARRAY) {
1412
            size_t size = x.size();
1413
            for (size_t i = 0; i < size; ++i) {
1414
                apply_visitor(*this, x[i]);
1415
            }
1416
            return 0;
1417
        } else if constexpr (T == TYPE_NULL) {
1418
            have_nulls = true;
1419
            return 0;
1420
1
        } else {
1421
1
            field_types.insert(T);
1422
1
            type_indexes.insert(T);
1423
1
            return 0;
1424
1
        }
1425
1
    }
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE28EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1410
783
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1411
        if constexpr (T == TYPE_ARRAY) {
1412
            size_t size = x.size();
1413
            for (size_t i = 0; i < size; ++i) {
1414
                apply_visitor(*this, x[i]);
1415
            }
1416
            return 0;
1417
        } else if constexpr (T == TYPE_NULL) {
1418
            have_nulls = true;
1419
            return 0;
1420
783
        } else {
1421
783
            field_types.insert(T);
1422
783
            type_indexes.insert(T);
1423
783
            return 0;
1424
783
        }
1425
783
    }
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE29EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1410
724
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1411
        if constexpr (T == TYPE_ARRAY) {
1412
            size_t size = x.size();
1413
            for (size_t i = 0; i < size; ++i) {
1414
                apply_visitor(*this, x[i]);
1415
            }
1416
            return 0;
1417
        } else if constexpr (T == TYPE_NULL) {
1418
            have_nulls = true;
1419
            return 0;
1420
724
        } else {
1421
724
            field_types.insert(T);
1422
724
            type_indexes.insert(T);
1423
724
            return 0;
1424
724
        }
1425
724
    }
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE20EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE30EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1410
759
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1411
        if constexpr (T == TYPE_ARRAY) {
1412
            size_t size = x.size();
1413
            for (size_t i = 0; i < size; ++i) {
1414
                apply_visitor(*this, x[i]);
1415
            }
1416
            return 0;
1417
        } else if constexpr (T == TYPE_NULL) {
1418
            have_nulls = true;
1419
            return 0;
1420
759
        } else {
1421
759
            field_types.insert(T);
1422
759
            type_indexes.insert(T);
1423
759
            return 0;
1424
759
        }
1425
759
    }
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE35EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1410
588
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1411
        if constexpr (T == TYPE_ARRAY) {
1412
            size_t size = x.size();
1413
            for (size_t i = 0; i < size; ++i) {
1414
                apply_visitor(*this, x[i]);
1415
            }
1416
            return 0;
1417
        } else if constexpr (T == TYPE_NULL) {
1418
            have_nulls = true;
1419
            return 0;
1420
588
        } else {
1421
588
            field_types.insert(T);
1422
588
            type_indexes.insert(T);
1423
588
            return 0;
1424
588
        }
1425
588
    }
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE22EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE19EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE24EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE31EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1410
44
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1411
        if constexpr (T == TYPE_ARRAY) {
1412
            size_t size = x.size();
1413
            for (size_t i = 0; i < size; ++i) {
1414
                apply_visitor(*this, x[i]);
1415
            }
1416
            return 0;
1417
        } else if constexpr (T == TYPE_NULL) {
1418
            have_nulls = true;
1419
            return 0;
1420
44
        } else {
1421
44
            field_types.insert(T);
1422
44
            type_indexes.insert(T);
1423
44
            return 0;
1424
44
        }
1425
44
    }
1426
1.62M
    void get_scalar_type(PrimitiveType* type) const {
1427
1.62M
        if (type_indexes.size() == 1) {
1428
            // Most cases will have only one type
1429
1.54M
            *type = *type_indexes.begin();
1430
1.54M
            return;
1431
1.54M
        }
1432
74.1k
        DataTypePtr data_type;
1433
74.1k
        get_least_supertype_jsonb(type_indexes, &data_type);
1434
74.1k
        *type = data_type->get_primitive_type();
1435
74.1k
    }
1436
1.62M
    bool contain_nulls() const { return have_nulls; }
1437
1.62M
    bool need_convert_field() const { return field_types.size() > 1; }
1438
1439
private:
1440
    phmap::flat_hash_set<PrimitiveType> type_indexes;
1441
    phmap::flat_hash_set<PrimitiveType> field_types;
1442
    bool have_nulls = false;
1443
};
1444
1445
template <typename Visitor>
1446
20.3M
void get_field_info_impl(const Field& field, FieldInfo* info) {
1447
20.3M
    Visitor to_scalar_type_visitor;
1448
20.3M
    apply_visitor(to_scalar_type_visitor, field);
1449
20.3M
    PrimitiveType type_id;
1450
20.3M
    to_scalar_type_visitor.get_scalar_type(&type_id);
1451
    // array item's dimension may missmatch, eg. [1, 2, [1, 2, 3]]
1452
20.3M
    *info = {type_id, to_scalar_type_visitor.contain_nulls(),
1453
20.3M
             to_scalar_type_visitor.need_convert_field(),
1454
20.3M
             apply_visitor(FieldVisitorToNumberOfDimensions(), field)};
1455
20.3M
}
_ZN5doris12variant_util19get_field_info_implINS0_24FieldVisitorToScalarTypeEEEvRKNS_5FieldEPNS_9FieldInfoE
Line
Count
Source
1446
1.62M
void get_field_info_impl(const Field& field, FieldInfo* info) {
1447
1.62M
    Visitor to_scalar_type_visitor;
1448
1.62M
    apply_visitor(to_scalar_type_visitor, field);
1449
1.62M
    PrimitiveType type_id;
1450
1.62M
    to_scalar_type_visitor.get_scalar_type(&type_id);
1451
    // array item's dimension may missmatch, eg. [1, 2, [1, 2, 3]]
1452
1.62M
    *info = {type_id, to_scalar_type_visitor.contain_nulls(),
1453
1.62M
             to_scalar_type_visitor.need_convert_field(),
1454
1.62M
             apply_visitor(FieldVisitorToNumberOfDimensions(), field)};
1455
1.62M
}
_ZN5doris12variant_util19get_field_info_implINS0_30SimpleFieldVisitorToScalarTypeEEEvRKNS_5FieldEPNS_9FieldInfoE
Line
Count
Source
1446
18.6M
void get_field_info_impl(const Field& field, FieldInfo* info) {
1447
18.6M
    Visitor to_scalar_type_visitor;
1448
18.6M
    apply_visitor(to_scalar_type_visitor, field);
1449
18.6M
    PrimitiveType type_id;
1450
18.6M
    to_scalar_type_visitor.get_scalar_type(&type_id);
1451
    // array item's dimension may missmatch, eg. [1, 2, [1, 2, 3]]
1452
18.6M
    *info = {type_id, to_scalar_type_visitor.contain_nulls(),
1453
18.6M
             to_scalar_type_visitor.need_convert_field(),
1454
18.6M
             apply_visitor(FieldVisitorToNumberOfDimensions(), field)};
1455
18.6M
}
1456
1457
20.3M
void get_field_info(const Field& field, FieldInfo* info) {
1458
20.3M
    if (field.is_complex_field()) {
1459
1.62M
        get_field_info_impl<FieldVisitorToScalarType>(field, info);
1460
18.7M
    } else {
1461
18.7M
        get_field_info_impl<SimpleFieldVisitorToScalarType>(field, info);
1462
18.7M
    }
1463
20.3M
}
1464
1465
bool generate_sub_column_info(const TabletSchema& schema, int32_t col_unique_id,
1466
                              const std::string& path,
1467
149k
                              TabletSchema::SubColumnInfo* sub_column_info) {
1468
149k
    const auto& parent_column = schema.column_by_uid(col_unique_id);
1469
149k
    std::function<void(const TabletColumn&, TabletColumn*)> generate_result_column =
1470
149k
            [&](const TabletColumn& from_column, TabletColumn* to_column) {
1471
12.9k
                to_column->set_name(parent_column.name_lower_case() + "." + path);
1472
12.9k
                to_column->set_type(from_column.type());
1473
12.9k
                to_column->set_parent_unique_id(parent_column.unique_id());
1474
12.9k
                bool is_typed = !parent_column.variant_enable_typed_paths_to_sparse();
1475
12.9k
                to_column->set_path_info(
1476
12.9k
                        PathInData(parent_column.name_lower_case() + "." + path, is_typed));
1477
12.9k
                to_column->set_aggregation_method(parent_column.aggregation());
1478
12.9k
                to_column->set_is_nullable(true);
1479
12.9k
                to_column->set_parent_unique_id(parent_column.unique_id());
1480
12.9k
                if (from_column.is_decimal()) {
1481
12.8k
                    to_column->set_precision(from_column.precision());
1482
12.8k
                }
1483
12.9k
                to_column->set_frac(from_column.frac());
1484
1485
12.9k
                if (from_column.is_array_type()) {
1486
2.98k
                    TabletColumn nested_column;
1487
2.98k
                    generate_result_column(*from_column.get_sub_columns()[0], &nested_column);
1488
2.98k
                    to_column->add_sub_column(nested_column);
1489
2.98k
                }
1490
12.9k
            };
1491
1492
149k
    auto generate_index = [&](const std::string& pattern) {
1493
        // 1. find subcolumn's index
1494
9.92k
        if (const auto& indexes = schema.inverted_index_by_field_pattern(col_unique_id, pattern);
1495
9.92k
            !indexes.empty()) {
1496
3.93k
            for (const auto& index : indexes) {
1497
3.93k
                auto index_ptr = std::make_shared<TabletIndex>(*index);
1498
3.93k
                index_ptr->set_escaped_escaped_index_suffix_path(
1499
3.93k
                        sub_column_info->column.path_info_ptr()->get_path());
1500
3.93k
                sub_column_info->indexes.emplace_back(std::move(index_ptr));
1501
3.93k
            }
1502
3.88k
        }
1503
        // 2. find parent column's index
1504
6.04k
        else if (const auto parent_index = schema.inverted_indexs(col_unique_id);
1505
6.04k
                 !parent_index.empty()) {
1506
324
            inherit_index(parent_index, sub_column_info->indexes, sub_column_info->column);
1507
5.72k
        } else {
1508
5.72k
            sub_column_info->indexes.clear();
1509
5.72k
        }
1510
9.92k
    };
1511
1512
149k
    const auto& sub_columns = parent_column.get_sub_columns();
1513
160k
    for (const auto& sub_column : sub_columns) {
1514
160k
        const char* pattern = sub_column->name().c_str();
1515
160k
        switch (sub_column->pattern_type()) {
1516
5.01k
        case PatternTypePB::MATCH_NAME: {
1517
5.01k
            if (strcmp(pattern, path.c_str()) == 0) {
1518
1.52k
                generate_result_column(*sub_column, &sub_column_info->column);
1519
1.52k
                generate_index(sub_column->name());
1520
1.52k
                return true;
1521
1.52k
            }
1522
3.49k
            break;
1523
5.01k
        }
1524
155k
        case PatternTypePB::MATCH_NAME_GLOB: {
1525
155k
            if (glob_match_re2(pattern, path)) {
1526
8.40k
                generate_result_column(*sub_column, &sub_column_info->column);
1527
8.40k
                generate_index(sub_column->name());
1528
8.40k
                return true;
1529
8.40k
            }
1530
147k
            break;
1531
155k
        }
1532
147k
        default:
1533
0
            break;
1534
160k
        }
1535
160k
    }
1536
139k
    return false;
1537
149k
}
1538
1539
TabletSchemaSPtr VariantCompactionUtil::calculate_variant_extended_schema(
1540
1.41k
        const std::vector<RowsetSharedPtr>& rowsets, const TabletSchemaSPtr& base_schema) {
1541
1.41k
    if (rowsets.empty()) {
1542
0
        return nullptr;
1543
0
    }
1544
1545
1.41k
    std::vector<TabletSchemaSPtr> schemas;
1546
3.34k
    for (const auto& rs : rowsets) {
1547
3.34k
        if (rs->num_segments() == 0) {
1548
3.16k
            continue;
1549
3.16k
        }
1550
187
        const auto& tablet_schema = rs->tablet_schema();
1551
187
        SegmentCacheHandle segment_cache;
1552
187
        auto st = SegmentLoader::instance()->load_segments(std::static_pointer_cast<BetaRowset>(rs),
1553
187
                                                           &segment_cache);
1554
187
        if (!st.ok()) {
1555
0
            return base_schema;
1556
0
        }
1557
187
        for (const auto& segment : segment_cache.get_segments()) {
1558
187
            TabletSchemaSPtr schema = tablet_schema->copy_without_variant_extracted_columns();
1559
380
            for (const auto& column : tablet_schema->columns()) {
1560
380
                if (!column->is_variant_type()) {
1561
187
                    continue;
1562
187
                }
1563
193
                std::shared_ptr<ColumnReader> column_reader;
1564
193
                OlapReaderStatistics stats;
1565
193
                st = segment->get_column_reader(column->unique_id(), &column_reader, &stats);
1566
193
                if (!st.ok()) {
1567
0
                    LOG(WARNING) << "Failed to get column reader for column: " << column->name()
1568
0
                                 << " error: " << st.to_string();
1569
0
                    continue;
1570
0
                }
1571
193
                if (!column_reader) {
1572
0
                    continue;
1573
0
                }
1574
1575
193
                CHECK(column_reader->get_meta_type() == FieldType::OLAP_FIELD_TYPE_VARIANT);
1576
193
                auto* variant_column_reader =
1577
193
                        assert_cast<segment_v2::VariantColumnReader*>(column_reader.get());
1578
                // load external meta before getting subcolumn meta info
1579
193
                st = variant_column_reader->load_external_meta_once();
1580
193
                if (!st.ok()) {
1581
0
                    LOG(WARNING) << "Failed to load external meta for column: " << column->name()
1582
0
                                 << " error: " << st.to_string();
1583
0
                    continue;
1584
0
                }
1585
193
                const auto* subcolumn_meta_info = variant_column_reader->get_subcolumns_meta_info();
1586
601
                for (const auto& entry : *subcolumn_meta_info) {
1587
601
                    if (entry->path.empty()) {
1588
193
                        continue;
1589
193
                    }
1590
408
                    const std::string& column_name =
1591
408
                            column->name_lower_case() + "." + entry->path.get_path();
1592
408
                    const DataTypePtr& data_type = entry->data.file_column_type;
1593
408
                    PathInDataBuilder full_path_builder;
1594
408
                    auto full_path = full_path_builder.append(column->name_lower_case(), false)
1595
408
                                             .append(entry->path.get_parts(), false)
1596
408
                                             .build();
1597
408
                    TabletColumn subcolumn =
1598
408
                            get_column_by_type(data_type, column_name,
1599
408
                                               ExtraInfo {.unique_id = -1,
1600
408
                                                          .parent_unique_id = column->unique_id(),
1601
408
                                                          .path_info = full_path});
1602
408
                    schema->append_column(subcolumn);
1603
408
                }
1604
193
            }
1605
187
            schemas.emplace_back(schema);
1606
187
        }
1607
187
    }
1608
1.41k
    TabletSchemaSPtr least_common_schema;
1609
1.41k
    auto st = get_least_common_schema(schemas, base_schema, least_common_schema, false);
1610
1.41k
    if (!st.ok()) {
1611
0
        return base_schema;
1612
0
    }
1613
1.41k
    return least_common_schema;
1614
1.41k
}
1615
1616
bool inherit_index(const std::vector<const TabletIndex*>& parent_indexes,
1617
                   TabletIndexes& subcolumns_indexes, FieldType column_type,
1618
86.4k
                   const std::string& suffix_path, bool is_array_nested_type) {
1619
86.4k
    if (parent_indexes.empty()) {
1620
76.3k
        return false;
1621
76.3k
    }
1622
10.1k
    subcolumns_indexes.clear();
1623
    // bkd index or array index only need to inherit one index
1624
10.1k
    if (field_is_numeric_type(column_type) ||
1625
10.1k
        (is_array_nested_type &&
1626
7.43k
         (field_is_numeric_type(column_type) || field_is_slice_type(column_type)))) {
1627
2.69k
        auto index_ptr = std::make_shared<TabletIndex>(*parent_indexes[0]);
1628
2.69k
        index_ptr->set_escaped_escaped_index_suffix_path(suffix_path);
1629
        // no need parse for bkd index or array index
1630
2.69k
        index_ptr->remove_parser_and_analyzer();
1631
2.69k
        subcolumns_indexes.emplace_back(std::move(index_ptr));
1632
2.69k
        return true;
1633
2.69k
    }
1634
    // string type need to inherit all indexes
1635
7.42k
    else if (field_is_slice_type(column_type) && !is_array_nested_type) {
1636
7.38k
        for (const auto& index : parent_indexes) {
1637
7.38k
            auto index_ptr = std::make_shared<TabletIndex>(*index);
1638
7.38k
            index_ptr->set_escaped_escaped_index_suffix_path(suffix_path);
1639
7.38k
            subcolumns_indexes.emplace_back(std::move(index_ptr));
1640
7.38k
        }
1641
7.35k
        return true;
1642
7.35k
    }
1643
65
    return false;
1644
10.1k
}
1645
1646
bool inherit_index(const std::vector<const TabletIndex*>& parent_indexes,
1647
86.4k
                   TabletIndexes& subcolumns_indexes, const TabletColumn& column) {
1648
86.4k
    if (!column.is_extracted_column()) {
1649
3
        return false;
1650
3
    }
1651
86.4k
    if (column.is_array_type()) {
1652
3.01k
        if (column.get_sub_columns().empty()) {
1653
0
            return false;
1654
0
        }
1655
3.01k
        const TabletColumn* nested = column.get_sub_columns()[0].get();
1656
3.01k
        while (nested != nullptr && nested->is_array_type()) {
1657
0
            if (nested->get_sub_columns().empty()) {
1658
0
                return false;
1659
0
            }
1660
0
            nested = nested->get_sub_columns()[0].get();
1661
0
        }
1662
3.01k
        if (nested == nullptr) {
1663
0
            return false;
1664
0
        }
1665
3.01k
        return inherit_index(parent_indexes, subcolumns_indexes, nested->type(),
1666
3.01k
                             column.path_info_ptr()->get_path(), true);
1667
3.01k
    }
1668
83.4k
    return inherit_index(parent_indexes, subcolumns_indexes, column.type(),
1669
83.4k
                         column.path_info_ptr()->get_path());
1670
86.4k
}
1671
1672
bool inherit_index(const std::vector<const TabletIndex*>& parent_indexes,
1673
0
                   TabletIndexes& subcolumns_indexes, const ColumnMetaPB& column_pb) {
1674
0
    if (!column_pb.has_column_path_info()) {
1675
0
        return false;
1676
0
    }
1677
0
    if (column_pb.type() == (int)FieldType::OLAP_FIELD_TYPE_ARRAY) {
1678
0
        if (column_pb.children_columns_size() == 0) {
1679
0
            return false;
1680
0
        }
1681
0
        const ColumnMetaPB* nested = &column_pb.children_columns(0);
1682
0
        while (nested != nullptr && nested->type() == (int)FieldType::OLAP_FIELD_TYPE_ARRAY) {
1683
0
            if (nested->children_columns_size() == 0) {
1684
0
                return false;
1685
0
            }
1686
0
            nested = &nested->children_columns(0);
1687
0
        }
1688
0
        if (nested == nullptr) {
1689
0
            return false;
1690
0
        }
1691
0
        return inherit_index(parent_indexes, subcolumns_indexes, (FieldType)nested->type(),
1692
0
                             column_pb.column_path_info().path(), true);
1693
0
    }
1694
0
    return inherit_index(parent_indexes, subcolumns_indexes, (FieldType)column_pb.type(),
1695
0
                         column_pb.column_path_info().path());
1696
0
}
1697
1698
// ============ Implementation from parse2column.cpp ============
1699
1700
/** Pool for objects that cannot be used from different threads simultaneously.
1701
  * Allows to create an object for each thread.
1702
  * Pool has unbounded size and objects are not destroyed before destruction of pool.
1703
  *
1704
  * Use it in cases when thread local storage is not appropriate
1705
  *  (when maximum number of simultaneously used objects is less
1706
  *   than number of running/sleeping threads, that has ever used object,
1707
  *   and creation/destruction of objects is expensive).
1708
  */
1709
template <typename T>
1710
class SimpleObjectPool {
1711
protected:
1712
    /// Hold all available objects in stack.
1713
    std::mutex mutex;
1714
    std::stack<std::unique_ptr<T>> stack;
1715
    /// Specialized deleter for std::unique_ptr.
1716
    /// Returns underlying pointer back to stack thus reclaiming its ownership.
1717
    struct Deleter {
1718
        SimpleObjectPool<T>* parent;
1719
16.9k
        Deleter(SimpleObjectPool<T>* parent_ = nullptr) : parent {parent_} {} /// NOLINT
1720
16.9k
        void operator()(T* owning_ptr) const {
1721
16.9k
            std::lock_guard lock {parent->mutex};
1722
16.9k
            parent->stack.emplace(owning_ptr);
1723
16.9k
        }
1724
    };
1725
1726
public:
1727
    using Pointer = std::unique_ptr<T, Deleter>;
1728
    /// Extracts and returns a pointer from the stack if it's not empty,
1729
    ///  creates a new one by calling provided f() otherwise.
1730
    template <typename Factory>
1731
16.8k
    Pointer get(Factory&& f) {
1732
16.8k
        std::unique_lock lock(mutex);
1733
16.8k
        if (stack.empty()) {
1734
19
            return {f(), this};
1735
19
        }
1736
16.8k
        auto object = stack.top().release();
1737
16.8k
        stack.pop();
1738
16.8k
        return std::unique_ptr<T, Deleter>(object, Deleter(this));
1739
16.8k
    }
variant_util.cpp:_ZN5doris12variant_util16SimpleObjectPoolINS_14JSONDataParserINS_14SimdJSONParserEEEE3getIZNS0_21parse_json_to_variantERNS_7IColumnERKNS_9StringRefEPS4_RKNS_11ParseConfigEE3$_0EESt10unique_ptrIS4_NS5_7DeleterEEOT_
Line
Count
Source
1731
12.4k
    Pointer get(Factory&& f) {
1732
12.4k
        std::unique_lock lock(mutex);
1733
12.4k
        if (stack.empty()) {
1734
1
            return {f(), this};
1735
1
        }
1736
12.4k
        auto object = stack.top().release();
1737
12.4k
        stack.pop();
1738
12.4k
        return std::unique_ptr<T, Deleter>(object, Deleter(this));
1739
12.4k
    }
variant_util.cpp:_ZN5doris12variant_util16SimpleObjectPoolINS_14JSONDataParserINS_14SimdJSONParserEEEE3getIZNS0_21parse_json_to_variantERNS_7IColumnERKNS_9ColumnStrIjEERKNS_11ParseConfigEE3$_0EESt10unique_ptrIS4_NS5_7DeleterEEOT_
Line
Count
Source
1731
4.45k
    Pointer get(Factory&& f) {
1732
4.45k
        std::unique_lock lock(mutex);
1733
4.45k
        if (stack.empty()) {
1734
18
            return {f(), this};
1735
18
        }
1736
4.43k
        auto object = stack.top().release();
1737
4.43k
        stack.pop();
1738
4.43k
        return std::unique_ptr<T, Deleter>(object, Deleter(this));
1739
4.45k
    }
1740
    /// Like get(), but creates object using default constructor.
1741
    Pointer getDefault() {
1742
        return get([] { return new T; });
1743
    }
1744
};
1745
1746
SimpleObjectPool<JsonParser> parsers_pool;
1747
1748
using Node = typename ColumnVariant::Subcolumns::Node;
1749
1750
8.67M
static inline void append_binary_bytes(ColumnString::Chars& chars, const void* data, size_t size) {
1751
8.67M
    const auto old_size = chars.size();
1752
8.67M
    chars.resize(old_size + size);
1753
8.67M
    memcpy(chars.data() + old_size, reinterpret_cast<const char*>(data), size);
1754
8.67M
}
1755
1756
3.32M
static inline void append_binary_type(ColumnString::Chars& chars, FieldType type) {
1757
3.32M
    const uint8_t t = static_cast<uint8_t>(type);
1758
3.32M
    append_binary_bytes(chars, &t, sizeof(uint8_t));
1759
3.32M
}
1760
1761
2.37M
static inline void append_binary_sizet(ColumnString::Chars& chars, size_t v) {
1762
2.37M
    append_binary_bytes(chars, &v, sizeof(size_t));
1763
2.37M
}
1764
1765
3.32M
static void append_field_to_binary_chars(const Field& field, ColumnString::Chars& chars) {
1766
3.32M
    switch (field.get_type()) {
1767
0
    case PrimitiveType::TYPE_NULL: {
1768
0
        append_binary_type(chars, FieldType::OLAP_FIELD_TYPE_NONE);
1769
0
        return;
1770
0
    }
1771
182k
    case PrimitiveType::TYPE_BOOLEAN: {
1772
182k
        append_binary_type(chars,
1773
182k
                           TabletColumn::get_field_type_by_type(PrimitiveType::TYPE_BOOLEAN));
1774
182k
        const auto v = static_cast<UInt8>(field.get<PrimitiveType::TYPE_BOOLEAN>());
1775
182k
        append_binary_bytes(chars, &v, sizeof(UInt8));
1776
182k
        return;
1777
0
    }
1778
718k
    case PrimitiveType::TYPE_BIGINT: {
1779
718k
        append_binary_type(chars, TabletColumn::get_field_type_by_type(PrimitiveType::TYPE_BIGINT));
1780
718k
        const auto v = field.get<PrimitiveType::TYPE_BIGINT>();
1781
718k
        append_binary_bytes(chars, &v, sizeof(Int64));
1782
718k
        return;
1783
0
    }
1784
3
    case PrimitiveType::TYPE_LARGEINT: {
1785
3
        append_binary_type(chars,
1786
3
                           TabletColumn::get_field_type_by_type(PrimitiveType::TYPE_LARGEINT));
1787
3
        const auto v = field.get<PrimitiveType::TYPE_LARGEINT>();
1788
3
        append_binary_bytes(chars, &v, sizeof(int128_t));
1789
3
        return;
1790
0
    }
1791
80.7k
    case PrimitiveType::TYPE_DOUBLE: {
1792
80.7k
        append_binary_type(chars, TabletColumn::get_field_type_by_type(PrimitiveType::TYPE_DOUBLE));
1793
80.7k
        const auto v = field.get<PrimitiveType::TYPE_DOUBLE>();
1794
80.7k
        append_binary_bytes(chars, &v, sizeof(Float64));
1795
80.7k
        return;
1796
0
    }
1797
2.14M
    case PrimitiveType::TYPE_STRING: {
1798
2.14M
        append_binary_type(chars, FieldType::OLAP_FIELD_TYPE_STRING);
1799
2.14M
        const auto& v = field.get<PrimitiveType::TYPE_STRING>();
1800
2.14M
        append_binary_sizet(chars, v.size());
1801
2.14M
        append_binary_bytes(chars, v.data(), v.size());
1802
2.14M
        return;
1803
0
    }
1804
34.1k
    case PrimitiveType::TYPE_JSONB: {
1805
34.1k
        append_binary_type(chars, FieldType::OLAP_FIELD_TYPE_JSONB);
1806
34.1k
        const auto& v = field.get<PrimitiveType::TYPE_JSONB>();
1807
34.1k
        append_binary_sizet(chars, v.get_size());
1808
34.1k
        append_binary_bytes(chars, v.get_value(), v.get_size());
1809
34.1k
        return;
1810
0
    }
1811
187k
    case PrimitiveType::TYPE_ARRAY: {
1812
187k
        append_binary_type(chars, FieldType::OLAP_FIELD_TYPE_ARRAY);
1813
187k
        const auto& a = field.get<PrimitiveType::TYPE_ARRAY>();
1814
187k
        append_binary_sizet(chars, a.size());
1815
252k
        for (const auto& elem : a) {
1816
252k
            append_field_to_binary_chars(elem, chars);
1817
252k
        }
1818
187k
        return;
1819
0
    }
1820
0
    default:
1821
0
        throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Unsupported field type {}",
1822
0
                               field.get_type());
1823
3.32M
    }
1824
3.32M
}
1825
/// Visitor that keeps @num_dimensions_to_keep dimensions in arrays
1826
/// and replaces all scalars or nested arrays to @replacement at that level.
1827
class FieldVisitorReplaceScalars : public StaticVisitor<Field> {
1828
public:
1829
    FieldVisitorReplaceScalars(const Field& replacement_, size_t num_dimensions_to_keep_)
1830
0
            : replacement(replacement_), num_dimensions_to_keep(num_dimensions_to_keep_) {}
1831
    template <PrimitiveType T>
1832
    Field operator()(const typename PrimitiveTypeTraits<T>::CppType& x) const {
1833
        if constexpr (T == TYPE_ARRAY) {
1834
            if (num_dimensions_to_keep == 0) {
1835
                return replacement;
1836
            }
1837
            const size_t size = x.size();
1838
            Array res(size);
1839
            for (size_t i = 0; i < size; ++i) {
1840
                res[i] = apply_visitor(
1841
                        FieldVisitorReplaceScalars(replacement, num_dimensions_to_keep - 1), x[i]);
1842
            }
1843
            return Field::create_field<TYPE_ARRAY>(res);
1844
        } else {
1845
            return replacement;
1846
        }
1847
    }
1848
1849
private:
1850
    const Field& replacement;
1851
    size_t num_dimensions_to_keep;
1852
};
1853
1854
template <typename ParserImpl>
1855
void parse_json_to_variant_impl(IColumn& column, const char* src, size_t length,
1856
1.35M
                                JSONDataParser<ParserImpl>* parser, const ParseConfig& config) {
1857
1.35M
    auto& column_variant = assert_cast<ColumnVariant&>(column);
1858
1.35M
    std::optional<ParseResult> result;
1859
    /// Treat empty string as an empty object
1860
    /// for better CAST from String to Object.
1861
1.35M
    if (length > 0) {
1862
1.34M
        result = parser->parse(src, length, config);
1863
1.34M
    } else {
1864
4.79k
        result = ParseResult {};
1865
4.79k
    }
1866
1.35M
    if (!result) {
1867
658
        VLOG_DEBUG << "failed to parse " << std::string_view(src, length) << ", length= " << length;
1868
658
        if (config::variant_throw_exeception_on_invalid_json) {
1869
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Failed to parse object {}",
1870
0
                                   std::string_view(src, length));
1871
0
        }
1872
        // Treat as string
1873
658
        PathInData root_path;
1874
658
        Field field = Field::create_field<TYPE_STRING>(String(src, length));
1875
658
        result = ParseResult {{root_path}, {field}};
1876
658
    }
1877
1.35M
    auto& [paths, values] = *result;
1878
1.35M
    assert(paths.size() == values.size());
1879
1.35M
    size_t old_num_rows = column_variant.rows();
1880
1.35M
    if (config.deprecated_enable_flatten_nested) {
1881
        // here we should check the paths in variant and paths in result,
1882
        // if two paths which same prefix have different structure, we should throw an exception
1883
8.39k
        std::vector<PathInData> check_paths;
1884
59.6k
        for (const auto& entry : column_variant.get_subcolumns()) {
1885
59.6k
            check_paths.push_back(entry->path);
1886
59.6k
        }
1887
8.39k
        check_paths.insert(check_paths.end(), paths.begin(), paths.end());
1888
8.39k
        THROW_IF_ERROR(check_variant_has_no_ambiguous_paths(check_paths));
1889
8.39k
    }
1890
1.35M
    auto [doc_value_data_paths, doc_value_data_values] =
1891
1.35M
            column_variant.get_doc_value_data_paths_and_values();
1892
1.35M
    auto& doc_value_data_offsets = column_variant.serialized_doc_value_column_offsets();
1893
1894
15.6M
    auto flush_defaults = [](ColumnVariant::Subcolumn* subcolumn) {
1895
15.6M
        const auto num_defaults = subcolumn->cur_num_of_defaults();
1896
15.6M
        if (num_defaults > 0) {
1897
1.54M
            subcolumn->insert_many_defaults(num_defaults);
1898
1.54M
            subcolumn->reset_current_num_of_defaults();
1899
1.54M
        }
1900
15.6M
    };
1901
1902
1.35M
    auto get_or_create_subcolumn = [&](const PathInData& path, size_t index_hint,
1903
15.7M
                                       const FieldInfo& field_info) -> ColumnVariant::Subcolumn* {
1904
15.7M
        if (column_variant.get_subcolumn(path, index_hint) == nullptr) {
1905
24.4k
            if (path.has_nested_part()) {
1906
396
                column_variant.add_nested_subcolumn(path, field_info, old_num_rows);
1907
24.0k
            } else {
1908
24.0k
                column_variant.add_sub_column(path, old_num_rows);
1909
24.0k
            }
1910
24.4k
        }
1911
15.7M
        auto* subcolumn = column_variant.get_subcolumn(path, index_hint);
1912
15.7M
        if (!subcolumn) {
1913
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Failed to find sub column {}",
1914
0
                                   path.get_path());
1915
0
        }
1916
15.7M
        return subcolumn;
1917
15.7M
    };
1918
1919
1.35M
    auto insert_into_subcolumn = [&](size_t i,
1920
15.5M
                                     bool check_size_mismatch) -> ColumnVariant::Subcolumn* {
1921
15.5M
        FieldInfo field_info;
1922
15.5M
        get_field_info(values[i], &field_info);
1923
15.5M
        if (field_info.scalar_type_id == PrimitiveType::INVALID_TYPE) {
1924
27.3k
            return nullptr;
1925
27.3k
        }
1926
15.5M
        auto* subcolumn = get_or_create_subcolumn(paths[i], i, field_info);
1927
15.5M
        flush_defaults(subcolumn);
1928
15.6M
        if (check_size_mismatch && subcolumn->size() != old_num_rows) {
1929
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
1930
0
                                   "subcolumn {} size missmatched, may contains duplicated entry",
1931
0
                                   paths[i].get_path());
1932
0
        }
1933
15.5M
        subcolumn->insert(std::move(values[i]), std::move(field_info));
1934
15.5M
        return subcolumn;
1935
15.5M
    };
1936
1937
1.35M
    switch (config.parse_to) {
1938
1.17M
    case ParseConfig::ParseTo::OnlySubcolumns:
1939
16.4M
        for (size_t i = 0; i < paths.size(); ++i) {
1940
15.2M
            insert_into_subcolumn(i, true);
1941
15.2M
        }
1942
1.17M
        break;
1943
175k
    case ParseConfig::ParseTo::OnlyDocValueColumn: {
1944
175k
        CHECK(column_variant.enable_doc_mode()) << "OnlyDocValueColumn requires doc mode enabled";
1945
175k
        std::vector<size_t> doc_item_indexes;
1946
175k
        doc_item_indexes.reserve(paths.size());
1947
175k
        phmap::flat_hash_set<StringRef, StringRefHash> seen_paths;
1948
175k
        seen_paths.reserve(paths.size());
1949
1950
3.37M
        for (size_t i = 0; i < paths.size(); ++i) {
1951
3.20M
            FieldInfo field_info;
1952
3.20M
            get_field_info(values[i], &field_info);
1953
3.20M
            if (paths[i].empty()) {
1954
708
                auto* subcolumn = column_variant.get_subcolumn(paths[i]);
1955
708
                DCHECK(subcolumn != nullptr);
1956
708
                flush_defaults(subcolumn);
1957
708
                subcolumn->insert(std::move(values[i]), std::move(field_info));
1958
708
                continue;
1959
708
            }
1960
3.20M
            if (field_info.scalar_type_id == PrimitiveType::INVALID_TYPE ||
1961
3.20M
                values[i].get_type() == PrimitiveType::TYPE_NULL) {
1962
89.4k
                continue;
1963
89.4k
            }
1964
3.11M
            const auto& path_str = paths[i].get_path();
1965
3.11M
            StringRef path_ref {path_str.data(), path_str.size()};
1966
3.11M
            if (UNLIKELY(!seen_paths.emplace(path_ref).second)) {
1967
2
                throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
1968
2
                                       "may contains duplicated entry : {}",
1969
2
                                       std::string_view(path_str));
1970
2
            }
1971
3.11M
            doc_item_indexes.push_back(i);
1972
3.11M
        }
1973
1974
175k
        std::sort(doc_item_indexes.begin(), doc_item_indexes.end(),
1975
20.8M
                  [&](size_t l, size_t r) { return paths[l].get_path() < paths[r].get_path(); });
1976
3.02M
        for (const auto idx : doc_item_indexes) {
1977
3.02M
            const auto& path_str = paths[idx].get_path();
1978
3.02M
            doc_value_data_paths->insert_data(path_str.data(), path_str.size());
1979
3.02M
            auto& chars = doc_value_data_values->get_chars();
1980
3.02M
            append_field_to_binary_chars(values[idx], chars);
1981
3.02M
            doc_value_data_values->get_offsets().push_back(chars.size());
1982
3.02M
        }
1983
175k
    } break;
1984
1.35M
    }
1985
1.35M
    doc_value_data_offsets.push_back(doc_value_data_paths->size());
1986
    // /// Insert default values to missed subcolumns.
1987
1.35M
    const auto& subcolumns = column_variant.get_subcolumns();
1988
30.3M
    for (const auto& entry : subcolumns) {
1989
30.3M
        if (entry->data.size() == old_num_rows) {
1990
            // Handle nested paths differently from simple paths
1991
14.4M
            if (entry->path.has_nested_part()) {
1992
                // Try to insert default from nested, if failed, insert regular default
1993
0
                bool success = UNLIKELY(column_variant.try_insert_default_from_nested(entry));
1994
0
                if (!success) {
1995
0
                    entry->data.insert_default();
1996
0
                }
1997
14.4M
            } else {
1998
                // For non-nested paths, increment default counter
1999
14.4M
                entry->data.increment_default_counter();
2000
14.4M
            }
2001
14.4M
        }
2002
30.3M
    }
2003
1.35M
    column_variant.incr_num_rows();
2004
1.35M
    auto sparse_column = column_variant.get_sparse_column();
2005
1.35M
    if (sparse_column->size() == old_num_rows) {
2006
1.34M
        sparse_column->assume_mutable()->insert_default();
2007
1.34M
    }
2008
1.35M
#ifndef NDEBUG
2009
1.35M
    column_variant.check_consistency();
2010
1.35M
#endif
2011
1.35M
}
2012
2013
// exposed interfaces
2014
// Parses one JSON document (`json.data`, `json.size`) into `column` by forwarding to
// parse_json_to_variant_impl.
// If `parser` is null, a JsonParser is obtained from `parsers_pool` for the duration of the
// call; otherwise the caller-supplied parser is used.
void parse_json_to_variant(IColumn& column, const StringRef& json, JsonParser* parser,
                           const ParseConfig& config) {
    if (parser == nullptr) {
        // Keep the pool handle alive until parsing has finished.
        auto pooled = parsers_pool.get([] { return new JsonParser(); });
        parse_json_to_variant_impl(column, json.data, json.size, pooled.get(), config);
        return;
    }
    parse_json_to_variant_impl(column, json.data, json.size, parser, config);
}
2023
2024
// Parses every row of `raw_json_column` into `column`, reusing a single JsonParser taken from
// `parsers_pool`, and finalizes `column` once all rows have been processed.
void parse_json_to_variant(IColumn& column, const ColumnString& raw_json_column,
                           const ParseConfig& config) {
    auto pooled = parsers_pool.get([] { return new JsonParser(); });
    for (size_t row = 0; row < raw_json_column.size(); ++row) {
        const StringRef json = raw_json_column.get_data_at(row);
        parse_json_to_variant_impl(column, json.data, json.size, pooled.get(), config);
    }
    column.finalize();
}
2033
2034
// parse the doc snapshot column to subcolumns
2035
0
// Builds one subcolumn per path via materialize_docs_to_subcolumns_map, finalizes each of them
// and registers it on `column_variant`, then finalizes `column_variant` itself.
// Throws doris::Exception(INTERNAL_ERROR) if a subcolumn cannot be added.
void materialize_docs_to_subcolumns(ColumnVariant& column_variant) {
    auto path_to_subcolumn = materialize_docs_to_subcolumns_map(column_variant);

    for (auto& [path, subcolumn] : path_to_subcolumn) {
        subcolumn.finalize();
        const bool added = column_variant.add_sub_column(
                PathInData(path), IColumn::mutate(subcolumn.get_finalized_column_ptr()),
                subcolumn.get_least_common_type());
        if (added) {
            continue;
        }
        throw doris::Exception(ErrorCode::INTERNAL_ERROR,
                               "Failed to add subcolumn {}, which is from doc snapshot column",
                               path);
    }

    column_variant.finalize();
}
2051
2052
// ============ Implementation from variant_util.cpp ============
2053
2054
// Expands the serialized doc-value data of `variant` (a paths column, a values column and
// per-row end offsets) into one Subcolumn per distinct path.
//
// Every returned subcolumn has exactly `num_rows` entries: rows in which a path does not occur
// are filled with defaults.
//
// `expected_unique_paths`, when non-zero, is used as the initial capacity of the result map;
// otherwise the capacity is min(number of stored paths, kInitialPathReserve).
//
// NOTE: the map keys are string_views built from the pointers returned by
// `column_key->get_data_at(i)`; they do not own their bytes, so the result must not outlive
// the path column of `variant`.
phmap::flat_hash_map<std::string_view, ColumnVariant::Subcolumn> materialize_docs_to_subcolumns_map(
        const ColumnVariant& variant, size_t expected_unique_paths) {
    constexpr size_t kInitialPathReserve = 8192;
    phmap::flat_hash_map<std::string_view, ColumnVariant::Subcolumn> subcolumns;

    const auto [column_key, column_value] = variant.get_doc_value_data_paths_and_values();
    const auto& column_offsets = variant.serialized_doc_value_column_offsets();
    const size_t num_rows = column_offsets.size();

    DCHECK_EQ(num_rows, variant.size()) << "doc snapshot offsets size mismatch with variant rows";

    subcolumns.reserve(expected_unique_paths != 0
                               ? expected_unique_paths
                               : std::min<size_t>(column_key->size(), kInitialPathReserve));

    // Offsets are cumulative end positions, so the entries of a row start where the previous
    // row ended. Carrying the previous end forward avoids evaluating `row - 1` for row == 0,
    // which wraps around for an unsigned index and relies on the container tolerating an
    // index of -1.
    size_t start = 0;
    for (size_t row = 0; row < num_rows; ++row) {
        const size_t end = column_offsets[row];
        for (size_t i = start; i < end; ++i) {
            const auto& key = column_key->get_data_at(i);
            const std::string_view path_sv(key.data, key.size);

            auto [it, inserted] =
                    subcolumns.try_emplace(path_sv, ColumnVariant::Subcolumn {0, true, false});
            auto& subcolumn = it->second;
            if (inserted) {
                // First occurrence of this path: back-fill all preceding rows.
                subcolumn.insert_many_defaults(row);
            } else if (subcolumn.size() != row) {
                // Path was absent in some rows since its last occurrence: fill the gap.
                subcolumn.insert_many_defaults(row - subcolumn.size());
            }
            subcolumn.deserialize_from_binary_column(column_value, i);
        }
        start = end;
    }

    // Pad paths that do not occur in the trailing rows.
    for (auto& [path, subcolumn] : subcolumns) {
        if (subcolumn.size() != num_rows) {
            subcolumn.insert_many_defaults(num_rows - subcolumn.size());
        }
    }

    return subcolumns;
}
2096
2097
// For every block position listed in `variant_pos`, turns a scalar (not yet parsed) variant
// column into a parsed one and stores the result back into `block`.
//
// `configs[k]` is the parse configuration used for `variant_pos[k]`; it is indexed without a
// bounds check, so `configs` needs at least `variant_pos.size()` entries.
// Columns whose variant is not scalar are left untouched.
// Returns the error of cast_column if converting a JSONB root to string fails.
Status _parse_and_materialize_variant_columns(Block& block,
                                              const std::vector<uint32_t>& variant_pos,
                                              const std::vector<ParseConfig>& configs) {
    for (size_t idx = 0; idx < variant_pos.size(); ++idx) {
        auto source_column = block.get_by_position(variant_pos[idx]).column;
        const bool wrapped_in_nullable = source_column->is_nullable();

        // Locate the ColumnVariant, looking through the Nullable wrapper when there is one.
        MutableColumnPtr variant_holder = source_column->assume_mutable();
        if (wrapped_in_nullable) {
            const auto& nullable_column = assert_cast<const ColumnNullable&>(*source_column);
            variant_holder = nullable_column.get_nested_column_ptr()->assume_mutable();
        }
        auto& variant = assert_cast<ColumnVariant&>(*variant_holder);
        variant_holder->finalize();

        if (!variant.is_scalar_variant()) {
            // already parsed
            continue;
        }

        VLOG_DEBUG << "parse scalar variant column: " << variant.get_root_type()->get_name();

        // Root column with any Nullable wrapper stripped.
        ColumnPtr scalar_root;
        if (variant.get_root_type()->get_primitive_type() == TYPE_JSONB) {
            // TODO more efficient way to parse jsonb type, currently we just convert jsonb to
            // json str and parse them into variant
            RETURN_IF_ERROR(cast_column({variant.get_root(), variant.get_root_type(), ""},
                                        variant.get_root()->is_nullable()
                                                ? make_nullable(std::make_shared<DataTypeString>())
                                                : std::make_shared<DataTypeString>(),
                                        &scalar_root));
            if (scalar_root->is_nullable()) {
                scalar_root = assert_cast<const ColumnNullable*>(scalar_root.get())
                                      ->get_nested_column_ptr();
            }
        } else {
            const auto& root = *variant.get_root();
            scalar_root = root.is_nullable()
                                  ? assert_cast<const ColumnNullable&>(root).get_nested_column_ptr()
                                  : variant.get_root();
        }

        MutableColumnPtr parsed_variant;
        if (scalar_root->is_column_string()) {
            // String root: parse each row as JSON into a fresh variant column.
            parsed_variant = ColumnVariant::create(0, variant.enable_doc_mode());
            parse_json_to_variant(*parsed_variant.get(),
                                  assert_cast<const ColumnString&>(*scalar_root), configs[idx]);
        } else {
            // Root maybe other types rather than string like ColumnVariant(Int32).
            // In this case, we should finalize the root and cast to JSON type
            auto expected_root_type =
                    make_nullable(std::make_shared<ColumnVariant::MostCommonType>());
            variant.ensure_root_node_type(expected_root_type);
            parsed_variant = variant.assume_mutable();
        }

        // Wrap variant with nullmap if it is nullable
        ColumnPtr result = parsed_variant->get_ptr();
        if (wrapped_in_nullable) {
            const auto& null_map =
                    assert_cast<const ColumnNullable&>(*source_column).get_null_map_column_ptr();
            result = ColumnNullable::create(result, null_map);
        }
        block.get_by_position(variant_pos[idx]).column = result;
    }
    return Status::OK();
}
2164
2165
// Runs _parse_and_materialize_variant_columns inside RETURN_IF_CATCH_EXCEPTION and returns
// the Status it produces.
Status parse_and_materialize_variant_columns(Block& block, const std::vector<uint32_t>& variant_pos,
                                             const std::vector<ParseConfig>& configs) {
    Status status = Status::OK();
    RETURN_IF_CATCH_EXCEPTION(
            { status = _parse_and_materialize_variant_columns(block, variant_pos, configs); });
    return status;
}
2170
2171
// Selects the variant-typed columns among `column_pos` (indices into `tablet_schema`), builds
// one ParseConfig per selected column and delegates to the position/config overload.
//
// Returns OK without touching `block` when none of the listed columns is a variant.
//
// NOTE(review): the same value is used both as schema index and as position inside `block`
// (the callee indexes `block` with it) - confirm that callers always pass a block laid out
// like the schema.
Status parse_and_materialize_variant_columns(Block& block, const TabletSchema& tablet_schema,
                                             const std::vector<uint32_t>& column_pos) {
    std::vector<uint32_t> variant_positions;
    variant_positions.reserve(column_pos.size());
    for (const uint32_t schema_pos : column_pos) {
        if (tablet_schema.column(schema_pos).is_variant_type()) {
            variant_positions.push_back(schema_pos);
        }
    }

    if (variant_positions.empty()) {
        return Status::OK();
    }

    std::vector<ParseConfig> configs(variant_positions.size());
    for (size_t i = 0; i < variant_positions.size(); ++i) {
        ParseConfig& cfg = configs[i];
        // Deprecated legacy flatten-nested switch. Distinct from variant_enable_nested_group.
        cfg.deprecated_enable_flatten_nested = tablet_schema.deprecated_variant_flatten_nested();

        const auto& column = tablet_schema.column(variant_positions[i]);
        if (!column.is_variant_type()) {
            return Status::InternalError("column is not variant type, column name: {}",
                                         column.name());
        }

        // if doc mode is not enabled, no need to parse to doc value column
        cfg.parse_to = column.variant_enable_doc_mode()
                               ? ParseConfig::ParseTo::OnlyDocValueColumn
                               : ParseConfig::ParseTo::OnlySubcolumns;
    }

    return parse_and_materialize_variant_columns(block, variant_positions, configs);
}
2212
2213
#include "common/compile_check_end.h"
2214
} // namespace doris::variant_util