Coverage Report

Created: 2026-06-25 20:47

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exec/common/variant_util.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "exec/common/variant_util.h"
19
20
#include <fmt/format.h>
21
#include <gen_cpp/FrontendService.h>
22
#include <gen_cpp/FrontendService_types.h>
23
#include <gen_cpp/HeartbeatService_types.h>
24
#include <gen_cpp/MasterService_types.h>
25
#include <gen_cpp/Status_types.h>
26
#include <gen_cpp/Types_types.h>
27
#include <glog/logging.h>
28
#include <rapidjson/document.h>
29
#include <rapidjson/stringbuffer.h>
30
#include <rapidjson/writer.h>
31
#include <simdjson/simdjson.h> // IWYU pragma: keep
32
#include <unicode/uchar.h>
33
34
#include <algorithm>
35
#include <cassert>
36
#include <cstddef>
37
#include <cstdint>
38
#include <cstring>
39
#include <list>
40
#include <memory>
41
#include <mutex>
42
#include <optional>
43
#include <ostream>
44
#include <ranges>
45
#include <set>
46
#include <stack>
47
#include <string>
48
#include <string_view>
49
#include <unordered_map>
50
#include <utility>
51
#include <vector>
52
53
#include "common/config.h"
54
#include "common/status.h"
55
#include "core/assert_cast.h"
56
#include "core/block/block.h"
57
#include "core/block/column_numbers.h"
58
#include "core/block/column_with_type_and_name.h"
59
#include "core/column/column.h"
60
#include "core/column/column_array.h"
61
#include "core/column/column_map.h"
62
#include "core/column/column_nullable.h"
63
#include "core/column/column_string.h"
64
#include "core/column/column_variant.h"
65
#include "core/data_type/data_type.h"
66
#include "core/data_type/data_type_array.h"
67
#include "core/data_type/data_type_factory.hpp"
68
#include "core/data_type/data_type_jsonb.h"
69
#include "core/data_type/data_type_nullable.h"
70
#include "core/data_type/data_type_string.h"
71
#include "core/data_type/data_type_variant.h"
72
#include "core/data_type/define_primitive_type.h"
73
#include "core/data_type/get_least_supertype.h"
74
#include "core/data_type/primitive_type.h"
75
#include "core/field.h"
76
#include "core/typeid_cast.h"
77
#include "core/types.h"
78
#include "exec/common/field_visitors.h"
79
#include "exec/common/sip_hash.h"
80
#include "exprs/function/function.h"
81
#include "exprs/function/simple_function_factory.h"
82
#include "exprs/function_context.h"
83
#include "exprs/json_functions.h"
84
#include "re2/re2.h"
85
#include "runtime/exec_env.h"
86
#include "runtime/runtime_state.h"
87
#include "storage/olap_common.h"
88
#include "storage/rowset/beta_rowset.h"
89
#include "storage/rowset/rowset.h"
90
#include "storage/rowset/rowset_fwd.h"
91
#include "storage/segment/segment_loader.h"
92
#include "storage/segment/variant/nested_group_path.h"
93
#include "storage/segment/variant/variant_column_reader.h"
94
#include "storage/segment/variant/variant_column_writer_impl.h"
95
#include "storage/tablet/tablet.h"
96
#include "storage/tablet/tablet_fwd.h"
97
#include "storage/tablet/tablet_schema.h"
98
#include "util/client_cache.h"
99
#include "util/defer_op.h"
100
#include "util/json/json_parser.h"
101
#include "util/json/path_in_data.h"
102
#include "util/json/simd_json_parser.h"
103
#include "util/jsonb_utils.h"
104
105
namespace doris::variant_util {
106
107
279
// Appends `ch` to `*regex_output` so that it is matched literally by a regex:
// regex metacharacters get a leading backslash, any other character is copied as is.
inline void append_escaped_regex_char(std::string* regex_output, char ch) {
    // Characters that carry a special meaning in a regex and therefore need escaping.
    static constexpr std::string_view kRegexMetaChars = ".^$+*?()|{}[]\\";
    if (kRegexMetaChars.find(ch) != std::string_view::npos) {
        regex_output->push_back('\\');
    }
    regex_output->push_back(ch);
}
131
132
// Small LRU to cap compiled glob patterns
133
constexpr size_t kGlobRegexCacheCapacity = 256;
134
135
struct GlobRegexCacheEntry {
136
    std::shared_ptr<RE2> re2;
137
    std::list<std::string>::iterator lru_it;
138
};
139
140
static std::mutex g_glob_regex_cache_mutex;
141
static std::list<std::string> g_glob_regex_cache_lru;
142
static std::unordered_map<std::string, GlobRegexCacheEntry> g_glob_regex_cache;
143
144
286
std::shared_ptr<RE2> get_or_build_re2(const std::string& glob_pattern) {
145
286
    {
146
286
        std::lock_guard<std::mutex> lock(g_glob_regex_cache_mutex);
147
286
        auto it = g_glob_regex_cache.find(glob_pattern);
148
286
        if (it != g_glob_regex_cache.end()) {
149
230
            g_glob_regex_cache_lru.splice(g_glob_regex_cache_lru.begin(), g_glob_regex_cache_lru,
150
230
                                          it->second.lru_it);
151
230
            return it->second.re2;
152
230
        }
153
286
    }
154
56
    std::string regex_pattern;
155
56
    Status st = glob_to_regex(glob_pattern, &regex_pattern);
156
56
    if (!st.ok()) {
157
2
        return nullptr;
158
2
    }
159
54
    auto compiled = std::make_shared<RE2>(regex_pattern);
160
54
    if (!compiled->ok()) {
161
3
        return nullptr;
162
3
    }
163
51
    {
164
51
        std::lock_guard<std::mutex> lock(g_glob_regex_cache_mutex);
165
51
        auto it = g_glob_regex_cache.find(glob_pattern);
166
51
        if (it != g_glob_regex_cache.end()) {
167
0
            g_glob_regex_cache_lru.splice(g_glob_regex_cache_lru.begin(), g_glob_regex_cache_lru,
168
0
                                          it->second.lru_it);
169
0
            return it->second.re2;
170
0
        }
171
51
        g_glob_regex_cache_lru.push_front(glob_pattern);
172
51
        g_glob_regex_cache.emplace(glob_pattern,
173
51
                                   GlobRegexCacheEntry {compiled, g_glob_regex_cache_lru.begin()});
174
51
        if (g_glob_regex_cache.size() > kGlobRegexCacheCapacity) {
175
0
            const std::string& evict_key = g_glob_regex_cache_lru.back();
176
0
            g_glob_regex_cache.erase(evict_key);
177
0
            g_glob_regex_cache_lru.pop_back();
178
0
        }
179
51
    }
180
0
    return compiled;
181
51
}
182
183
// Convert a restricted glob pattern into a regex.
184
// Supported: '*', '?', '[...]', '\\' escape. Others are treated as literals.
185
94
Status glob_to_regex(const std::string& glob_pattern, std::string* regex_pattern) {
186
94
    regex_pattern->clear();
187
94
    regex_pattern->append("^");
188
94
    bool is_escaped = false;
189
94
    size_t pattern_length = glob_pattern.size();
190
447
    for (size_t index = 0; index < pattern_length; ++index) {
191
357
        char current_char = glob_pattern[index];
192
357
        if (is_escaped) {
193
9
            append_escaped_regex_char(regex_pattern, current_char);
194
9
            is_escaped = false;
195
9
            continue;
196
9
        }
197
348
        if (current_char == '\\') {
198
13
            is_escaped = true;
199
13
            continue;
200
13
        }
201
335
        if (current_char == '*') {
202
24
            regex_pattern->append(".*");
203
24
            continue;
204
24
        }
205
311
        if (current_char == '?') {
206
13
            regex_pattern->append(".");
207
13
            continue;
208
13
        }
209
298
        if (current_char == '[') {
210
32
            size_t class_index = index + 1;
211
32
            bool class_closed = false;
212
32
            bool is_class_escaped = false;
213
32
            std::string class_buffer;
214
32
            if (class_index < pattern_length &&
215
32
                (glob_pattern[class_index] == '!' || glob_pattern[class_index] == '^')) {
216
9
                class_buffer.push_back('^');
217
9
                ++class_index;
218
9
            }
219
95
            for (; class_index < pattern_length; ++class_index) {
220
91
                char class_char = glob_pattern[class_index];
221
91
                if (is_class_escaped) {
222
10
                    class_buffer.push_back(class_char);
223
10
                    is_class_escaped = false;
224
10
                    continue;
225
10
                }
226
81
                if (class_char == '\\') {
227
10
                    is_class_escaped = true;
228
10
                    continue;
229
10
                }
230
71
                if (class_char == ']') {
231
28
                    class_closed = true;
232
28
                    break;
233
28
                }
234
43
                class_buffer.push_back(class_char);
235
43
            }
236
32
            if (!class_closed) {
237
4
                return Status::InvalidArgument("Unclosed character class in glob pattern: {}",
238
4
                                               glob_pattern);
239
4
            }
240
28
            regex_pattern->append("[");
241
28
            regex_pattern->append(class_buffer);
242
28
            regex_pattern->append("]");
243
28
            index = class_index;
244
28
            continue;
245
32
        }
246
266
        append_escaped_regex_char(regex_pattern, current_char);
247
266
    }
248
90
    if (is_escaped) {
249
4
        append_escaped_regex_char(regex_pattern, '\\');
250
4
    }
251
90
    regex_pattern->append("$");
252
90
    return Status::OK();
253
94
}
254
255
286
bool glob_match_re2(const std::string& glob_pattern, const std::string& candidate_path) {
256
286
    auto compiled = get_or_build_re2(glob_pattern);
257
286
    if (compiled == nullptr) {
258
5
        return false;
259
5
    }
260
281
    return RE2::FullMatch(candidate_path, *compiled);
261
286
}
262
263
// NestedGroup's physical children and offsets are produced by NestedGroupWriteProvider, not by
264
// appending TabletSchema extracted columns here. This predicate keeps only ordinary Variant paths
265
// that are outside the NG tree, for example `v.owner` beside `v.items[*]`.
266
0
bool is_regular_path_outside_nested_group(const PathInData& path) {
267
0
    const std::string& relative_path = path.get_path();
268
0
    return !relative_path.empty() && !path.get_is_typed() && !path.has_nested_part() &&
269
0
           !segment_v2::contains_nested_group_marker(relative_path) &&
270
0
           !segment_v2::is_root_nested_group_path(relative_path) &&
271
0
           relative_path != SPARSE_COLUMN_PATH &&
272
0
           relative_path.find(DOC_VALUE_COLUMN_PATH) == std::string::npos;
273
0
}
274
275
bool should_materialize_nested_group_regular_subcolumns(
276
        const TabletColumnPtr& column,
277
34
        const std::unordered_map<int32_t, VariantExtendedInfo>& uid_to_variant_extended_info) {
278
34
    const auto info_it = uid_to_variant_extended_info.find(column->unique_id());
279
34
    return column->variant_enable_nested_group() ||
280
34
           (info_it != uid_to_variant_extended_info.end() && info_it->second.has_nested_group);
281
34
}
282
283
std::unordered_set<int32_t> collect_nested_group_compaction_root_uids(
284
        const TabletSchemaSPtr& target,
285
67
        const std::unordered_map<int32_t, VariantExtendedInfo>& uid_to_variant_extended_info) {
286
67
    std::unordered_set<int32_t> root_uids;
287
881
    for (const TabletColumnPtr& column : target->columns()) {
288
881
        if (column->is_variant_type() && should_materialize_nested_group_regular_subcolumns(
289
34
                                                 column, uid_to_variant_extended_info)) {
290
1
            root_uids.insert(column->unique_id());
291
1
        }
292
881
    }
293
67
    return root_uids;
294
67
}
295
296
// Copies from `extended_info.path_to_data_types` only the entries whose path satisfies
// is_regular_path_outside_nested_group().
PathToDataTypes collect_regular_types_outside_nested_group(
        const VariantExtendedInfo& extended_info) {
    PathToDataTypes kept;
    for (const auto& entry : extended_info.path_to_data_types) {
        if (is_regular_path_outside_nested_group(entry.first)) {
            kept.emplace(entry.first, entry.second);
        }
    }
    return kept;
}
307
308
80
size_t get_number_of_dimensions(const IDataType& type) {
309
80
    if (const auto* type_array = typeid_cast<const DataTypeArray*>(&type)) {
310
4
        return type_array->get_number_of_dimensions();
311
4
    }
312
76
    return 0;
313
80
}
314
3
size_t get_number_of_dimensions(const IColumn& column) {
315
3
    if (const auto* column_array = check_and_get_column<ColumnArray>(column)) {
316
2
        return column_array->get_number_of_dimensions();
317
2
    }
318
1
    return 0;
319
3
}
320
321
1.09k
// Walks through (optionally Nullable-wrapped) nested arrays and returns the element type of
// the innermost array exactly as that array declares it. When `type` is not an array
// (after looking through one Nullable wrapper), `type` itself is returned.
DataTypePtr get_base_type_of_array(const DataTypePtr& type) {
    // Works on raw pointers so that no shared pointers are copied while descending.
    const auto strip_nullable = [](const IDataType* candidate) -> const IDataType* {
        const auto* nullable = typeid_cast<const DataTypeNullable*>(candidate);
        return nullable != nullptr ? nullable->get_nested_type().get() : candidate;
    };

    const DataTypeArray* innermost_array = nullptr;
    const IDataType* cursor = strip_nullable(type.get());
    while (const auto* array_type = typeid_cast<const DataTypeArray*>(cursor)) {
        innermost_array = array_type;
        cursor = strip_nullable(array_type->get_nested_type().get());
    }

    if (innermost_array == nullptr) {
        return type;
    }
    return innermost_array->get_nested_type();
}
337
338
49.6k
// Casts `arg` to `type` and stores the resulting full (non-const) column in `*result`.
//
// * Target is Variant: handled here instead of through the CAST function (see below).
// * Source type is INVALID_TYPE, or the CAST function reports failure: `*result` is a
//   column of `arg.column->size()` default values of `type`.
// * Returns InternalError only when no CAST function can be resolved.
Status cast_column(const ColumnWithTypeAndName& arg, const DataTypePtr& type, ColumnPtr* result) {
    ColumnsWithTypeAndName cast_args {arg, {nullptr, type, type->get_name()}};

    // To keep the null information, the function framework must not be used for Variant
    // targets: it would wrap nullable around the Variant instead of around the Variant root.
    // correct output: Nullable(Array(int)) -> Nullable(Variant(Nullable(Array(int))))
    // incorrect output: Nullable(Array(int)) -> Nullable(Variant(Array(int)))
    if (type->get_primitive_type() == TYPE_VARIANT) {
        // Variant -> Variant: only the nullability of the outer column may differ.
        if (arg.type->get_primitive_type() == TYPE_VARIANT) {
            *result = type->is_nullable() ? make_nullable(arg.column) : remove_nullable(arg.column);
            return Status::OK();
        }

        // Otherwise the source column/type becomes the root of a fresh Variant column.
        CHECK(is_column_nullable(*arg.column));
        const auto variant_type = remove_nullable(type);
        const auto& variant_meta = assert_cast<const DataTypeVariant&>(*variant_type);
        auto variant_column = ColumnVariant::create(variant_meta.variant_max_subcolumns_count(),
                                                    variant_meta.enable_doc_mode());

        variant_column->create_root(arg.type, IColumn::mutate(arg.column));
        // The outer null map is taken from the source column.
        ColumnPtr wrapped = ColumnNullable::create(
                variant_column->get_ptr(),
                assert_cast<const ColumnNullable*>(arg.column.get())->get_null_map_column_ptr());
        *result = type->is_nullable() ? wrapped : variant_column->get_ptr();
        return Status::OK();
    }

    auto cast_fn = SimpleFunctionFactory::instance().get_function("CAST", cast_args, type);
    if (!cast_fn) {
        return Status::InternalError("Not found cast function {} to {}", arg.type->get_name(),
                                     type->get_name());
    }
    Block scratch {cast_args};
    // Position the result column will have once it is appended to `scratch`.
    const uint32_t result_pos = cast_set<uint32_t>(scratch.columns());
    RuntimeState state;
    auto fn_ctx = FunctionContext::create_context(&state, {}, {});

    // Column of default values of the target type, one per input row.
    const auto default_filled = [&]() -> ColumnPtr {
        return type->create_column_const_with_default_value(arg.column->size())
                ->convert_to_full_column_if_const();
    };

    if (arg.type->get_primitive_type() == INVALID_TYPE) {
        // cast from nothing to any type should result in nulls
        *result = default_filled();
        return Status::OK();
    }

    // A string column converted to jsonb just gets a string jsonb field added to the
    // destination column instead of having every row of the source parsed.
    fn_ctx->set_string_as_jsonb_string(true);
    fn_ctx->set_jsonb_string_as_string(true);
    scratch.insert({nullptr, type, arg.name});
    // TODO(lihangyu): we should handle this error in strict mode
    if (!cast_fn->execute(fn_ctx.get(), scratch, {0}, result_pos, arg.column->size())) {
        LOG_EVERY_N(WARNING, 100) << fmt::format("cast from {} to {}", arg.type->get_name(),
                                                 type->get_name());
        *result = default_filled();
        return Status::OK();
    }
    *result = scratch.get_by_position(result_pos).column->convert_to_full_column_if_const();
    VLOG_DEBUG << fmt::format("{} before convert {}, after convert {}", arg.name,
                              arg.column->get_name(), (*result)->get_name());
    return Status::OK();
}
401
402
2
// Converts a JSONB root column (optionally Nullable, optionally const) into a string column
// holding the JSON text of every row. Rows that are null or whose JSONB payload is empty
// become default (empty) strings.
ColumnPtr jsonb_root_to_json_string_column(const IColumn& root) {
    const auto full_root = root.convert_to_full_column_if_const();

    // Look through the Nullable wrapper, remembering its null map.
    const IColumn* payload = full_root.get();
    const NullMap* nulls = nullptr;
    if (full_root->is_nullable()) {
        const auto& nullable_root = assert_cast<const ColumnNullable&>(*full_root);
        payload = &nullable_root.get_nested_column();
        nulls = &nullable_root.get_null_map_data();
    }
    const auto& jsonb_values = assert_cast<const ColumnString&>(*payload);
    const size_t row_count = jsonb_values.size();

    auto json_strings = ColumnString::create();
    json_strings->reserve(row_count);
    for (size_t row = 0; row < row_count; ++row) {
        const bool row_is_null = nulls != nullptr && (*nulls)[row];
        if (row_is_null) {
            json_strings->insert_default();
            continue;
        }

        if (const auto raw = jsonb_values.get_data_at(row); raw.size != 0) {
            const auto text = JsonbToJson::jsonb_to_json_string(raw.data, raw.size);
            json_strings->insert_data(text.data(), text.size());
        } else {
            json_strings->insert_default();
        }
    }
    return json_strings->get_ptr();
}
432
433
void get_column_by_type(const DataTypePtr& data_type, const std::string& name, TabletColumn& column,
434
2.40k
                        const ExtraInfo& ext_info) {
435
2.40k
    column.set_name(name);
436
2.40k
    column.set_type(data_type->get_storage_field_type());
437
2.40k
    if (ext_info.unique_id >= 0) {
438
4
        column.set_unique_id(ext_info.unique_id);
439
4
    }
440
2.40k
    if (ext_info.parent_unique_id >= 0) {
441
1.19k
        column.set_parent_unique_id(ext_info.parent_unique_id);
442
1.19k
    }
443
2.40k
    if (!ext_info.path_info.empty()) {
444
1.19k
        column.set_path_info(ext_info.path_info);
445
1.19k
    }
446
2.40k
    if (data_type->is_nullable()) {
447
1.19k
        const auto& real_type = static_cast<const DataTypeNullable&>(*data_type);
448
1.19k
        column.set_is_nullable(true);
449
1.19k
        get_column_by_type(real_type.get_nested_type(), name, column, {});
450
1.19k
        return;
451
1.19k
    }
452
1.21k
    if (data_type->get_primitive_type() == PrimitiveType::TYPE_ARRAY) {
453
19
        TabletColumn child;
454
19
        get_column_by_type(assert_cast<const DataTypeArray*>(data_type.get())->get_nested_type(),
455
19
                           "", child, {});
456
19
        column.set_length(TabletColumn::get_field_length_by_type(TPrimitiveType::ARRAY, 0));
457
19
        column.add_sub_column(child);
458
19
        return;
459
19
    }
460
1.19k
    if (data_type->get_primitive_type() == PrimitiveType::TYPE_VARIANT) {
461
0
        const auto* dt_variant = assert_cast<const DataTypeVariant*>(data_type.get());
462
0
        column.set_variant_max_subcolumns_count(dt_variant->variant_max_subcolumns_count());
463
0
        column.set_variant_enable_doc_mode(dt_variant->enable_doc_mode());
464
0
        return;
465
0
    }
466
    // size is not fixed when type is string or json
467
1.19k
    if (is_string_type(data_type->get_primitive_type()) ||
468
1.19k
        data_type->get_primitive_type() == TYPE_JSONB) {
469
476
        column.set_length(INT_MAX);
470
476
        return;
471
476
    }
472
473
720
    PrimitiveType type = data_type->get_primitive_type();
474
720
    if (is_int_or_bool(type) || is_string_type(type) || is_float_or_double(type) || is_ip(type) ||
475
720
        is_date_or_datetime(type) || type == PrimitiveType::TYPE_DATEV2) {
476
715
        column.set_length(cast_set<int32_t>(data_type->get_size_of_value_in_memory()));
477
715
        return;
478
715
    }
479
5
    if (is_decimal(type)) {
480
1
        column.set_precision(data_type->get_precision());
481
1
        column.set_frac(data_type->get_scale());
482
1
        return;
483
1
    }
484
    // datetimev2 needs scale
485
4
    if (type == PrimitiveType::TYPE_DATETIMEV2 || type == PrimitiveType::TYPE_TIMESTAMPTZ) {
486
3
        column.set_precision(-1);
487
3
        column.set_frac(data_type->get_scale());
488
3
        return;
489
3
    }
490
491
1
    throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
492
1
                           "unexcepted data column type: {}, column name is: {}",
493
1
                           data_type->get_name(), name);
494
4
}
495
496
// Convenience overload: builds a fresh TabletColumn for `data_type` / `name` / `ext_info`
// through the out-parameter overload and returns it by value.
TabletColumn get_column_by_type(const DataTypePtr& data_type, const std::string& name,
                                const ExtraInfo& ext_info) {
    TabletColumn built;
    get_column_by_type(data_type, name, built, ext_info);
    return built;
}
502
503
// check if two paths which same prefix have different structure
504
static bool has_different_structure_in_same_path(const PathInData::Parts& lhs,
505
9.01k
                                                 const PathInData::Parts& rhs) {
506
9.01k
    if (lhs.size() != rhs.size()) {
507
1
        return false; // different size means different structure
508
1
    }
509
    // Since we group by path string, lhs and rhs must have the same size and keys
510
    // We only need to check if they have different nested structure
511
36.0k
    for (size_t i = 0; i < lhs.size(); ++i) {
512
27.0k
        if (lhs[i] != rhs[i]) {
513
5
            VLOG_DEBUG << fmt::format(
514
0
                    "Check different structure: {} vs {}, lhs[i].is_nested: {}, rhs[i].is_nested: "
515
0
                    "{}",
516
0
                    lhs[i].key, rhs[i].key, lhs[i].is_nested, rhs[i].is_nested);
517
5
            return true;
518
5
        }
519
27.0k
    }
520
9.01k
    return false;
521
9.01k
}
522
523
3.05k
// Verifies that no two entries of `tuple_paths` share the same path string while differing
// in structure (see has_different_structure_in_same_path()).
// Returns DataQualityError for the first conflicting pair found, OK otherwise.
Status check_variant_has_no_ambiguous_paths(const PathsInData& tuple_paths) {
    // Bucket the indices by path string so that only same-path entries get compared.
    std::unordered_map<std::string, std::vector<size_t>> indices_by_path;
    for (size_t idx = 0; idx < tuple_paths.size(); ++idx) {
        const auto& path = tuple_paths[idx];
        indices_by_path[path.get_path()].push_back(idx);
        VLOG_DEBUG << "tuple_paths[i]: " << path.get_path();
    }

    for (const auto& bucket : indices_by_path) {
        const std::vector<size_t>& members = bucket.second;
        if (members.size() <= 1) {
            continue; // a single entry cannot conflict with anything
        }

        // Compare every member against all members that precede it in the bucket.
        for (size_t later = 1; later < members.size(); ++later) {
            const auto& later_path = tuple_paths[members[later]];
            for (size_t earlier = 0; earlier < later; ++earlier) {
                const auto& earlier_path = tuple_paths[members[earlier]];
                if (!has_different_structure_in_same_path(later_path.get_parts(),
                                                          earlier_path.get_parts())) {
                    continue;
                }
                return Status::DataQualityError(
                        "Ambiguous paths: {} vs {} with different nested part {} vs {}",
                        later_path.get_path(), earlier_path.get_path(),
                        later_path.has_nested_part(), earlier_path.has_nested_part());
            }
        }
    }
    return Status::OK();
}
556
557
// Appends to `common_schema` one column per path in `subcolumns_types`.
//
// For every path (the dummy column path is skipped) a common type is chosen: Nullable(JSONB)
// when the candidate types disagree on their number of array dimensions, otherwise the
// least supertype computed by get_least_supertype_jsonb(), made nullable if it is not.
// A path whose root-less form appears in `typed_columns` and which has no nested part
// reuses that typed column definition (re-parented to `variant_col_unique_id`); every other
// path gets a column derived from the chosen type.
//
// `common_schema` must be uniquely owned (CHECKed). When `path_set` is non-null every
// appended path is inserted into it. Always returns OK.
Status update_least_schema_internal(const std::map<PathInData, DataTypes>& subcolumns_types,
                                    TabletSchemaSPtr& common_schema, int32_t variant_col_unique_id,
                                    const std::map<std::string, TabletColumnPtr>& typed_columns,
                                    std::set<PathInData>* path_set) {
    PathsInData tuple_paths;
    DataTypes tuple_types;
    CHECK(common_schema.use_count() == 1);
    // Get the least common type for all paths.
    for (const auto& [key, subtypes] : subcolumns_types) {
        assert(!subtypes.empty());
        if (key.get_path() == ColumnVariant::COLUMN_NAME_DUMMY) {
            continue;
        }
        tuple_paths.emplace_back(key);

        // First candidate type whose array depth differs from that of subtypes[0], if any.
        const size_t first_dim = get_number_of_dimensions(*subtypes[0]);
        const DataTypePtr* incompatible = nullptr;
        for (size_t i = 1; i < subtypes.size(); ++i) {
            if (first_dim != get_number_of_dimensions(*subtypes[i])) {
                incompatible = &subtypes[i];
                break;
            }
        }
        if (incompatible != nullptr) {
            tuple_types.emplace_back(make_nullable(std::make_shared<DataTypeJsonb>()));
            LOG(INFO) << fmt::format(
                    "Uncompatible types of subcolumn '{}': {} and {}, cast to JSONB",
                    key.get_path(), subtypes[0]->get_name(), (*incompatible)->get_name());
            continue;
        }

        DataTypePtr common_type;
        get_least_supertype_jsonb(subtypes, &common_type);
        if (!common_type->is_nullable()) {
            common_type = make_nullable(common_type);
        }
        tuple_types.emplace_back(common_type);
    }
    CHECK_EQ(tuple_paths.size(), tuple_types.size());

    // Append all common type columns of this variant
    for (size_t i = 0; i < tuple_paths.size(); ++i) {
        const auto& path = tuple_paths[i];
        TabletColumn common_column;
        // typed path not contains root part
        const auto path_without_root = path.copy_pop_front().get_path();
        const auto typed_it = typed_columns.find(path_without_root);
        if (typed_it != typed_columns.end() && !path.has_nested_part()) {
            common_column = *typed_it->second;
            // parent unique id and path may not be init in write path
            common_column.set_parent_unique_id(variant_col_unique_id);
            common_column.set_path_info(path);
            common_column.set_name(path.get_path());
        } else {
            get_column_by_type(tuple_types[i], path.get_path(), common_column,
                               ExtraInfo {.unique_id = -1,
                                          .parent_unique_id = variant_col_unique_id,
                                          .path_info = path});
        }
        common_schema->append_column(common_column);
        if (path_set != nullptr) {
            path_set->insert(path);
        }
    }
    return Status::OK();
}
618
619
// Gathers, across all `schemas`, the types recorded for every subcolumn path of the variant
// column `variant_col_unique_id`, rejects ambiguous paths, and then lets
// update_least_schema_internal() append the merged subcolumns to `common_schema`.
// Sub columns already declared on the variant column in `common_schema` are passed along
// as typed columns, keyed by name.
Status update_least_common_schema(const std::vector<TabletSchemaSPtr>& schemas,
                                  TabletSchemaSPtr& common_schema, int32_t variant_col_unique_id,
                                  std::set<PathInData>* path_set) {
    std::map<std::string, TabletColumnPtr> typed_columns;
    const auto& variant_column = common_schema->column_by_uid(variant_col_unique_id);
    for (const TabletColumnPtr& sub_column : variant_column.get_sub_columns()) {
        typed_columns[sub_column->name()] = sub_column;
    }

    // True for extracted columns whose parent is the variant column being processed.
    const auto belongs_to_variant = [variant_col_unique_id](const TabletColumn& col) {
        return col.has_path_info() && col.parent_unique_id() >= 0 &&
               col.parent_unique_id() == variant_col_unique_id;
    };

    // Types of subcolumns by path from all tuples, plus the flat list of paths that is
    // checked for conflicts in one batch below.
    std::map<PathInData, DataTypes> subcolumns_types;
    std::vector<PathInData> all_paths;
    for (const TabletSchemaSPtr& schema : schemas) {
        for (const TabletColumnPtr& col : schema->columns()) {
            if (!belongs_to_variant(*col)) {
                continue;
            }
            const PathInData& path = *col->path_info_ptr();
            subcolumns_types[path].emplace_back(
                    DataTypeFactory::instance().create_data_type(*col, col->is_nullable()));
            all_paths.push_back(path);
        }
    }

    // Batch check for conflicts
    RETURN_IF_ERROR(check_variant_has_no_ambiguous_paths(all_paths));

    return update_least_schema_internal(subcolumns_types, common_schema, variant_col_unique_id,
                                        typed_columns, path_set);
}
651
652
// Keep variant subcolumn BF support aligned with FE DDL checks.
653
1.22k
bool is_bf_supported_by_fe_for_variant_subcolumn(FieldType type) {
654
1.22k
    switch (type) {
655
0
    case FieldType::OLAP_FIELD_TYPE_SMALLINT:
656
52
    case FieldType::OLAP_FIELD_TYPE_INT:
657
698
    case FieldType::OLAP_FIELD_TYPE_BIGINT:
658
698
    case FieldType::OLAP_FIELD_TYPE_LARGEINT:
659
698
    case FieldType::OLAP_FIELD_TYPE_CHAR:
660
698
    case FieldType::OLAP_FIELD_TYPE_VARCHAR:
661
1.17k
    case FieldType::OLAP_FIELD_TYPE_STRING:
662
1.17k
    case FieldType::OLAP_FIELD_TYPE_DATE:
663
1.17k
    case FieldType::OLAP_FIELD_TYPE_DATETIME:
664
1.18k
    case FieldType::OLAP_FIELD_TYPE_DATEV2:
665
1.18k
    case FieldType::OLAP_FIELD_TYPE_DATETIMEV2:
666
1.18k
    case FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ:
667
1.18k
    case FieldType::OLAP_FIELD_TYPE_DECIMAL:
668
1.18k
    case FieldType::OLAP_FIELD_TYPE_DECIMAL32:
669
1.18k
    case FieldType::OLAP_FIELD_TYPE_DECIMAL64:
670
1.18k
    case FieldType::OLAP_FIELD_TYPE_DECIMAL128I:
671
1.18k
    case FieldType::OLAP_FIELD_TYPE_DECIMAL256:
672
1.18k
    case FieldType::OLAP_FIELD_TYPE_IPV4:
673
1.18k
    case FieldType::OLAP_FIELD_TYPE_IPV6:
674
1.18k
        return true;
675
41
    default:
676
41
        return false;
677
1.22k
    }
678
1.22k
}
679
680
void inherit_column_attributes(const TabletColumn& source, TabletColumn& target,
681
1.22k
                               TabletSchemaSPtr* target_schema) {
682
1.22k
    if (!target.is_extracted_column()) {
683
0
        return;
684
0
    }
685
1.22k
    target.set_aggregation_method(source.aggregation());
686
687
    // 1. bloom filter
688
1.22k
    if (is_bf_supported_by_fe_for_variant_subcolumn(target.type())) {
689
1.18k
        target.set_is_bf_column(source.is_bf_column());
690
1.18k
    }
691
692
1.22k
    if (!target_schema) {
693
1.17k
        return;
694
1.17k
    }
695
696
    // 2. inverted index
697
57
    TabletIndexes indexes_to_add;
698
57
    auto source_indexes = (*target_schema)->inverted_indexs(source.unique_id());
699
    // if target is variant type, we need to inherit all indexes
700
    // because this schema is a read schema from fe
701
57
    if (target.is_variant_type()) {
702
0
        for (auto& index : source_indexes) {
703
0
            auto index_info = std::make_shared<TabletIndex>(*index);
704
0
            index_info->set_escaped_escaped_index_suffix_path(target.path_info_ptr()->get_path());
705
0
            indexes_to_add.emplace_back(std::move(index_info));
706
0
        }
707
57
    } else {
708
57
        inherit_index(source_indexes, indexes_to_add, target);
709
57
    }
710
57
    auto target_indexes = (*target_schema)
711
57
                                  ->inverted_indexs(target.parent_unique_id(),
712
57
                                                    target.path_info_ptr()->get_path());
713
57
    if (target_indexes.empty()) {
714
57
        for (auto& index_info : indexes_to_add) {
715
14
            (*target_schema)->append_index(std::move(*index_info));
716
14
        }
717
57
    }
718
719
    // 3. TODO: gnragm bf index
720
57
}
721
722
41
// Walks every column of `schema` and, for each extracted column whose parent is
// still present in the schema, inherits the parent's attributes (and adds index
// meta to `schema` if the extracted column is missing it).
void inherit_column_attributes(TabletSchemaSPtr& schema) {
    for (size_t idx = 0; idx < schema->num_columns(); ++idx) {
        TabletColumn& column = schema->mutable_column(idx);
        // Skip regular columns, and extracted columns whose parent column is
        // missing (maybe dropped).
        const bool has_live_parent = column.is_extracted_column() &&
                                     schema->field_index(column.parent_unique_id()) != -1;
        if (has_live_parent) {
            inherit_column_attributes(schema->column_by_uid(column.parent_unique_id()), column,
                                      &schema);
        }
    }
}
736
737
// Builds `output_schema` as the least common schema of `schemas`.
//
// @param schemas            input tablet schemas to merge.
// @param base_schema        schema used as the base; when null, the element of
//                           `schemas` with the highest schema_version() is used.
// @param output_schema      receives the newly built schema.
// @param check_schema_size  when true, fail if the merged schema has more columns
//                           than config::variant_max_merged_tablet_schema_size.
// @return OK on success; InternalError when there is neither a base schema nor any
//         input schema; DataQualityError when the size limit is exceeded; or the
//         error propagated from update_least_common_schema().
Status get_least_common_schema(const std::vector<TabletSchemaSPtr>& schemas,
                               const TabletSchemaSPtr& base_schema, TabletSchemaSPtr& output_schema,
                               bool check_schema_size) {
    std::vector<int32_t> variant_column_unique_id;
    // Construct a schema excluding the extracted columns and gather unique identifiers for variants.
    // Ensure that the output schema also excludes these extracted columns. This approach prevents
    // duplicated paths following the update_least_common_schema process.
    auto build_schema_without_extracted_columns = [&](const TabletSchemaSPtr& source_schema) {
        output_schema = std::make_shared<TabletSchema>();
        // not copy columns but only shadow copy other attributes
        output_schema->shawdow_copy_without_columns(*source_schema);
        // Get all columns without extracted columns and collect variant col unique id
        for (const TabletColumnPtr& col : source_schema->columns()) {
            if (col->is_variant_type()) {
                variant_column_unique_id.push_back(col->unique_id());
            }
            if (!col->is_extracted_column()) {
                output_schema->append_column(*col);
            }
        }
    };
    if (base_schema == nullptr) {
        // std::max_element returns end() for an empty range; dereferencing it is
        // undefined behaviour, so reject that input explicitly.
        if (schemas.empty()) {
            return Status::InternalError(
                    "Cannot build least common schema: no base schema and no input schemas");
        }
        // Pick tablet schema with max schema version.
        // A copy of the shared pointer is held on purpose: the builder reassigns
        // output_schema while it is still reading from the selected schema.
        TabletSchemaSPtr max_version_schema =
                *std::max_element(schemas.cbegin(), schemas.cend(),
                                  [](const TabletSchemaSPtr& a, const TabletSchemaSPtr& b) {
                                      return a->schema_version() < b->schema_version();
                                  });
        CHECK(max_version_schema);
        build_schema_without_extracted_columns(max_version_schema);
    } else {
        // use input base_schema schema as base schema
        build_schema_without_extracted_columns(base_schema);
    }

    for (int32_t unique_id : variant_column_unique_id) {
        std::set<PathInData> path_set;
        RETURN_IF_ERROR(update_least_common_schema(schemas, output_schema, unique_id, &path_set));
    }

    inherit_column_attributes(output_schema);
    if (check_schema_size &&
        output_schema->columns().size() > config::variant_max_merged_tablet_schema_size) {
        return Status::DataQualityError("Reached max column size limit {}",
                                        config::variant_max_merged_tablet_schema_size);
    }

    return Status::OK();
}
786
787
// sort by paths in lexicographical order
788
755
ColumnVariant::Subcolumns get_sorted_subcolumns(const ColumnVariant::Subcolumns& subcolumns) {
789
    // sort by paths in lexicographical order
790
755
    ColumnVariant::Subcolumns sorted = subcolumns;
791
23.2k
    std::sort(sorted.begin(), sorted.end(), [](const auto& lhsItem, const auto& rhsItem) {
792
23.2k
        return lhsItem->path < rhsItem->path;
793
23.2k
    });
794
755
    return sorted;
795
755
}
796
797
bool has_schema_index_diff(const TabletSchema* new_schema, const TabletSchema* old_schema,
798
4
                           int32_t new_col_idx, int32_t old_col_idx) {
799
4
    const auto& column_new = new_schema->column(new_col_idx);
800
4
    const auto& column_old = old_schema->column(old_col_idx);
801
802
4
    if (column_new.is_bf_column() != column_old.is_bf_column()) {
803
2
        return true;
804
2
    }
805
806
2
    auto new_schema_inverted_indexs = new_schema->inverted_indexs(column_new);
807
2
    auto old_schema_inverted_indexs = old_schema->inverted_indexs(column_old);
808
809
2
    if (new_schema_inverted_indexs.size() != old_schema_inverted_indexs.size()) {
810
1
        return true;
811
1
    }
812
813
2
    for (size_t i = 0; i < new_schema_inverted_indexs.size(); ++i) {
814
1
        if (!new_schema_inverted_indexs[i]->is_same_except_id(old_schema_inverted_indexs[i])) {
815
0
            return true;
816
0
        }
817
1
    }
818
819
1
    return false;
820
1
}
821
822
780
// Builds the sparse column that belongs to the variant column `variant`.
//
// The result is a MAP column named "<variant lower-case name>.<SPARSE_COLUMN_PATH>",
// using that same string as its path, with the variant's aggregation method, the
// variant's unique id as parent id, default value "NULL", and two STRING sub columns.
TabletColumn create_sparse_column(const TabletColumn& variant) {
    const std::string sparse_name = variant.name_lower_case() + "." + SPARSE_COLUMN_PATH;

    TabletColumn sparse_column;
    sparse_column.set_name(sparse_name);
    sparse_column.set_type(FieldType::OLAP_FIELD_TYPE_MAP);
    sparse_column.set_aggregation_method(variant.aggregation());
    sparse_column.set_path_info(PathInData {sparse_name});
    sparse_column.set_parent_unique_id(variant.unique_id());
    // set default value to "NULL" DefaultColumnIterator will call insert_many_defaults
    sparse_column.set_default_value("NULL");

    // The same STRING column definition is added twice as sub columns of the MAP.
    TabletColumn string_child;
    string_child.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
    for (int added = 0; added < 2; ++added) {
        sparse_column.add_sub_column(string_child);
    }
    return sparse_column;
}
837
838
63
// Builds one bucket ("shard") of the sparse column for the variant column `variant`.
//
// The result is a MAP column named
// "<variant lower-case name>.<SPARSE_COLUMN_PATH>.b<bucket_index>", using that same
// string as its path, with the variant's aggregation method, the variant's unique id
// as parent id, default value "NULL", and two STRING sub columns.
TabletColumn create_sparse_shard_column(const TabletColumn& variant, int bucket_index) {
    std::string shard_name = variant.name_lower_case() + "." + SPARSE_COLUMN_PATH;
    shard_name += ".b";
    shard_name += std::to_string(bucket_index);

    TabletColumn shard_column;
    shard_column.set_name(shard_name);
    shard_column.set_type(FieldType::OLAP_FIELD_TYPE_MAP);
    shard_column.set_aggregation_method(variant.aggregation());
    shard_column.set_parent_unique_id(variant.unique_id());
    shard_column.set_default_value("NULL");
    shard_column.set_path_info(PathInData(shard_name));

    // The same STRING column definition is added twice as sub columns of the MAP.
    TabletColumn string_child;
    string_child.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
    for (int added = 0; added < 2; ++added) {
        shard_column.add_sub_column(string_child);
    }
    return shard_column;
}
855
856
44
// Builds one bucket of the doc-value column for the variant column `variant`.
//
// The result is a MAP column named
// "<variant lower-case name>.<DOC_VALUE_COLUMN_PATH>.b<bucket_index>", using that
// same string as its path, with the variant's aggregation method, the variant's
// unique id as parent id, default value "NULL", and two STRING sub columns.
TabletColumn create_doc_value_column(const TabletColumn& variant, int bucket_index) {
    std::string bucket_name = variant.name_lower_case() + "." + DOC_VALUE_COLUMN_PATH;
    bucket_name += ".b";
    bucket_name += std::to_string(bucket_index);

    TabletColumn doc_value_column;
    doc_value_column.set_name(bucket_name);
    doc_value_column.set_type(FieldType::OLAP_FIELD_TYPE_MAP);
    doc_value_column.set_aggregation_method(variant.aggregation());
    doc_value_column.set_parent_unique_id(variant.unique_id());
    doc_value_column.set_default_value("NULL");
    doc_value_column.set_path_info(PathInData {bucket_name});

    // The same STRING column definition is added twice as sub columns of the MAP.
    TabletColumn string_child;
    string_child.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
    for (int added = 0; added < 2; ++added) {
        doc_value_column.add_sub_column(string_child);
    }
    return doc_value_column;
}
873
874
5.40k
uint32_t variant_binary_shard_of(const StringRef& path, uint32_t bucket_num) {
875
5.40k
    if (bucket_num <= 1) return 0;
876
5.40k
    SipHash hash;
877
5.40k
    hash.update(path.data, path.size);
878
5.40k
    uint64_t h = hash.get64();
879
5.40k
    return static_cast<uint32_t>(h % bucket_num);
880
5.40k
}
881
882
// Accumulates per-path non-null counts of the variant columns of rowset `rs`
// into `uid_to_path_stats` (keyed by column unique id, then by path).
//
// Columns that are not variant, have a negative unique id, or fail
// should_check_variant_path_stats() are ignored. For every remaining column and
// every loaded segment, both the sparse-column and the subcolumn non-null sizes
// reported by the variant column reader are added to the existing totals.
//
// Returns the first error from segment loading, column-reader lookup or external
// meta loading; OK otherwise.
Status VariantCompactionUtil::aggregate_path_to_stats(
        const RowsetSharedPtr& rs,
        std::unordered_map<int32_t, PathToNoneNullValues>* uid_to_path_stats) {
    SegmentCacheHandle segment_cache;
    RETURN_IF_ERROR(SegmentLoader::instance()->load_segments(
            std::static_pointer_cast<BetaRowset>(rs), &segment_cache));

    for (const auto& column : rs->tablet_schema()->columns()) {
        if (!column->is_variant_type() || column->unique_id() < 0 ||
            !should_check_variant_path_stats(*column)) {
            continue;
        }
        const auto uid = column->unique_id();

        // Adds each (path, count) pair to the running totals of this column.
        // Map entries are created lazily, only when there is something to add.
        const auto merge_into_totals = [&](const auto& path_sizes) {
            for (const auto& [path, size] : path_sizes) {
                (*uid_to_path_stats)[uid][path] += size;
            }
        };

        for (const auto& segment : segment_cache.get_segments()) {
            std::shared_ptr<ColumnReader> column_reader;
            OlapReaderStatistics reader_stats;
            RETURN_IF_ERROR(segment->get_column_reader(uid, &column_reader, &reader_stats));
            if (column_reader == nullptr) {
                continue;
            }

            CHECK(column_reader->get_meta_type() == FieldType::OLAP_FIELD_TYPE_VARIANT);
            auto* variant_reader =
                    assert_cast<segment_v2::VariantColumnReader*>(column_reader.get());
            // load external meta before getting stats
            RETURN_IF_ERROR(variant_reader->load_external_meta_once());
            const auto* source_stats = variant_reader->get_stats();
            CHECK(source_stats);

            // agg path -> stats
            merge_into_totals(source_stats->sparse_column_non_null_size);
            merge_into_totals(source_stats->subcolumns_non_null_size);
        }
    }
    return Status::OK();
}
925
926
// Collects, for every variant column of rowset `rs`, the extended information
// needed to build a compaction schema and merges it into
// `uid_to_variant_extended_info` (keyed by column unique id).
//
// Per segment the following is gathered from the variant column reader:
//   1. path -> non-null count and the set of sparse paths (skipped for columns
//      with nested group enabled),
//   2. path -> data types,
//   3. typed paths,
//   4. nested paths (skipped for columns with nested group enabled).
//
// Returns the first error from segment loading, column-reader lookup or external
// meta loading; OK otherwise.
Status VariantCompactionUtil::aggregate_variant_extended_info(
        const RowsetSharedPtr& rs,
        std::unordered_map<int32_t, VariantExtendedInfo>* uid_to_variant_extended_info) {
    SegmentCacheHandle segment_cache;
    RETURN_IF_ERROR(SegmentLoader::instance()->load_segments(
            std::static_pointer_cast<BetaRowset>(rs), &segment_cache));

    for (const auto& column : rs->tablet_schema()->columns()) {
        if (!column->is_variant_type()) {
            continue;
        }
        const auto uid = column->unique_id();
        // The entry is created even if no segment contributes anything.
        auto& info = (*uid_to_variant_extended_info)[uid];
        const bool nested_group_enabled = column->variant_enable_nested_group();
        if (nested_group_enabled) {
            info.has_nested_group = true;
        }

        for (const auto& segment : segment_cache.get_segments()) {
            std::shared_ptr<ColumnReader> column_reader;
            OlapReaderStatistics reader_stats;
            RETURN_IF_ERROR(segment->get_column_reader(uid, &column_reader, &reader_stats));
            if (column_reader == nullptr) {
                continue;
            }

            CHECK(column_reader->get_meta_type() == FieldType::OLAP_FIELD_TYPE_VARIANT);
            auto* variant_reader =
                    assert_cast<segment_v2::VariantColumnReader*>(column_reader.get());
            // load external meta before getting stats
            RETURN_IF_ERROR(variant_reader->load_external_meta_once());
            const auto* source_stats = variant_reader->get_stats();
            CHECK(source_stats);

            // 1. agg path -> stats
            // NG roots still need type metadata for regular subpaths such as `v.owner`,
            // but their compaction schema should not be driven by flat path stats.
            if (!nested_group_enabled) {
                for (const auto& [path, size] : source_stats->sparse_column_non_null_size) {
                    info.path_to_none_null_values[path] += size;
                    info.sparse_paths.emplace(path);
                }
                for (const auto& [path, size] : source_stats->subcolumns_non_null_size) {
                    info.path_to_none_null_values[path] += size;
                }
            }

            // 2. agg path -> schema
            variant_reader->get_subcolumns_types(&info.path_to_data_types);

            // 3. extract typed paths
            variant_reader->get_typed_paths(&info.typed_paths);

            // 4. extract nested paths
            if (!nested_group_enabled) {
                variant_reader->get_nested_paths(&info.nested_paths);
            }
        }
    }
    return Status::OK();
}
985
986
// get the subpaths and sparse paths for the variant column
987
void VariantCompactionUtil::get_subpaths(int32_t max_subcolumns_count,
988
                                         const PathToNoneNullValues& stats,
989
42
                                         TabletSchema::PathsSetInfo& paths_set_info) {
990
    // max_subcolumns_count is 0 means no limit
991
42
    if (max_subcolumns_count > 0 && stats.size() > max_subcolumns_count) {
992
17
        std::vector<std::pair<size_t, std::string_view>> paths_with_sizes;
993
17
        paths_with_sizes.reserve(stats.size());
994
94
        for (const auto& [path, size] : stats) {
995
94
            paths_with_sizes.emplace_back(size, path);
996
94
        }
997
17
        std::sort(paths_with_sizes.begin(), paths_with_sizes.end(), std::greater());
998
999
        // Select top N paths as subcolumns, remaining paths as sparse columns
1000
94
        for (const auto& [size, path] : paths_with_sizes) {
1001
94
            if (paths_set_info.sub_path_set.size() < max_subcolumns_count) {
1002
29
                paths_set_info.sub_path_set.emplace(path);
1003
65
            } else {
1004
65
                paths_set_info.sparse_path_set.emplace(path);
1005
65
            }
1006
94
        }
1007
17
        LOG(INFO) << "subpaths " << paths_set_info.sub_path_set.size() << " sparse paths "
1008
17
                  << paths_set_info.sparse_path_set.size() << " variant max subcolumns count "
1009
17
                  << max_subcolumns_count << " stats size " << paths_with_sizes.size();
1010
25
    } else {
1011
        // Apply all paths as subcolumns
1012
35
        for (const auto& [path, _] : stats) {
1013
35
            paths_set_info.sub_path_set.emplace(path);
1014
35
        }
1015
25
    }
1016
42
}
1017
1018
// Sanity-checks the variant path statistics of a compaction `output` rowset against
// the statistics aggregated from its input rowsets.
//
// The check is skipped (OK is returned) when:
//   - the output schema has no variant column,
//   - any input or output variant column fails should_check_variant_path_stats(),
//   - any input rowset schema contains an extracted column,
//   - the tablet is not DUP_KEYS (rows may be merged in other models),
//   - any input rowset carries a delete predicate.
//
// Outside BE_TEST builds, an extracted column in the output schema that is neither a
// doc-value nor a sparse column is reported as InternalError.
//
// For each variant column of the output (doc-mode columns excluded):
//   - if the output has more paths than variant_max_sparse_column_statistics_size(),
//     the input stats may be inaccurate; only when the output has exactly one segment
//     is each path known to the inputs required not to shrink;
//   - otherwise every output path must exist in the input stats with an equal count.
//
// Returns InternalError on any violation, or the error propagated from
// aggregate_path_to_stats().
Status VariantCompactionUtil::check_path_stats(const std::vector<RowsetSharedPtr>& intputs,
                                               RowsetSharedPtr output, BaseTabletSPtr tablet) {
    if (output->tablet_schema()->num_variant_columns() == 0) {
        return Status::OK();
    }

    // True for a variant column whose path stats must not be checked.
    const auto skips_path_stats = [](const auto& column) {
        return column->is_variant_type() && !should_check_variant_path_stats(*column);
    };

    for (const auto& rowset : intputs) {
        if (std::ranges::any_of(rowset->tablet_schema()->columns(), skips_path_stats)) {
            return Status::OK();
        }
    }
    // check no extended schema in input rowsets
    for (const auto& rowset : intputs) {
        const bool has_extracted_column =
                std::ranges::any_of(rowset->tablet_schema()->columns(),
                                    [](const auto& column) { return column->is_extracted_column(); });
        if (has_extracted_column) {
            return Status::OK();
        }
    }
#ifndef BE_TEST
    // check no extended schema in output rowset
    for (const auto& column : output->tablet_schema()->columns()) {
        if (column->is_extracted_column()) {
            const auto& name = column->name();
            if (name.find("." + DOC_VALUE_COLUMN_PATH + ".") != std::string::npos ||
                name.find("." + SPARSE_COLUMN_PATH + ".") != std::string::npos ||
                name.ends_with("." + SPARSE_COLUMN_PATH)) {
                continue;
            }
            return Status::InternalError("Unexpected extracted column {} in output rowset",
                                         column->name());
        }
    }
#endif
    // only check path stats for dup_keys since the rows may be merged in other models
    if (tablet->keys_type() != KeysType::DUP_KEYS) {
        return Status::OK();
    }
    // if there is a delete predicate in the input rowsets, we skip the path stats check
    const bool any_delete_predicate = std::ranges::any_of(intputs, [](const auto& rowset) {
        return rowset->rowset_meta()->has_delete_predicate();
    });
    if (any_delete_predicate) {
        return Status::OK();
    }
    if (std::ranges::any_of(output->tablet_schema()->columns(), skips_path_stats)) {
        return Status::OK();
    }

    std::unordered_map<int32_t, PathToNoneNullValues> original_uid_to_path_stats;
    for (const auto& rs : intputs) {
        RETURN_IF_ERROR(aggregate_path_to_stats(rs, &original_uid_to_path_stats));
    }
    std::unordered_map<int32_t, PathToNoneNullValues> output_uid_to_path_stats;
    RETURN_IF_ERROR(aggregate_path_to_stats(output, &output_uid_to_path_stats));

    for (const auto& [uid, stats] : output_uid_to_path_stats) {
        const auto& output_column = output->tablet_schema()->column_by_uid(uid);
        if (output_column.is_variant_type() && output_column.variant_enable_doc_mode()) {
            continue;
        }
        const auto input_entry = original_uid_to_path_stats.find(uid);
        if (input_entry == original_uid_to_path_stats.end()) {
            return Status::InternalError("Path stats not found for uid {}, tablet_id {}", uid,
                                         tablet->tablet_id());
        }
        const auto& input_stats = input_entry->second;

        // In input rowsets, some rowsets may have statistics values exceeding the maximum limit,
        // which leads to inaccurate statistics
        const bool input_stats_may_be_inaccurate =
                stats.size() > output_column.variant_max_sparse_column_statistics_size();
        if (input_stats_may_be_inaccurate) {
            // When there is only one segment, we can ensure that the size of each path in
            // output stats is accurate
            if (output->num_segments() != 1) {
                continue;
            }
            for (const auto& [path, size] : stats) {
                const auto input_path = input_stats.find(path);
                if (input_path == input_stats.end()) {
                    continue;
                }
                if (input_path->second > size) {
                    return Status::InternalError(
                            "Path stats not smaller for uid {} with path `{}`, input size {}, "
                            "output size {}, tablet_id {}",
                            uid, path, input_path->second, size, tablet->tablet_id());
                }
            }
            continue;
        }

        // in this case, input stats is accurate, so we check the stats size and stats value
        for (const auto& [path, size] : stats) {
            const auto input_path = input_stats.find(path);
            if (input_path == input_stats.end()) {
                return Status::InternalError(
                        "Path stats not found for uid {}, path {}, tablet_id {}", uid, path,
                        tablet->tablet_id());
            }
            if (input_path->second != size) {
                return Status::InternalError(
                        "Path stats not match for uid {} with path `{}`, input size {}, "
                        "output size {}, tablet_id {}",
                        uid, path, input_path->second, size, tablet->tablet_id());
            }
        }
    }

    return Status::OK();
}
1131
1132
// Appends one typed column per entry of `typed_paths` to `output_schema` and records
// it in `paths_set_info.typed_path_set`.
//
// Nothing is done when the parent column has variant_enable_typed_paths_to_sparse()
// set. For each path the sub column info is generated from `target`; the generated
// column inherits the parent's attributes before being appended.
//
// Returns InternalError as soon as sub column info cannot be generated for a path.
Status VariantCompactionUtil::get_compaction_typed_columns(
        const TabletSchemaSPtr& target, const std::unordered_set<std::string>& typed_paths,
        const TabletColumnPtr parent_column, TabletSchemaSPtr& output_schema,
        TabletSchema::PathsSetInfo& paths_set_info) {
    if (parent_column->variant_enable_typed_paths_to_sparse()) {
        return Status::OK();
    }
    for (const auto& path : typed_paths) {
        TabletSchema::SubColumnInfo typed_info;
        const bool generated =
                generate_sub_column_info(*target, parent_column->unique_id(), path, &typed_info);
        if (!generated) {
            return Status::InternalError("Failed to generate sub column info for path {}", path);
        }
        inherit_column_attributes(*parent_column, typed_info.column);
        output_schema->append_column(typed_info.column);
        paths_set_info.typed_path_set.insert({path, std::move(typed_info)});
        VLOG_DEBUG << "append typed column " << path;
    }
    return Status::OK();
}
1152
1153
// Appends one column per nested path to `output_schema`.
//
// For each path in `nested_paths` the column type is the least supertype (as computed
// by get_least_supertype_jsonb) of the data types recorded in `path_to_data_types`.
// The new column is named "<parent lower-case name>.<path>", inherits the parent's
// attributes, and the indexes inherited from the parent are stored in
// `paths_set_info.subcolumn_indexes` under the path.
//
// Returns InternalError when a nested path has no recorded data type.
Status VariantCompactionUtil::get_compaction_nested_columns(
        const std::unordered_set<PathInData, PathInData::Hash>& nested_paths,
        const PathToDataTypes& path_to_data_types, const TabletColumnPtr parent_column,
        TabletSchemaSPtr& output_schema, TabletSchema::PathsSetInfo& paths_set_info) {
    const auto& parent_indexes = output_schema->inverted_indexs(parent_column->unique_id());
    for (const auto& path : nested_paths) {
        const auto types_entry = path_to_data_types.find(path);
        const bool has_types =
                types_entry != path_to_data_types.end() && !types_entry->second.empty();
        if (!has_types) {
            return Status::InternalError("Nested path {} has no data type", path.get_path());
        }
        DataTypePtr data_type;
        get_least_supertype_jsonb(types_entry->second, &data_type);

        const std::string column_name = parent_column->name_lower_case() + "." + path.get_path();
        // Full path = parent column name followed by the parts of the nested path.
        PathInDataBuilder full_path_builder;
        auto full_path = full_path_builder.append(parent_column->name_lower_case(), false)
                                 .append(path.get_parts(), false)
                                 .build();
        TabletColumn nested_column =
                get_column_by_type(data_type, column_name,
                                   ExtraInfo {.unique_id = -1,
                                              .parent_unique_id = parent_column->unique_id(),
                                              .path_info = full_path});
        inherit_column_attributes(*parent_column, nested_column);

        TabletIndexes nested_indexes;
        inherit_index(parent_indexes, nested_indexes, nested_column);
        paths_set_info.subcolumn_indexes.emplace(path.get_path(), std::move(nested_indexes));

        output_schema->append_column(nested_column);
        VLOG_DEBUG << "append nested column " << path.get_path();
    }
    return Status::OK();
}
1185
1186
void VariantCompactionUtil::get_compaction_subcolumns_from_subpaths(
1187
        TabletSchema::PathsSetInfo& paths_set_info, const TabletColumnPtr parent_column,
1188
        const TabletSchemaSPtr& target, const PathToDataTypes& path_to_data_types,
1189
38
        const std::unordered_set<std::string>& sparse_paths, TabletSchemaSPtr& output_schema) {
1190
38
    auto& path_set = paths_set_info.sub_path_set;
1191
38
    std::vector<StringRef> sorted_subpaths(path_set.begin(), path_set.end());
1192
38
    std::sort(sorted_subpaths.begin(), sorted_subpaths.end());
1193
38
    const auto& parent_indexes = target->inverted_indexs(parent_column->unique_id());
1194
    // append subcolumns
1195
62
    for (const auto& subpath : sorted_subpaths) {
1196
62
        auto column_name = parent_column->name_lower_case() + "." + subpath.to_string();
1197
62
        auto column_path = PathInData(column_name);
1198
1199
62
        const auto& find_data_types = path_to_data_types.find(PathInData(subpath));
1200
1201
        // some cases: the subcolumn type is variant
1202
        // 1. this path has no data type in segments
1203
        // 2. this path is in sparse paths
1204
        // 3. the sparse paths are too much
1205
62
        TabletSchema::SubColumnInfo sub_column_info;
1206
62
        if (parent_column->variant_enable_typed_paths_to_sparse() &&
1207
62
            generate_sub_column_info(*target, parent_column->unique_id(), std::string(subpath),
1208
16
                                     &sub_column_info)) {
1209
8
            inherit_column_attributes(*parent_column, sub_column_info.column);
1210
8
            output_schema->append_column(sub_column_info.column);
1211
8
            paths_set_info.subcolumn_indexes.emplace(subpath, std::move(sub_column_info.indexes));
1212
8
            VLOG_DEBUG << "append typed column " << subpath;
1213
54
        } else if (find_data_types == path_to_data_types.end() || find_data_types->second.empty() ||
1214
54
                   sparse_paths.find(std::string(subpath)) != sparse_paths.end() ||
1215
54
                   sparse_paths.size() >=
1216
42
                           parent_column->variant_max_sparse_column_statistics_size()) {
1217
19
            TabletColumn subcolumn;
1218
19
            subcolumn.set_name(column_name);
1219
19
            subcolumn.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
1220
19
            subcolumn.set_parent_unique_id(parent_column->unique_id());
1221
19
            subcolumn.set_path_info(column_path);
1222
19
            subcolumn.set_aggregation_method(parent_column->aggregation());
1223
19
            subcolumn.set_variant_max_subcolumns_count(
1224
19
                    parent_column->variant_max_subcolumns_count());
1225
19
            subcolumn.set_variant_enable_doc_mode(parent_column->variant_enable_doc_mode());
1226
19
            subcolumn.set_is_nullable(true);
1227
19
            output_schema->append_column(subcolumn);
1228
19
            VLOG_DEBUG << "append sub column " << subpath << " data type "
1229
0
                       << "VARIANT";
1230
19
        }
1231
        // normal case: the subcolumn type can be calculated from the data types in segments
1232
35
        else {
1233
35
            DataTypePtr data_type;
1234
35
            get_least_supertype_jsonb(find_data_types->second, &data_type);
1235
35
            TabletColumn sub_column =
1236
35
                    get_column_by_type(data_type, column_name,
1237
35
                                       ExtraInfo {.unique_id = -1,
1238
35
                                                  .parent_unique_id = parent_column->unique_id(),
1239
35
                                                  .path_info = column_path});
1240
35
            inherit_column_attributes(*parent_column, sub_column);
1241
35
            TabletIndexes sub_column_indexes;
1242
35
            inherit_index(parent_indexes, sub_column_indexes, sub_column);
1243
35
            paths_set_info.subcolumn_indexes.emplace(subpath, std::move(sub_column_indexes));
1244
35
            output_schema->append_column(sub_column);
1245
35
            VLOG_DEBUG << "append sub column " << subpath << " data type " << data_type->get_name();
1246
35
        }
1247
62
    }
1248
38
}
1249
1250
void VariantCompactionUtil::get_compaction_subcolumns_from_data_types(
1251
        TabletSchema::PathsSetInfo& paths_set_info, const TabletColumnPtr parent_column,
1252
        const TabletSchemaSPtr& target, const PathToDataTypes& path_to_data_types,
1253
4
        TabletSchemaSPtr& output_schema) {
1254
4
    const auto& parent_indexes = target->inverted_indexs(parent_column->unique_id());
1255
8
    for (const auto& [path, data_types] : path_to_data_types) {
1256
        // Typed paths are materialized by get_compaction_typed_columns(); this helper only
1257
        // materializes regular subcolumns inferred from rowset data types.
1258
8
        if (data_types.empty() || path.empty() || path.get_is_typed() || path.has_nested_part()) {
1259
2
            continue;
1260
2
        }
1261
6
        DataTypePtr data_type;
1262
6
        get_least_supertype_jsonb(data_types, &data_type);
1263
6
        auto column_name = parent_column->name_lower_case() + "." + path.get_path();
1264
6
        auto column_path = PathInData(column_name);
1265
6
        TabletColumn sub_column =
1266
6
                get_column_by_type(data_type, column_name,
1267
6
                                   ExtraInfo {.unique_id = -1,
1268
6
                                              .parent_unique_id = parent_column->unique_id(),
1269
6
                                              .path_info = column_path});
1270
6
        inherit_column_attributes(*parent_column, sub_column);
1271
6
        TabletIndexes sub_column_indexes;
1272
6
        inherit_index(parent_indexes, sub_column_indexes, sub_column);
1273
6
        paths_set_info.sub_path_set.emplace(path.get_path());
1274
6
        paths_set_info.subcolumn_indexes.emplace(path.get_path(), std::move(sub_column_indexes));
1275
6
        output_schema->append_column(sub_column);
1276
6
        VLOG_DEBUG << "append sub column " << path.get_path() << " data type "
1277
0
                   << data_type->get_name();
1278
6
    }
1279
4
}
1280
1281
// Build the temporary schema for compaction.
1282
// NestedGroup roots are special: the root VARIANT column owns the NG tree and the streaming NG
1283
// writer handles NG children, while regular non-NG paths beside the arrays are materialized as
1284
// ordinary extracted subcolumns. NG typed paths still use get_compaction_typed_columns(), keeping
1285
// typed-column rules out of the NG-specific regular-path filtering.
1286
Status VariantCompactionUtil::get_extended_compaction_schema(
1287
67
        const std::vector<RowsetSharedPtr>& rowsets, TabletSchemaSPtr& target) {
1288
67
    std::unordered_map<int32_t, VariantExtendedInfo> uid_to_variant_extended_info;
1289
67
    const bool needs_variant_extended_info =
1290
872
            std::ranges::any_of(target->columns(), [](const TabletColumnPtr& column) {
1291
872
                return column->is_variant_type() && (should_check_variant_path_stats(*column) ||
1292
30
                                                     column->variant_enable_nested_group());
1293
872
            });
1294
67
    if (needs_variant_extended_info) {
1295
        // collect path stats from all rowsets and segments
1296
59
        for (const auto& rs : rowsets) {
1297
59
            RETURN_IF_ERROR(aggregate_variant_extended_info(rs, &uid_to_variant_extended_info));
1298
59
        }
1299
30
    }
1300
1301
    // build the output schema
1302
67
    TabletSchemaSPtr output_schema = std::make_shared<TabletSchema>();
1303
67
    output_schema->shawdow_copy_without_columns(*target);
1304
67
    std::unordered_map<int32_t, TabletSchema::PathsSetInfo> uid_to_paths_set_info;
1305
67
    const auto ng_root_uids =
1306
67
            collect_nested_group_compaction_root_uids(target, uid_to_variant_extended_info);
1307
881
    for (const TabletColumnPtr& column : target->columns()) {
1308
881
        if (!column->is_extracted_column()) {
1309
880
            output_schema->append_column(*column);
1310
880
        }
1311
881
        if (!column->is_variant_type()) {
1312
847
            continue;
1313
847
        }
1314
34
        VLOG_DEBUG << "column " << column->name() << " unique id " << column->unique_id();
1315
1316
34
        const auto info_it = uid_to_variant_extended_info.find(column->unique_id());
1317
34
        const VariantExtendedInfo empty_extended_info;
1318
34
        const VariantExtendedInfo& extended_info = info_it == uid_to_variant_extended_info.end()
1319
34
                                                           ? empty_extended_info
1320
34
                                                           : info_it->second;
1321
34
        auto& paths_set_info = uid_to_paths_set_info[column->unique_id()];
1322
34
        const bool use_nested_group_compaction_schema = ng_root_uids.contains(column->unique_id());
1323
1324
34
        if (use_nested_group_compaction_schema) {
1325
            // 1. append typed columns. Keep this shared with the non-NG typed helper; only the
1326
            // regular-path selection below is NG-specific.
1327
1
            RETURN_IF_ERROR(get_compaction_typed_columns(target, extended_info.typed_paths, column,
1328
1
                                                         output_schema, paths_set_info));
1329
1330
            // NG roots do not record path-count stats for ordinary Variant paths, so their regular
1331
            // non-NG subcolumns use the same data-types materialization helper as the
1332
            // all-materialized non-NG branch below.
1333
1
            auto regular_path_to_data_types =
1334
1
                    collect_regular_types_outside_nested_group(extended_info);
1335
1
            get_compaction_subcolumns_from_data_types(paths_set_info, column, target,
1336
1
                                                      regular_path_to_data_types, output_schema);
1337
1
            LOG(INFO) << "Variant column uid=" << column->unique_id()
1338
1
                      << " keeps nested-group root and materializes regular non-NG subcolumns in "
1339
1
                         "compaction schema";
1340
1
            continue;
1341
1
        }
1342
1343
33
        if (column->variant_enable_doc_mode()) {
1344
1
            const int bucket_num = std::max(1, column->variant_doc_hash_shard_count());
1345
3
            for (int b = 0; b < bucket_num; ++b) {
1346
2
                TabletColumn doc_value_bucket_column = create_doc_value_column(*column, b);
1347
2
                doc_value_bucket_column.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
1348
2
                doc_value_bucket_column.set_is_nullable(false);
1349
2
                doc_value_bucket_column.set_variant_enable_doc_mode(true);
1350
2
                output_schema->append_column(doc_value_bucket_column);
1351
2
            }
1352
1
            continue;
1353
1
        }
1354
1355
        // 1. append typed columns
1356
32
        RETURN_IF_ERROR(get_compaction_typed_columns(target, extended_info.typed_paths, column,
1357
32
                                                     output_schema, paths_set_info));
1358
1359
        // 2. append nested columns
1360
32
        RETURN_IF_ERROR(get_compaction_nested_columns(extended_info.nested_paths,
1361
32
                                                      extended_info.path_to_data_types, column,
1362
32
                                                      output_schema, paths_set_info));
1363
1364
        // 3. get the subpaths
1365
32
        get_subpaths(column->variant_max_subcolumns_count(), extended_info.path_to_none_null_values,
1366
32
                     paths_set_info);
1367
1368
        // 4. append subcolumns
1369
32
        if (column->variant_max_subcolumns_count() > 0 || !column->get_sub_columns().empty()) {
1370
30
            get_compaction_subcolumns_from_subpaths(paths_set_info, column, target,
1371
30
                                                    extended_info.path_to_data_types,
1372
30
                                                    extended_info.sparse_paths, output_schema);
1373
30
        }
1374
        // variant_max_subcolumns_count == 0 and no typed paths materialized
1375
        // it means that all subcolumns are materialized, may be from old data
1376
2
        else {
1377
2
            get_compaction_subcolumns_from_data_types(paths_set_info, column, target,
1378
2
                                                      extended_info.path_to_data_types,
1379
2
                                                      output_schema);
1380
2
        }
1381
1382
        // append sparse column(s)
1383
        // If variant uses bucketized sparse columns, append one sparse bucket column per bucket.
1384
        // Otherwise, append the single sparse column.
1385
32
        int bucket_num = std::max(1, column->variant_sparse_hash_shard_count());
1386
32
        if (bucket_num > 1) {
1387
9
            for (int b = 0; b < bucket_num; ++b) {
1388
6
                TabletColumn sparse_bucket_column = create_sparse_shard_column(*column, b);
1389
6
                output_schema->append_column(sparse_bucket_column);
1390
6
            }
1391
29
        } else {
1392
29
            TabletColumn sparse_column = create_sparse_column(*column);
1393
29
            output_schema->append_column(sparse_column);
1394
29
        }
1395
32
    }
1396
1397
67
    target = output_schema;
1398
    // used to merge & filter path to sparse column during reading in compaction
1399
67
    target->set_path_set_info(std::move(uid_to_paths_set_info));
1400
67
    VLOG_DEBUG << "dump schema " << target->dump_full_schema();
1401
67
    return Status::OK();
1402
67
}
1403
1404
// Calculate statistics about variant data paths from the encoded sparse column
1405
void VariantCompactionUtil::calculate_variant_stats(const IColumn& encoded_sparse_column,
1406
                                                    segment_v2::VariantStatisticsPB* stats,
1407
                                                    size_t max_sparse_column_statistics_size,
1408
28
                                                    size_t row_pos, size_t num_rows) {
1409
    // Cast input column to ColumnMap type since sparse column is stored as a map
1410
28
    const auto& map_column = assert_cast<const ColumnMap&>(encoded_sparse_column);
1411
1412
    // Get the keys column which contains the paths as strings
1413
28
    const auto& sparse_data_paths =
1414
28
            assert_cast<const ColumnString*>(map_column.get_keys_ptr().get());
1415
28
    const auto& serialized_sparse_column_offsets = map_column.get_offsets();
1416
28
    auto& count_map = *stats->mutable_sparse_column_non_null_size();
1417
    // Iterate through all paths in the sparse column
1418
150
    for (size_t i = row_pos; i != row_pos + num_rows; ++i) {
1419
122
        size_t offset = serialized_sparse_column_offsets[i - 1];
1420
122
        size_t end = serialized_sparse_column_offsets[i];
1421
200
        for (size_t j = offset; j != end; ++j) {
1422
78
            auto path = sparse_data_paths->get_data_at(j);
1423
1424
78
            const auto& sparse_path = path.to_string();
1425
            // If path already exists in statistics, increment its count
1426
78
            if (auto it = count_map.find(sparse_path); it != count_map.end()) {
1427
29
                ++it->second;
1428
29
            }
1429
            // If path doesn't exist and we haven't hit the max statistics size limit,
1430
            // add it with count 1
1431
49
            else if (count_map.size() < max_sparse_column_statistics_size) {
1432
31
                count_map.emplace(sparse_path, 1);
1433
31
            }
1434
78
        }
1435
122
    }
1436
1437
28
    if (stats->sparse_column_non_null_size().size() > max_sparse_column_statistics_size) {
1438
0
        throw doris::Exception(
1439
0
                ErrorCode::INTERNAL_ERROR,
1440
0
                "Sparse column non null size: {} is greater than max statistics size: {}",
1441
0
                stats->sparse_column_non_null_size().size(), max_sparse_column_statistics_size);
1442
0
    }
1443
28
}
1444
1445
/// Calculates number of dimensions in array field.
1446
/// Returns 0 for scalar fields.
1447
class FieldVisitorToNumberOfDimensions : public StaticVisitor<size_t> {
1448
public:
1449
    FieldVisitorToNumberOfDimensions() = default;
1450
    template <PrimitiveType T>
1451
2.40M
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1452
2.40M
        if constexpr (T == TYPE_ARRAY) {
1453
128k
            const size_t size = x.size();
1454
128k
            size_t dimensions = 0;
1455
873k
            for (size_t i = 0; i < size; ++i) {
1456
745k
                size_t element_dimensions = apply_visitor(*this, x[i]);
1457
745k
                dimensions = std::max(dimensions, element_dimensions);
1458
745k
            }
1459
128k
            return 1 + dimensions;
1460
2.28M
        } else {
1461
2.28M
            return 0;
1462
2.28M
        }
1463
2.40M
    }
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE1EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1451
24.0k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1452
        if constexpr (T == TYPE_ARRAY) {
1453
            const size_t size = x.size();
1454
            size_t dimensions = 0;
1455
            for (size_t i = 0; i < size; ++i) {
1456
                size_t element_dimensions = apply_visitor(*this, x[i]);
1457
                dimensions = std::max(dimensions, element_dimensions);
1458
            }
1459
            return 1 + dimensions;
1460
24.0k
        } else {
1461
24.0k
            return 0;
1462
24.0k
        }
1463
24.0k
    }
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE26EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE42EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE7EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1451
40.9k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1452
        if constexpr (T == TYPE_ARRAY) {
1453
            const size_t size = x.size();
1454
            size_t dimensions = 0;
1455
            for (size_t i = 0; i < size; ++i) {
1456
                size_t element_dimensions = apply_visitor(*this, x[i]);
1457
                dimensions = std::max(dimensions, element_dimensions);
1458
            }
1459
            return 1 + dimensions;
1460
40.9k
        } else {
1461
40.9k
            return 0;
1462
40.9k
        }
1463
40.9k
    }
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE12EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE11EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE25EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE2EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1451
69.6k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1452
        if constexpr (T == TYPE_ARRAY) {
1453
            const size_t size = x.size();
1454
            size_t dimensions = 0;
1455
            for (size_t i = 0; i < size; ++i) {
1456
                size_t element_dimensions = apply_visitor(*this, x[i]);
1457
                dimensions = std::max(dimensions, element_dimensions);
1458
            }
1459
            return 1 + dimensions;
1460
69.6k
        } else {
1461
69.6k
            return 0;
1462
69.6k
        }
1463
69.6k
    }
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE3EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1451
6
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1452
        if constexpr (T == TYPE_ARRAY) {
1453
            const size_t size = x.size();
1454
            size_t dimensions = 0;
1455
            for (size_t i = 0; i < size; ++i) {
1456
                size_t element_dimensions = apply_visitor(*this, x[i]);
1457
                dimensions = std::max(dimensions, element_dimensions);
1458
            }
1459
            return 1 + dimensions;
1460
6
        } else {
1461
6
            return 0;
1462
6
        }
1463
6
    }
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE4EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1451
7
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1452
        if constexpr (T == TYPE_ARRAY) {
1453
            const size_t size = x.size();
1454
            size_t dimensions = 0;
1455
            for (size_t i = 0; i < size; ++i) {
1456
                size_t element_dimensions = apply_visitor(*this, x[i]);
1457
                dimensions = std::max(dimensions, element_dimensions);
1458
            }
1459
            return 1 + dimensions;
1460
7
        } else {
1461
7
            return 0;
1462
7
        }
1463
7
    }
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE5EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1451
1.20k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1452
        if constexpr (T == TYPE_ARRAY) {
1453
            const size_t size = x.size();
1454
            size_t dimensions = 0;
1455
            for (size_t i = 0; i < size; ++i) {
1456
                size_t element_dimensions = apply_visitor(*this, x[i]);
1457
                dimensions = std::max(dimensions, element_dimensions);
1458
            }
1459
            return 1 + dimensions;
1460
1.20k
        } else {
1461
1.20k
            return 0;
1462
1.20k
        }
1463
1.20k
    }
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE6EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1451
1.00M
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1452
        if constexpr (T == TYPE_ARRAY) {
1453
            const size_t size = x.size();
1454
            size_t dimensions = 0;
1455
            for (size_t i = 0; i < size; ++i) {
1456
                size_t element_dimensions = apply_visitor(*this, x[i]);
1457
                dimensions = std::max(dimensions, element_dimensions);
1458
            }
1459
            return 1 + dimensions;
1460
1.00M
        } else {
1461
1.00M
            return 0;
1462
1.00M
        }
1463
1.00M
    }
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE38EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE39EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE8EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1451
1
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1452
        if constexpr (T == TYPE_ARRAY) {
1453
            const size_t size = x.size();
1454
            size_t dimensions = 0;
1455
            for (size_t i = 0; i < size; ++i) {
1456
                size_t element_dimensions = apply_visitor(*this, x[i]);
1457
                dimensions = std::max(dimensions, element_dimensions);
1458
            }
1459
            return 1 + dimensions;
1460
1
        } else {
1461
1
            return 0;
1462
1
        }
1463
1
    }
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE27EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE9EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1451
164k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1452
        if constexpr (T == TYPE_ARRAY) {
1453
            const size_t size = x.size();
1454
            size_t dimensions = 0;
1455
            for (size_t i = 0; i < size; ++i) {
1456
                size_t element_dimensions = apply_visitor(*this, x[i]);
1457
                dimensions = std::max(dimensions, element_dimensions);
1458
            }
1459
            return 1 + dimensions;
1460
164k
        } else {
1461
164k
            return 0;
1462
164k
        }
1463
164k
    }
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE36EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE37EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE23EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1451
976k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1452
        if constexpr (T == TYPE_ARRAY) {
1453
            const size_t size = x.size();
1454
            size_t dimensions = 0;
1455
            for (size_t i = 0; i < size; ++i) {
1456
                size_t element_dimensions = apply_visitor(*this, x[i]);
1457
                dimensions = std::max(dimensions, element_dimensions);
1458
            }
1459
            return 1 + dimensions;
1460
976k
        } else {
1461
976k
            return 0;
1462
976k
        }
1463
976k
    }
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE15EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE10EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE41EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE17EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1451
128k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1452
128k
        if constexpr (T == TYPE_ARRAY) {
1453
128k
            const size_t size = x.size();
1454
128k
            size_t dimensions = 0;
1455
873k
            for (size_t i = 0; i < size; ++i) {
1456
745k
                size_t element_dimensions = apply_visitor(*this, x[i]);
1457
745k
                dimensions = std::max(dimensions, element_dimensions);
1458
745k
            }
1459
128k
            return 1 + dimensions;
1460
        } else {
1461
            return 0;
1462
        }
1463
128k
    }
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE16EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1451
1
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1452
        if constexpr (T == TYPE_ARRAY) {
1453
            const size_t size = x.size();
1454
            size_t dimensions = 0;
1455
            for (size_t i = 0; i < size; ++i) {
1456
                size_t element_dimensions = apply_visitor(*this, x[i]);
1457
                dimensions = std::max(dimensions, element_dimensions);
1458
            }
1459
            return 1 + dimensions;
1460
1
        } else {
1461
1
            return 0;
1462
1
        }
1463
1
    }
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE18EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE32EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1451
1
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1452
        if constexpr (T == TYPE_ARRAY) {
1453
            const size_t size = x.size();
1454
            size_t dimensions = 0;
1455
            for (size_t i = 0; i < size; ++i) {
1456
                size_t element_dimensions = apply_visitor(*this, x[i]);
1457
                dimensions = std::max(dimensions, element_dimensions);
1458
            }
1459
            return 1 + dimensions;
1460
1
        } else {
1461
1
            return 0;
1462
1
        }
1463
1
    }
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE28EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE29EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE20EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE30EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE35EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE22EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE19EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE24EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE31EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1451
31
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1452
        if constexpr (T == TYPE_ARRAY) {
1453
            const size_t size = x.size();
1454
            size_t dimensions = 0;
1455
            for (size_t i = 0; i < size; ++i) {
1456
                size_t element_dimensions = apply_visitor(*this, x[i]);
1457
                dimensions = std::max(dimensions, element_dimensions);
1458
            }
1459
            return 1 + dimensions;
1460
31
        } else {
1461
31
            return 0;
1462
31
        }
1463
31
    }
1464
};
1465
1466
// Visitor that allows to get type of scalar field
1467
// but excludes fields that contain complex fields. This is a faster version
1468
// for FieldVisitorToScalarType which does not support complex field.
1469
class SimpleFieldVisitorToScalarType : public StaticVisitor<size_t> {
1470
public:
1471
    template <PrimitiveType T>
1472
1.58M
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1473
1.58M
        if constexpr (T == TYPE_ARRAY) {
1474
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1475
12.3k
        } else if constexpr (T == TYPE_NULL) {
1476
12.3k
            have_nulls = true;
1477
12.3k
            return 1;
1478
1.57M
        } else {
1479
1.57M
            type = T;
1480
1.57M
            return 1;
1481
1.57M
        }
1482
1.58M
    }
_ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE1EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1472
12.3k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1473
        if constexpr (T == TYPE_ARRAY) {
1474
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1475
12.3k
        } else if constexpr (T == TYPE_NULL) {
1476
12.3k
            have_nulls = true;
1477
12.3k
            return 1;
1478
        } else {
1479
            type = T;
1480
            return 1;
1481
        }
1482
12.3k
    }
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE26EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE42EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE7EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1472
12.3k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1473
        if constexpr (T == TYPE_ARRAY) {
1474
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1475
        } else if constexpr (T == TYPE_NULL) {
1476
            have_nulls = true;
1477
            return 1;
1478
12.3k
        } else {
1479
12.3k
            type = T;
1480
12.3k
            return 1;
1481
12.3k
        }
1482
12.3k
    }
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE12EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE11EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE25EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE2EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1472
12.4k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1473
        if constexpr (T == TYPE_ARRAY) {
1474
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1475
        } else if constexpr (T == TYPE_NULL) {
1476
            have_nulls = true;
1477
            return 1;
1478
12.4k
        } else {
1479
12.4k
            type = T;
1480
12.4k
            return 1;
1481
12.4k
        }
1482
12.4k
    }
_ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE3EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1472
2
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1473
        if constexpr (T == TYPE_ARRAY) {
1474
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1475
        } else if constexpr (T == TYPE_NULL) {
1476
            have_nulls = true;
1477
            return 1;
1478
2
        } else {
1479
2
            type = T;
1480
2
            return 1;
1481
2
        }
1482
2
    }
_ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE4EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1472
7
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1473
        if constexpr (T == TYPE_ARRAY) {
1474
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1475
        } else if constexpr (T == TYPE_NULL) {
1476
            have_nulls = true;
1477
            return 1;
1478
7
        } else {
1479
7
            type = T;
1480
7
            return 1;
1481
7
        }
1482
7
    }
_ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE5EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1472
676
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1473
        if constexpr (T == TYPE_ARRAY) {
1474
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1475
        } else if constexpr (T == TYPE_NULL) {
1476
            have_nulls = true;
1477
            return 1;
1478
676
        } else {
1479
676
            type = T;
1480
676
            return 1;
1481
676
        }
1482
676
    }
_ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE6EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1472
855k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1473
        if constexpr (T == TYPE_ARRAY) {
1474
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1475
        } else if constexpr (T == TYPE_NULL) {
1476
            have_nulls = true;
1477
            return 1;
1478
855k
        } else {
1479
855k
            type = T;
1480
855k
            return 1;
1481
855k
        }
1482
855k
    }
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE38EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE39EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE8EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1472
1
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1473
        if constexpr (T == TYPE_ARRAY) {
1474
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1475
        } else if constexpr (T == TYPE_NULL) {
1476
            have_nulls = true;
1477
            return 1;
1478
1
        } else {
1479
1
            type = T;
1480
1
            return 1;
1481
1
        }
1482
1
    }
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE27EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE9EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1472
12.7k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1473
        if constexpr (T == TYPE_ARRAY) {
1474
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1475
        } else if constexpr (T == TYPE_NULL) {
1476
            have_nulls = true;
1477
            return 1;
1478
12.7k
        } else {
1479
12.7k
            type = T;
1480
12.7k
            return 1;
1481
12.7k
        }
1482
12.7k
    }
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE36EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE37EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE23EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1472
679k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1473
        if constexpr (T == TYPE_ARRAY) {
1474
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1475
        } else if constexpr (T == TYPE_NULL) {
1476
            have_nulls = true;
1477
            return 1;
1478
679k
        } else {
1479
679k
            type = T;
1480
679k
            return 1;
1481
679k
        }
1482
679k
    }
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE15EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE10EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE41EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE17EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE16EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE18EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE32EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE28EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE29EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE20EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE30EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE35EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE22EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE19EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE24EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE31EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1472
5
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1473
        if constexpr (T == TYPE_ARRAY) {
1474
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1475
        } else if constexpr (T == TYPE_NULL) {
1476
            have_nulls = true;
1477
            return 1;
1478
5
        } else {
1479
5
            type = T;
1480
5
            return 1;
1481
5
        }
1482
5
    }
1483
1.58M
    void get_scalar_type(PrimitiveType* data_type) const { *data_type = type; }
1484
1.58M
    bool contain_nulls() const { return have_nulls; }
1485
1486
1.58M
    bool need_convert_field() const { return false; }
1487
1488
private:
1489
    PrimitiveType type = PrimitiveType::INVALID_TYPE;
1490
    bool have_nulls = false;
1491
};
1492
1493
/// Visitor that allows to get type of scalar field
1494
/// or least common type of scalars in array.
1495
/// More optimized version of FieldToDataType.
1496
class FieldVisitorToScalarType : public StaticVisitor<size_t> {
1497
public:
1498
    template <PrimitiveType T>
1499
823k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1500
823k
        if constexpr (T == TYPE_ARRAY) {
1501
128k
            size_t size = x.size();
1502
873k
            for (size_t i = 0; i < size; ++i) {
1503
745k
                apply_visitor(*this, x[i]);
1504
745k
            }
1505
128k
            return 0;
1506
128k
        } else if constexpr (T == TYPE_NULL) {
1507
11.6k
            have_nulls = true;
1508
11.6k
            return 0;
1509
683k
        } else {
1510
683k
            field_types.insert(T);
1511
683k
            type_indexes.insert(T);
1512
683k
            return 0;
1513
683k
        }
1514
823k
    }
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE1EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1499
11.6k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1500
        if constexpr (T == TYPE_ARRAY) {
1501
            size_t size = x.size();
1502
            for (size_t i = 0; i < size; ++i) {
1503
                apply_visitor(*this, x[i]);
1504
            }
1505
            return 0;
1506
11.6k
        } else if constexpr (T == TYPE_NULL) {
1507
11.6k
            have_nulls = true;
1508
11.6k
            return 0;
1509
        } else {
1510
            field_types.insert(T);
1511
            type_indexes.insert(T);
1512
            return 0;
1513
        }
1514
11.6k
    }
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE26EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE42EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE7EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1499
28.6k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1500
        if constexpr (T == TYPE_ARRAY) {
1501
            size_t size = x.size();
1502
            for (size_t i = 0; i < size; ++i) {
1503
                apply_visitor(*this, x[i]);
1504
            }
1505
            return 0;
1506
        } else if constexpr (T == TYPE_NULL) {
1507
            have_nulls = true;
1508
            return 0;
1509
28.6k
        } else {
1510
28.6k
            field_types.insert(T);
1511
28.6k
            type_indexes.insert(T);
1512
28.6k
            return 0;
1513
28.6k
        }
1514
28.6k
    }
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE12EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE11EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE25EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE2EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1499
57.2k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1500
        if constexpr (T == TYPE_ARRAY) {
1501
            size_t size = x.size();
1502
            for (size_t i = 0; i < size; ++i) {
1503
                apply_visitor(*this, x[i]);
1504
            }
1505
            return 0;
1506
        } else if constexpr (T == TYPE_NULL) {
1507
            have_nulls = true;
1508
            return 0;
1509
57.2k
        } else {
1510
57.2k
            field_types.insert(T);
1511
57.2k
            type_indexes.insert(T);
1512
57.2k
            return 0;
1513
57.2k
        }
1514
57.2k
    }
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE3EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1499
4
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1500
        if constexpr (T == TYPE_ARRAY) {
1501
            size_t size = x.size();
1502
            for (size_t i = 0; i < size; ++i) {
1503
                apply_visitor(*this, x[i]);
1504
            }
1505
            return 0;
1506
        } else if constexpr (T == TYPE_NULL) {
1507
            have_nulls = true;
1508
            return 0;
1509
4
        } else {
1510
4
            field_types.insert(T);
1511
4
            type_indexes.insert(T);
1512
4
            return 0;
1513
4
        }
1514
4
    }
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE4EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE5EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1499
531
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1500
        if constexpr (T == TYPE_ARRAY) {
1501
            size_t size = x.size();
1502
            for (size_t i = 0; i < size; ++i) {
1503
                apply_visitor(*this, x[i]);
1504
            }
1505
            return 0;
1506
        } else if constexpr (T == TYPE_NULL) {
1507
            have_nulls = true;
1508
            return 0;
1509
531
        } else {
1510
531
            field_types.insert(T);
1511
531
            type_indexes.insert(T);
1512
531
            return 0;
1513
531
        }
1514
531
    }
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE6EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1499
147k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1500
        if constexpr (T == TYPE_ARRAY) {
1501
            size_t size = x.size();
1502
            for (size_t i = 0; i < size; ++i) {
1503
                apply_visitor(*this, x[i]);
1504
            }
1505
            return 0;
1506
        } else if constexpr (T == TYPE_NULL) {
1507
            have_nulls = true;
1508
            return 0;
1509
147k
        } else {
1510
147k
            field_types.insert(T);
1511
147k
            type_indexes.insert(T);
1512
147k
            return 0;
1513
147k
        }
1514
147k
    }
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE38EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE39EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE8EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE27EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE9EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1499
151k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1500
        if constexpr (T == TYPE_ARRAY) {
1501
            size_t size = x.size();
1502
            for (size_t i = 0; i < size; ++i) {
1503
                apply_visitor(*this, x[i]);
1504
            }
1505
            return 0;
1506
        } else if constexpr (T == TYPE_NULL) {
1507
            have_nulls = true;
1508
            return 0;
1509
151k
        } else {
1510
151k
            field_types.insert(T);
1511
151k
            type_indexes.insert(T);
1512
151k
            return 0;
1513
151k
        }
1514
151k
    }
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE36EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE37EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE23EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1499
297k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1500
        if constexpr (T == TYPE_ARRAY) {
1501
            size_t size = x.size();
1502
            for (size_t i = 0; i < size; ++i) {
1503
                apply_visitor(*this, x[i]);
1504
            }
1505
            return 0;
1506
        } else if constexpr (T == TYPE_NULL) {
1507
            have_nulls = true;
1508
            return 0;
1509
297k
        } else {
1510
297k
            field_types.insert(T);
1511
297k
            type_indexes.insert(T);
1512
297k
            return 0;
1513
297k
        }
1514
297k
    }
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE15EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE10EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE41EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE17EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1499
128k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1500
128k
        if constexpr (T == TYPE_ARRAY) {
1501
128k
            size_t size = x.size();
1502
873k
            for (size_t i = 0; i < size; ++i) {
1503
745k
                apply_visitor(*this, x[i]);
1504
745k
            }
1505
128k
            return 0;
1506
        } else if constexpr (T == TYPE_NULL) {
1507
            have_nulls = true;
1508
            return 0;
1509
        } else {
1510
            field_types.insert(T);
1511
            type_indexes.insert(T);
1512
            return 0;
1513
        }
1514
128k
    }
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE16EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1499
1
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1500
        if constexpr (T == TYPE_ARRAY) {
1501
            size_t size = x.size();
1502
            for (size_t i = 0; i < size; ++i) {
1503
                apply_visitor(*this, x[i]);
1504
            }
1505
            return 0;
1506
        } else if constexpr (T == TYPE_NULL) {
1507
            have_nulls = true;
1508
            return 0;
1509
1
        } else {
1510
1
            field_types.insert(T);
1511
1
            type_indexes.insert(T);
1512
1
            return 0;
1513
1
        }
1514
1
    }
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE18EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE32EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1499
1
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1500
        if constexpr (T == TYPE_ARRAY) {
1501
            size_t size = x.size();
1502
            for (size_t i = 0; i < size; ++i) {
1503
                apply_visitor(*this, x[i]);
1504
            }
1505
            return 0;
1506
        } else if constexpr (T == TYPE_NULL) {
1507
            have_nulls = true;
1508
            return 0;
1509
1
        } else {
1510
1
            field_types.insert(T);
1511
1
            type_indexes.insert(T);
1512
1
            return 0;
1513
1
        }
1514
1
    }
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE28EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE29EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE20EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE30EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE35EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE22EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE19EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE24EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE31EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1499
26
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1500
        if constexpr (T == TYPE_ARRAY) {
1501
            size_t size = x.size();
1502
            for (size_t i = 0; i < size; ++i) {
1503
                apply_visitor(*this, x[i]);
1504
            }
1505
            return 0;
1506
        } else if constexpr (T == TYPE_NULL) {
1507
            have_nulls = true;
1508
            return 0;
1509
26
        } else {
1510
26
            field_types.insert(T);
1511
26
            type_indexes.insert(T);
1512
26
            return 0;
1513
26
        }
1514
26
    }
1515
77.9k
    void get_scalar_type(PrimitiveType* type) const {
1516
77.9k
        if (type_indexes.size() == 1) {
1517
            // Most cases will have only one type
1518
64.6k
            *type = *type_indexes.begin();
1519
64.6k
            return;
1520
64.6k
        }
1521
13.2k
        DataTypePtr data_type;
1522
13.2k
        get_least_supertype_jsonb(type_indexes, &data_type);
1523
13.2k
        *type = data_type->get_primitive_type();
1524
13.2k
    }
1525
77.9k
    bool contain_nulls() const { return have_nulls; }
1526
77.9k
    bool need_convert_field() const { return field_types.size() > 1; }
1527
1528
private:
1529
    phmap::flat_hash_set<PrimitiveType> type_indexes;
1530
    phmap::flat_hash_set<PrimitiveType> field_types;
1531
    bool have_nulls = false;
1532
};
1533
1534
template <typename Visitor>
1535
1.66M
void get_field_info_impl(const Field& field, FieldInfo* info) {
1536
1.66M
    Visitor to_scalar_type_visitor;
1537
1.66M
    apply_visitor(to_scalar_type_visitor, field);
1538
1.66M
    PrimitiveType type_id;
1539
1.66M
    to_scalar_type_visitor.get_scalar_type(&type_id);
1540
    // array item's dimension may missmatch, eg. [1, 2, [1, 2, 3]]
1541
1.66M
    *info = {type_id, to_scalar_type_visitor.contain_nulls(),
1542
1.66M
             to_scalar_type_visitor.need_convert_field(),
1543
1.66M
             apply_visitor(FieldVisitorToNumberOfDimensions(), field)};
1544
1.66M
}
_ZN5doris12variant_util19get_field_info_implINS0_24FieldVisitorToScalarTypeEEEvRKNS_5FieldEPNS_9FieldInfoE
Line
Count
Source
1535
77.9k
void get_field_info_impl(const Field& field, FieldInfo* info) {
1536
77.9k
    Visitor to_scalar_type_visitor;
1537
77.9k
    apply_visitor(to_scalar_type_visitor, field);
1538
77.9k
    PrimitiveType type_id;
1539
77.9k
    to_scalar_type_visitor.get_scalar_type(&type_id);
1540
    // array item's dimension may missmatch, eg. [1, 2, [1, 2, 3]]
1541
77.9k
    *info = {type_id, to_scalar_type_visitor.contain_nulls(),
1542
77.9k
             to_scalar_type_visitor.need_convert_field(),
1543
77.9k
             apply_visitor(FieldVisitorToNumberOfDimensions(), field)};
1544
77.9k
}
_ZN5doris12variant_util19get_field_info_implINS0_30SimpleFieldVisitorToScalarTypeEEEvRKNS_5FieldEPNS_9FieldInfoE
Line
Count
Source
1535
1.58M
void get_field_info_impl(const Field& field, FieldInfo* info) {
1536
1.58M
    Visitor to_scalar_type_visitor;
1537
1.58M
    apply_visitor(to_scalar_type_visitor, field);
1538
1.58M
    PrimitiveType type_id;
1539
1.58M
    to_scalar_type_visitor.get_scalar_type(&type_id);
1540
    // array item's dimension may missmatch, eg. [1, 2, [1, 2, 3]]
1541
1.58M
    *info = {type_id, to_scalar_type_visitor.contain_nulls(),
1542
1.58M
             to_scalar_type_visitor.need_convert_field(),
1543
1.58M
             apply_visitor(FieldVisitorToNumberOfDimensions(), field)};
1544
1.58M
}
1545
1546
1.66M
void get_field_info(const Field& field, FieldInfo* info) {
1547
1.66M
    if (field.is_complex_field()) {
1548
77.9k
        get_field_info_impl<FieldVisitorToScalarType>(field, info);
1549
1.58M
    } else {
1550
1.58M
        get_field_info_impl<SimpleFieldVisitorToScalarType>(field, info);
1551
1.58M
    }
1552
1.66M
}
1553
1554
bool generate_sub_column_info(const TabletSchema& schema, int32_t col_unique_id,
1555
                              const std::string& path,
1556
4.44k
                              TabletSchema::SubColumnInfo* sub_column_info) {
1557
4.44k
    const auto& parent_column = schema.column_by_uid(col_unique_id);
1558
4.44k
    std::function<void(const TabletColumn&, TabletColumn*)> generate_result_column =
1559
4.44k
            [&](const TabletColumn& from_column, TabletColumn* to_column) {
1560
239
                to_column->set_name(parent_column.name_lower_case() + "." + path);
1561
239
                to_column->set_type(from_column.type());
1562
239
                to_column->set_parent_unique_id(parent_column.unique_id());
1563
239
                bool is_typed = !parent_column.variant_enable_typed_paths_to_sparse();
1564
239
                to_column->set_path_info(
1565
239
                        PathInData(parent_column.name_lower_case() + "." + path, is_typed));
1566
239
                to_column->set_aggregation_method(parent_column.aggregation());
1567
239
                to_column->set_is_nullable(true);
1568
239
                to_column->set_parent_unique_id(parent_column.unique_id());
1569
239
                if (from_column.is_decimal()) {
1570
0
                    to_column->set_precision(from_column.precision());
1571
0
                }
1572
239
                to_column->set_frac(from_column.frac());
1573
1574
239
                if (from_column.is_array_type()) {
1575
41
                    TabletColumn nested_column;
1576
41
                    generate_result_column(*from_column.get_sub_columns()[0], &nested_column);
1577
41
                    to_column->add_sub_column(nested_column);
1578
41
                }
1579
239
            };
1580
1581
4.44k
    auto generate_index = [&](const std::string& pattern) {
1582
        // 1. find subcolumn's index
1583
198
        if (const auto& indexes = schema.inverted_index_by_field_pattern(col_unique_id, pattern);
1584
198
            !indexes.empty()) {
1585
152
            for (const auto& index : indexes) {
1586
152
                auto index_ptr = std::make_shared<TabletIndex>(*index);
1587
152
                index_ptr->set_escaped_escaped_index_suffix_path(
1588
152
                        sub_column_info->column.path_info_ptr()->get_path());
1589
152
                sub_column_info->indexes.emplace_back(std::move(index_ptr));
1590
152
            }
1591
136
        }
1592
        // 2. find parent column's index
1593
62
        else if (const auto parent_index = schema.inverted_indexs(col_unique_id);
1594
62
                 !parent_index.empty()) {
1595
2
            inherit_index(parent_index, sub_column_info->indexes, sub_column_info->column);
1596
60
        } else {
1597
60
            sub_column_info->indexes.clear();
1598
60
        }
1599
198
    };
1600
1601
4.44k
    const auto& sub_columns = parent_column.get_sub_columns();
1602
4.44k
    for (const auto& sub_column : sub_columns) {
1603
400
        const char* pattern = sub_column->name().c_str();
1604
400
        switch (sub_column->pattern_type()) {
1605
170
        case PatternTypePB::MATCH_NAME: {
1606
170
            if (strcmp(pattern, path.c_str()) == 0) {
1607
89
                generate_result_column(*sub_column, &sub_column_info->column);
1608
89
                generate_index(sub_column->name());
1609
89
                return true;
1610
89
            }
1611
81
            break;
1612
170
        }
1613
230
        case PatternTypePB::MATCH_NAME_GLOB: {
1614
230
            if (glob_match_re2(pattern, path)) {
1615
109
                generate_result_column(*sub_column, &sub_column_info->column);
1616
109
                generate_index(sub_column->name());
1617
109
                return true;
1618
109
            }
1619
121
            break;
1620
230
        }
1621
121
        default:
1622
0
            break;
1623
400
        }
1624
400
    }
1625
4.25k
    return false;
1626
4.44k
}
1627
1628
TabletSchemaSPtr VariantCompactionUtil::calculate_variant_extended_schema(
1629
36
        const std::vector<RowsetSharedPtr>& rowsets, const TabletSchemaSPtr& base_schema) {
1630
36
    if (rowsets.empty()) {
1631
1
        return nullptr;
1632
1
    }
1633
1634
35
    std::vector<TabletSchemaSPtr> schemas;
1635
41
    for (const auto& rs : rowsets) {
1636
41
        if (rs->num_segments() == 0) {
1637
0
            continue;
1638
0
        }
1639
41
        const auto& tablet_schema = rs->tablet_schema();
1640
41
        SegmentCacheHandle segment_cache;
1641
41
        auto st = SegmentLoader::instance()->load_segments(std::static_pointer_cast<BetaRowset>(rs),
1642
41
                                                           &segment_cache);
1643
41
        if (!st.ok()) {
1644
0
            return base_schema;
1645
0
        }
1646
48
        for (const auto& segment : segment_cache.get_segments()) {
1647
48
            TabletSchemaSPtr schema = tablet_schema->copy_without_variant_extracted_columns();
1648
96
            for (const auto& column : tablet_schema->columns()) {
1649
96
                if (!column->is_variant_type()) {
1650
48
                    continue;
1651
48
                }
1652
48
                std::shared_ptr<ColumnReader> column_reader;
1653
48
                OlapReaderStatistics stats;
1654
48
                st = segment->get_column_reader(column->unique_id(), &column_reader, &stats);
1655
48
                if (!st.ok()) {
1656
0
                    LOG(WARNING) << "Failed to get column reader for column: " << column->name()
1657
0
                                 << " error: " << st.to_string();
1658
0
                    continue;
1659
0
                }
1660
48
                if (!column_reader) {
1661
0
                    continue;
1662
0
                }
1663
1664
48
                CHECK(column_reader->get_meta_type() == FieldType::OLAP_FIELD_TYPE_VARIANT);
1665
48
                auto* variant_column_reader =
1666
48
                        assert_cast<segment_v2::VariantColumnReader*>(column_reader.get());
1667
                // load external meta before getting subcolumn meta info
1668
48
                st = variant_column_reader->load_external_meta_once();
1669
48
                if (!st.ok()) {
1670
0
                    LOG(WARNING) << "Failed to load external meta for column: " << column->name()
1671
0
                                 << " error: " << st.to_string();
1672
0
                    continue;
1673
0
                }
1674
48
                const auto* subcolumn_meta_info = variant_column_reader->get_subcolumns_meta_info();
1675
114
                for (const auto& entry : *subcolumn_meta_info) {
1676
114
                    if (entry->path.empty()) {
1677
48
                        continue;
1678
48
                    }
1679
66
                    const std::string& column_name =
1680
66
                            column->name_lower_case() + "." + entry->path.get_path();
1681
66
                    const DataTypePtr& data_type = entry->data.file_column_type;
1682
66
                    PathInDataBuilder full_path_builder;
1683
66
                    auto full_path = full_path_builder.append(column->name_lower_case(), false)
1684
66
                                             .append(entry->path.get_parts(), false)
1685
66
                                             .build();
1686
66
                    TabletColumn subcolumn =
1687
66
                            get_column_by_type(data_type, column_name,
1688
66
                                               ExtraInfo {.unique_id = -1,
1689
66
                                                          .parent_unique_id = column->unique_id(),
1690
66
                                                          .path_info = full_path});
1691
66
                    schema->append_column(subcolumn);
1692
66
                }
1693
48
            }
1694
48
            schemas.emplace_back(schema);
1695
48
        }
1696
41
    }
1697
35
    TabletSchemaSPtr least_common_schema;
1698
35
    auto st = get_least_common_schema(schemas, base_schema, least_common_schema, false);
1699
35
    if (!st.ok()) {
1700
0
        return base_schema;
1701
0
    }
1702
35
    return least_common_schema;
1703
35
}
1704
1705
bool inherit_index(const std::vector<const TabletIndex*>& parent_indexes,
1706
                   TabletIndexes& subcolumns_indexes, FieldType column_type,
1707
1.13k
                   const std::string& suffix_path, bool is_array_nested_type) {
1708
1.13k
    if (parent_indexes.empty()) {
1709
1.10k
        return false;
1710
1.10k
    }
1711
32
    subcolumns_indexes.clear();
1712
    // bkd index or array index only need to inherit one index
1713
32
    if (field_is_numeric_type(column_type) ||
1714
32
        (is_array_nested_type &&
1715
23
         (field_is_numeric_type(column_type) || field_is_slice_type(column_type)))) {
1716
13
        auto index_ptr = std::make_shared<TabletIndex>(*parent_indexes[0]);
1717
13
        index_ptr->set_escaped_escaped_index_suffix_path(suffix_path);
1718
        // no need parse for bkd index or array index
1719
13
        index_ptr->remove_parser_and_analyzer();
1720
13
        subcolumns_indexes.emplace_back(std::move(index_ptr));
1721
13
        return true;
1722
13
    }
1723
    // string type need to inherit all indexes
1724
19
    else if (field_is_slice_type(column_type) && !is_array_nested_type) {
1725
18
        for (const auto& index : parent_indexes) {
1726
18
            auto index_ptr = std::make_shared<TabletIndex>(*index);
1727
18
            index_ptr->set_escaped_escaped_index_suffix_path(suffix_path);
1728
18
            subcolumns_indexes.emplace_back(std::move(index_ptr));
1729
18
        }
1730
17
        return true;
1731
17
    }
1732
2
    return false;
1733
32
}
1734
1735
bool inherit_index(const std::vector<const TabletIndex*>& parent_indexes,
1736
1.14k
                   TabletIndexes& subcolumns_indexes, const TabletColumn& column) {
1737
1.14k
    if (!column.is_extracted_column()) {
1738
3
        return false;
1739
3
    }
1740
1.13k
    if (column.is_array_type()) {
1741
18
        if (column.get_sub_columns().empty()) {
1742
0
            return false;
1743
0
        }
1744
18
        const TabletColumn* nested = column.get_sub_columns()[0].get();
1745
18
        while (nested != nullptr && nested->is_array_type()) {
1746
0
            if (nested->get_sub_columns().empty()) {
1747
0
                return false;
1748
0
            }
1749
0
            nested = nested->get_sub_columns()[0].get();
1750
0
        }
1751
18
        if (nested == nullptr) {
1752
0
            return false;
1753
0
        }
1754
18
        return inherit_index(parent_indexes, subcolumns_indexes, nested->type(),
1755
18
                             column.path_info_ptr()->get_path(), true);
1756
18
    }
1757
1.12k
    return inherit_index(parent_indexes, subcolumns_indexes, column.type(),
1758
1.12k
                         column.path_info_ptr()->get_path());
1759
1.13k
}
1760
1761
bool inherit_index(const std::vector<const TabletIndex*>& parent_indexes,
1762
0
                   TabletIndexes& subcolumns_indexes, const ColumnMetaPB& column_pb) {
1763
0
    if (!column_pb.has_column_path_info()) {
1764
0
        return false;
1765
0
    }
1766
0
    if (column_pb.type() == (int)FieldType::OLAP_FIELD_TYPE_ARRAY) {
1767
0
        if (column_pb.children_columns_size() == 0) {
1768
0
            return false;
1769
0
        }
1770
0
        const ColumnMetaPB* nested = &column_pb.children_columns(0);
1771
0
        while (nested != nullptr && nested->type() == (int)FieldType::OLAP_FIELD_TYPE_ARRAY) {
1772
0
            if (nested->children_columns_size() == 0) {
1773
0
                return false;
1774
0
            }
1775
0
            nested = &nested->children_columns(0);
1776
0
        }
1777
0
        if (nested == nullptr) {
1778
0
            return false;
1779
0
        }
1780
0
        return inherit_index(parent_indexes, subcolumns_indexes, (FieldType)nested->type(),
1781
0
                             column_pb.column_path_info().path(), true);
1782
0
    }
1783
0
    return inherit_index(parent_indexes, subcolumns_indexes, (FieldType)column_pb.type(),
1784
0
                         column_pb.column_path_info().path());
1785
0
}
1786
1787
// ============ Implementation from parse2column.cpp ============
1788
1789
/** Pool of reusable objects that must not be used by several threads at once.
  * Each borrower gets exclusive access to one object until its handle is destroyed.
  * The pool is unbounded, and pooled objects live until the pool itself is destroyed.
  *
  * Prefer it over thread-local storage when the number of objects in use at the
  *  same time is smaller than the number of running/sleeping threads that have
  *  ever used one, and creating/destroying an object is expensive.
  */
template <typename T>
class SimpleObjectPool {
protected:
    /// Guards `stack`.
    std::mutex mutex;
    /// Idle objects, owned by the pool until they are handed out again.
    std::stack<std::unique_ptr<T>> stack;

    /// Deleter used by the handles returned from get(): instead of destroying
    /// the object it gives ownership back to the pool it was borrowed from.
    struct Deleter {
        SimpleObjectPool<T>* parent;

        Deleter(SimpleObjectPool<T>* parent_ = nullptr) : parent {parent_} {} /// NOLINT

        void operator()(T* owning_ptr) const {
            std::lock_guard<std::mutex> guard(parent->mutex);
            parent->stack.push(std::unique_ptr<T>(owning_ptr));
        }
    };

public:
    using Pointer = std::unique_ptr<T, Deleter>;

    /// Hands out an idle object when one is available; otherwise creates one by
    /// calling f() (while the pool lock is held). The returned handle puts the
    /// object back into the pool when it goes out of scope.
    template <typename Factory>
    Pointer get(Factory&& f) {
        std::lock_guard<std::mutex> guard(mutex);
        if (!stack.empty()) {
            std::unique_ptr<T> recycled = std::move(stack.top());
            stack.pop();
            return Pointer(recycled.release(), Deleter(this));
        }
        return Pointer(f(), Deleter(this));
    }

    /// Same as get(), with new objects created by T's default constructor.
    Pointer getDefault() {
        return get([] { return new T; });
    }
};
1834
1835
// Shared pool of JSON parsers. The parse_json_to_variant overloads below borrow a
// parser from it when the caller does not supply one.
SimpleObjectPool<JsonParser> parsers_pool;
1836
1837
// Shorthand for the node type of ColumnVariant's subcolumn container.
using Node = typename ColumnVariant::Subcolumns::Node;
1838
1839
148k
/// Appends `size` raw bytes starting at `data` to the end of `chars`.
///
/// A zero-length append is a no-op. It is handled explicitly because callers may
/// pass a null `data` pointer together with `size == 0` (e.g. an empty payload),
/// and calling memcpy with a null pointer is undefined behaviour even for zero bytes.
static inline void append_binary_bytes(ColumnString::Chars& chars, const void* data, size_t size) {
    if (size == 0) {
        return;
    }
    const auto old_size = chars.size();
    chars.resize(old_size + size);
    memcpy(chars.data() + old_size, data, size);
}
1844
1845
72.3k
/// Appends the field type to `chars` as a single byte (the enum value narrowed to uint8_t).
static inline void append_binary_type(ColumnString::Chars& chars, FieldType type) {
    const auto tag = static_cast<uint8_t>(type);
    append_binary_bytes(chars, &tag, sizeof(tag));
}
1849
1850
3.74k
/// Appends `v` to `chars` as sizeof(size_t) raw bytes, copied straight from the
/// in-memory representation of the value.
static inline void append_binary_sizet(ColumnString::Chars& chars, size_t v) {
    append_binary_bytes(chars, &v, sizeof(v));
}
1853
1854
72.3k
static void append_field_to_binary_chars(const Field& field, ColumnString::Chars& chars) {
1855
72.3k
    switch (field.get_type()) {
1856
0
    case PrimitiveType::TYPE_NULL: {
1857
0
        append_binary_type(chars, FieldType::OLAP_FIELD_TYPE_NONE);
1858
0
        return;
1859
0
    }
1860
2
    case PrimitiveType::TYPE_BOOLEAN: {
1861
2
        append_binary_type(chars,
1862
2
                           TabletColumn::get_field_type_by_type(PrimitiveType::TYPE_BOOLEAN));
1863
2
        const auto v = static_cast<UInt8>(field.get<PrimitiveType::TYPE_BOOLEAN>());
1864
2
        append_binary_bytes(chars, &v, sizeof(UInt8));
1865
2
        return;
1866
0
    }
1867
68.5k
    case PrimitiveType::TYPE_BIGINT: {
1868
68.5k
        append_binary_type(chars, TabletColumn::get_field_type_by_type(PrimitiveType::TYPE_BIGINT));
1869
68.5k
        const auto v = field.get<PrimitiveType::TYPE_BIGINT>();
1870
68.5k
        append_binary_bytes(chars, &v, sizeof(Int64));
1871
68.5k
        return;
1872
0
    }
1873
1
    case PrimitiveType::TYPE_LARGEINT: {
1874
1
        append_binary_type(chars,
1875
1
                           TabletColumn::get_field_type_by_type(PrimitiveType::TYPE_LARGEINT));
1876
1
        const auto v = field.get<PrimitiveType::TYPE_LARGEINT>();
1877
1
        append_binary_bytes(chars, &v, sizeof(int128_t));
1878
1
        return;
1879
0
    }
1880
3
    case PrimitiveType::TYPE_DOUBLE: {
1881
3
        append_binary_type(chars, TabletColumn::get_field_type_by_type(PrimitiveType::TYPE_DOUBLE));
1882
3
        const auto v = field.get<PrimitiveType::TYPE_DOUBLE>();
1883
3
        append_binary_bytes(chars, &v, sizeof(Float64));
1884
3
        return;
1885
0
    }
1886
3.73k
    case PrimitiveType::TYPE_STRING: {
1887
3.73k
        append_binary_type(chars, FieldType::OLAP_FIELD_TYPE_STRING);
1888
3.73k
        const auto& v = field.get<PrimitiveType::TYPE_STRING>();
1889
3.73k
        append_binary_sizet(chars, v.size());
1890
3.73k
        append_binary_bytes(chars, v.data(), v.size());
1891
3.73k
        return;
1892
0
    }
1893
0
    case PrimitiveType::TYPE_JSONB: {
1894
0
        append_binary_type(chars, FieldType::OLAP_FIELD_TYPE_JSONB);
1895
0
        const auto& v = field.get<PrimitiveType::TYPE_JSONB>();
1896
0
        append_binary_sizet(chars, v.get_size());
1897
0
        append_binary_bytes(chars, v.get_value(), v.get_size());
1898
0
        return;
1899
0
    }
1900
13
    case PrimitiveType::TYPE_ARRAY: {
1901
13
        append_binary_type(chars, FieldType::OLAP_FIELD_TYPE_ARRAY);
1902
13
        const auto& a = field.get<PrimitiveType::TYPE_ARRAY>();
1903
13
        append_binary_sizet(chars, a.size());
1904
18
        for (const auto& elem : a) {
1905
18
            append_field_to_binary_chars(elem, chars);
1906
18
        }
1907
13
        return;
1908
0
    }
1909
0
    default:
1910
0
        throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Unsupported field type {}",
1911
0
                               field.get_type());
1912
72.3k
    }
1913
72.3k
}
1914
template <typename ParserImpl>
1915
void parse_json_to_variant_impl(IColumn& column, const char* src, size_t length,
1916
107k
                                JSONDataParser<ParserImpl>* parser, const ParseConfig& config) {
1917
107k
    auto& column_variant = assert_cast<ColumnVariant&>(column);
1918
107k
    std::optional<ParseResult> result;
1919
    /// Treat empty string as an empty object
1920
    /// for better CAST from String to Object.
1921
107k
    if (length > 0) {
1922
107k
        result = parser->parse(src, length, config);
1923
107k
    } else {
1924
0
        result = ParseResult {};
1925
0
    }
1926
107k
    if (!result) {
1927
11
        VLOG_DEBUG << "failed to parse " << std::string_view(src, length) << ", length= " << length;
1928
11
        if (config::variant_throw_exeception_on_invalid_json) {
1929
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Failed to parse object {}",
1930
0
                                   std::string_view(src, length));
1931
0
        }
1932
        // Treat as string
1933
11
        PathInData root_path;
1934
11
        Field field = Field::create_field<TYPE_STRING>(String(src, length));
1935
11
        result = ParseResult {{root_path}, {field}};
1936
11
    }
1937
107k
    auto& [paths, values] = *result;
1938
107k
    assert(paths.size() == values.size());
1939
107k
    size_t old_num_rows = column_variant.rows();
1940
107k
    if (config.deprecated_enable_flatten_nested) {
1941
        // here we should check the paths in variant and paths in result,
1942
        // if two paths which same prefix have different structure, we should throw an exception
1943
3.00k
        std::vector<PathInData> check_paths;
1944
11.9k
        for (const auto& entry : column_variant.get_subcolumns()) {
1945
11.9k
            check_paths.push_back(entry->path);
1946
11.9k
        }
1947
3.00k
        check_paths.insert(check_paths.end(), paths.begin(), paths.end());
1948
3.00k
        THROW_IF_ERROR(check_variant_has_no_ambiguous_paths(check_paths));
1949
3.00k
    }
1950
107k
    auto [doc_value_data_paths, doc_value_data_values] =
1951
107k
            column_variant.get_doc_value_data_paths_and_values();
1952
107k
    auto& doc_value_data_offsets = column_variant.serialized_doc_value_column_offsets();
1953
1954
1.43M
    auto flush_defaults = [](ColumnVariant::Subcolumn* subcolumn) {
1955
1.43M
        const auto num_defaults = subcolumn->cur_num_of_defaults();
1956
1.43M
        if (num_defaults > 0) {
1957
165k
            subcolumn->insert_many_defaults(num_defaults);
1958
165k
            subcolumn->reset_current_num_of_defaults();
1959
165k
        }
1960
1.43M
    };
1961
1962
107k
    auto is_plain_path = [](const PathInData& path) {
1963
13
        for (const auto& part : path.get_parts()) {
1964
13
            if (part.is_nested || part.anonymous_array_level != 0) {
1965
0
                return false;
1966
0
            }
1967
13
        }
1968
9
        return true;
1969
9
    };
1970
1971
107k
    auto get_or_create_subcolumn = [&](const PathInData& path, size_t index_hint,
1972
1.43M
                                       const FieldInfo& field_info) -> ColumnVariant::Subcolumn* {
1973
1.43M
        auto* subcolumn = column_variant.get_subcolumn(path, index_hint);
1974
1.43M
        if (subcolumn == nullptr) {
1975
3.78k
            if (path.has_nested_part()) {
1976
8
                column_variant.add_nested_subcolumn(path, field_info, old_num_rows);
1977
3.77k
            } else {
1978
3.77k
                column_variant.add_sub_column(path, old_num_rows);
1979
3.77k
            }
1980
3.78k
            subcolumn = column_variant.get_subcolumn(path, index_hint);
1981
3.78k
        }
1982
1.43M
        if (!subcolumn) {
1983
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Failed to find sub column {}",
1984
0
                                   path.get_path());
1985
0
        }
1986
1.43M
        return subcolumn;
1987
1.43M
    };
1988
1989
1.43M
    auto normalize_plain_path = [&](const PathInData& path) {
1990
1.43M
        if (!config.check_duplicate_json_path || path.empty() || !is_plain_path(path)) {
1991
1.43M
            return path;
1992
1.43M
        }
1993
9
        return PathInData(path.get_path());
1994
1.43M
    };
1995
1996
107k
    auto insert_into_subcolumn = [&](size_t i,
1997
1.43M
                                     bool check_size_mismatch) -> ColumnVariant::Subcolumn* {
1998
1.43M
        FieldInfo field_info;
1999
1.43M
        get_field_info(values[i], &field_info);
2000
1.43M
        if (field_info.scalar_type_id == PrimitiveType::INVALID_TYPE) {
2001
109
            return nullptr;
2002
109
        }
2003
1.43M
        auto path = normalize_plain_path(paths[i]);
2004
1.43M
        auto* subcolumn = get_or_create_subcolumn(path, i, field_info);
2005
1.43M
        flush_defaults(subcolumn);
2006
1.43M
        if (check_size_mismatch && subcolumn->size() != old_num_rows) {
2007
1
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
2008
1
                                   "subcolumn {} size missmatched, may contains duplicated entry",
2009
1
                                   path.get_path());
2010
1
        }
2011
1.43M
        subcolumn->insert(std::move(values[i]), std::move(field_info));
2012
1.43M
        return subcolumn;
2013
1.43M
    };
2014
2015
107k
    switch (config.parse_to) {
2016
102k
    case ParseConfig::ParseTo::OnlySubcolumns:
2017
1.54M
        for (size_t i = 0; i < paths.size(); ++i) {
2018
1.43M
            insert_into_subcolumn(i, true);
2019
1.43M
        }
2020
102k
        break;
2021
4.30k
    case ParseConfig::ParseTo::OnlyDocValueColumn: {
2022
4.30k
        std::vector<size_t> doc_item_indexes;
2023
4.30k
        doc_item_indexes.reserve(paths.size());
2024
4.30k
        phmap::flat_hash_set<StringRef, StringRefHash> seen_paths;
2025
4.30k
        seen_paths.reserve(paths.size());
2026
2027
76.6k
        for (size_t i = 0; i < paths.size(); ++i) {
2028
72.3k
            FieldInfo field_info;
2029
72.3k
            get_field_info(values[i], &field_info);
2030
72.3k
            if (paths[i].empty()) {
2031
                // Plain non-doc VARIANT can use doc-value KV as writer-side staging. An
2032
                // invalid root entry from JSON object/array is neither a scalar root value nor
2033
                // a doc KV path, so leave this row's doc offset empty. Doc-mode and valid scalar
2034
                // roots still populate the root subcolumn below.
2035
2
                if (!column_variant.enable_doc_mode() &&
2036
2
                    field_info.scalar_type_id == PrimitiveType::INVALID_TYPE) {
2037
1
                    continue;
2038
1
                }
2039
1
                auto* subcolumn = column_variant.get_subcolumn(paths[i]);
2040
1
                DCHECK(subcolumn != nullptr);
2041
1
                flush_defaults(subcolumn);
2042
1
                subcolumn->insert(std::move(values[i]), std::move(field_info));
2043
1
                continue;
2044
2
            }
2045
72.3k
            if (field_info.scalar_type_id == PrimitiveType::INVALID_TYPE ||
2046
72.3k
                values[i].get_type() == PrimitiveType::TYPE_NULL) {
2047
0
                continue;
2048
0
            }
2049
72.3k
            const auto& path_str = paths[i].get_path();
2050
72.3k
            StringRef path_ref {path_str.data(), path_str.size()};
2051
72.3k
            if (UNLIKELY(!seen_paths.emplace(path_ref).second)) {
2052
0
                throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
2053
0
                                       "may contains duplicated entry : {}",
2054
0
                                       std::string_view(path_str));
2055
0
            }
2056
72.3k
            doc_item_indexes.push_back(i);
2057
72.3k
        }
2058
2059
4.30k
        std::sort(doc_item_indexes.begin(), doc_item_indexes.end(),
2060
617k
                  [&](size_t l, size_t r) { return paths[l].get_path() < paths[r].get_path(); });
2061
72.3k
        for (const auto idx : doc_item_indexes) {
2062
72.3k
            const auto& path_str = paths[idx].get_path();
2063
72.3k
            doc_value_data_paths->insert_data(path_str.data(), path_str.size());
2064
72.3k
            auto& chars = doc_value_data_values->get_chars();
2065
72.3k
            append_field_to_binary_chars(values[idx], chars);
2066
72.3k
            doc_value_data_values->get_offsets().push_back(chars.size());
2067
72.3k
        }
2068
4.30k
    } break;
2069
107k
    }
2070
107k
    doc_value_data_offsets.push_back(doc_value_data_paths->size());
2071
    // /// Insert default values to missed subcolumns.
2072
107k
    const auto& subcolumns = column_variant.get_subcolumns();
2073
4.32M
    for (const auto& entry : subcolumns) {
2074
4.32M
        if (entry->data.size() == old_num_rows) {
2075
            // Handle nested paths differently from simple paths
2076
2.88M
            if (entry->path.has_nested_part()) {
2077
                // Try to insert default from nested, if failed, insert regular default
2078
0
                bool success = UNLIKELY(column_variant.try_insert_default_from_nested(entry));
2079
0
                if (!success) {
2080
0
                    entry->data.insert_default();
2081
0
                }
2082
2.88M
            } else {
2083
                // For non-nested paths, increment default counter
2084
2.88M
                entry->data.increment_default_counter();
2085
2.88M
            }
2086
2.88M
        }
2087
4.32M
    }
2088
107k
    column_variant.incr_num_rows();
2089
107k
    if (column_variant.get_sparse_column()->size() == old_num_rows) {
2090
107k
        column_variant.get_sparse_column_mutable().insert_default();
2091
107k
    }
2092
107k
#ifndef NDEBUG
2093
107k
    column_variant.check_consistency();
2094
107k
#endif
2095
107k
}
2096
2097
// exposed interfaces
2098
/// Parses a single JSON value and appends it as one row to `column`.
/// When `parser` is null, a parser is borrowed from `parsers_pool` for the
/// duration of the call.
void parse_json_to_variant(IColumn& column, const StringRef& json, JsonParser* parser,
                           const ParseConfig& config) {
    if (parser == nullptr) {
        auto pooled_parser = parsers_pool.get([] { return new JsonParser(); });
        parse_json_to_variant_impl(column, json.data, json.size, pooled_parser.get(), config);
        return;
    }
    parse_json_to_variant_impl(column, json.data, json.size, parser, config);
}
2107
2108
/// Parses every row of `raw_json_column` into `column`, one variant row per
/// input row, using a single parser borrowed from `parsers_pool`, and finalizes
/// `column` afterwards.
void parse_json_to_variant(IColumn& column, const ColumnString& raw_json_column,
                           const ParseConfig& config) {
    auto pooled_parser = parsers_pool.get([] { return new JsonParser(); });
    const size_t num_rows = raw_json_column.size();
    for (size_t row = 0; row < num_rows; ++row) {
        const StringRef raw_json = raw_json_column.get_data_at(row);
        parse_json_to_variant_impl(column, raw_json.data, raw_json.size, pooled_parser.get(),
                                   config);
    }
    column.finalize();
}
2117
2118
// parse the doc snapshot column to subcolumns
2119
0
void materialize_docs_to_subcolumns(ColumnVariant& column_variant) {
2120
0
    auto subcolumns = materialize_docs_to_subcolumns_map(column_variant);
2121
2122
0
    for (auto& entry : subcolumns) {
2123
0
        entry.second.finalize();
2124
0
        if (!column_variant.add_sub_column(PathInData(entry.first),
2125
0
                                           IColumn::mutate(entry.second.get_finalized_column_ptr()),
2126
0
                                           entry.second.get_least_common_type())) {
2127
0
            throw doris::Exception(ErrorCode::INTERNAL_ERROR,
2128
0
                                   "Failed to add subcolumn {}, which is from doc snapshot column",
2129
0
                                   entry.first);
2130
0
        }
2131
0
    }
2132
2133
0
    column_variant.finalize();
2134
0
}
2135
2136
// ============ Implementation from variant_util.cpp ============
2137
2138
/// Builds one Subcolumn per distinct path stored in the doc-value columns of `variant`.
///
/// Rows are visited in order; each (path, value) entry of a row is deserialized
/// into the subcolumn of its path, which is first padded with defaults up to the
/// current row. At the end every subcolumn is padded to the total row count.
/// The map keys are views into the variant's path column.
///
/// `expected_unique_paths` sizes the map up front; when it is 0 the number of
/// stored keys, capped at 8192, is used instead.
phmap::flat_hash_map<std::string_view, ColumnVariant::Subcolumn> materialize_docs_to_subcolumns_map(
        const ColumnVariant& variant, size_t expected_unique_paths) {
    constexpr size_t kInitialPathReserve = 8192;
    phmap::flat_hash_map<std::string_view, ColumnVariant::Subcolumn> subcolumns;

    const auto [column_key, column_value] = variant.get_doc_value_data_paths_and_values();
    const auto& column_offsets = variant.serialized_doc_value_column_offsets();
    const size_t num_rows = column_offsets.size();

    DCHECK_EQ(num_rows, variant.size()) << "doc snapshot offsets size mismatch with variant rows";

    size_t reserve_hint = expected_unique_paths;
    if (reserve_hint == 0) {
        reserve_hint = std::min<size_t>(column_key->size(), kInitialPathReserve);
    }
    subcolumns.reserve(reserve_hint);

    for (size_t row = 0; row < num_rows; ++row) {
        // NOTE(review): for row 0 this reads index -1; presumably the offsets
        // container allows that and yields 0 - confirm against its definition.
        const size_t begin_entry = column_offsets[row - 1];
        const size_t end_entry = column_offsets[row];
        for (size_t entry = begin_entry; entry < end_entry; ++entry) {
            const auto& key = column_key->get_data_at(entry);
            const std::string_view path_sv(key.data, key.size);

            auto [slot, is_new] =
                    subcolumns.try_emplace(path_sv, ColumnVariant::Subcolumn {0, true, false});
            auto& subcolumn = slot->second;
            if (is_new) {
                // First time this path shows up: all earlier rows are defaults.
                subcolumn.insert_many_defaults(row);
            } else if (subcolumn.size() != row) {
                // Path was absent in some rows in between.
                subcolumn.insert_many_defaults(row - subcolumn.size());
            }
            subcolumn.deserialize_from_binary_column(column_value, entry);
        }
    }

    // Pad the paths that are missing from the trailing rows.
    for (auto& [path, subcolumn] : subcolumns) {
        const size_t filled = subcolumn.size();
        if (filled != num_rows) {
            subcolumn.insert_many_defaults(num_rows - filled);
        }
    }

    return subcolumns;
}
2180
2181
/// For each block position in `variant_pos`, turns a scalar (not yet parsed)
/// variant column into a parsed one and stores it back into the block.
///
/// Columns that are already parsed are left untouched in the block. A string
/// (or JSONB, converted to JSON text) root is parsed with `configs[i]`; any other
/// root type is kept and only its root node type is adjusted. A nullable input
/// column is re-wrapped with its original null map.
Status _parse_and_materialize_variant_columns(Block& block,
                                              const std::vector<uint32_t>& variant_pos,
                                              const std::vector<ParseConfig>& configs) {
    for (size_t i = 0; i < variant_pos.size(); ++i) {
        auto block_column = block.get_by_position(variant_pos[i]).column;
        const bool is_nullable = is_column_nullable(*block_column);
        MutableColumnPtr owner_column = IColumn::mutate(std::move(block_column));

        // Strip the nullable wrapper (remembering its null map) to reach the variant.
        ColumnPtr nullable_null_map;
        MutableColumnPtr var_column;
        if (is_nullable) {
            const auto& nullable = assert_cast<const ColumnNullable&>(*owner_column);
            nullable_null_map = nullable.get_null_map_column_ptr();
            var_column = IColumn::mutate(nullable.get_nested_column_ptr());
        } else {
            var_column = std::move(owner_column);
        }
        auto& var = assert_cast<ColumnVariant&>(*var_column);
        var_column->finalize();

        if (!var.is_scalar_variant()) {
            // already parsed
            continue;
        }

        VLOG_DEBUG << "parse scalar variant column: " << var.get_root_type()->get_name();

        // Pick the column holding the raw root values.
        ColumnPtr scalar_root_column;
        if (var.get_root_type()->get_primitive_type() == TYPE_JSONB) {
            scalar_root_column = jsonb_root_to_json_string_column(*var.get_root());
        } else {
            const auto& root = *var.get_root();
            if (is_column_nullable(root)) {
                scalar_root_column =
                        assert_cast<const ColumnNullable&>(root).get_nested_column_ptr();
            } else {
                scalar_root_column = var.get_root();
            }
        }

        MutableColumnPtr variant_column;
        if (scalar_root_column->is_column_string()) {
            variant_column = ColumnVariant::create(0, var.enable_doc_mode());
            parse_json_to_variant(*variant_column,
                                  assert_cast<const ColumnString&>(*scalar_root_column),
                                  configs[i]);
        } else {
            // The root may be of a non-string type, e.g. ColumnVariant(Int32).
            // In this case the root is finalized and cast to the JSON type.
            auto expected_root_type =
                    make_nullable(std::make_shared<ColumnVariant::MostCommonType>());
            var.ensure_root_node_type(expected_root_type);
            variant_column = std::move(var_column);
        }

        // Wrap the variant with the original null map if the input was nullable.
        ColumnPtr result = variant_column->get_ptr();
        if (is_nullable) {
            result = ColumnNullable::create(result, nullable_null_map);
        }
        block.get_by_position(variant_pos[i]).column = result;
    }
    return Status::OK();
}
2241
2242
/// Status-returning wrapper around _parse_and_materialize_variant_columns: the
/// call is made inside RETURN_IF_CATCH_EXCEPTION, and otherwise the callee's
/// status is returned unchanged.
Status parse_and_materialize_variant_columns(Block& block, const std::vector<uint32_t>& variant_pos,
                                             const std::vector<ParseConfig>& configs) {
    Status status;
    RETURN_IF_CATCH_EXCEPTION(
            { status = _parse_and_materialize_variant_columns(block, variant_pos, configs); });
    return status;
}
2247
2248
namespace {
2249
2250
ParseConfig::ParseTo select_storage_variant_parse_target(const TabletColumn& column,
2251
369
                                                         const ParseConfig& config) {
2252
    // NestedGroup consumes the parse-time subcolumn tree to build nested storage structures, so it
2253
    // must not go through doc-value staging.
2254
369
    if (column.variant_enable_nested_group()) {
2255
0
        return ParseConfig::ParseTo::OnlySubcolumns;
2256
0
    }
2257
2258
    // Persistent doc mode owns doc-value bucket columns in VariantDocWriter. Keep it separate from
2259
    // the plain non-doc staging optimization, even when typed paths or parent indexes exist.
2260
369
    if (column.variant_enable_doc_mode()) {
2261
3
        return ParseConfig::ParseTo::OnlyDocValueColumn;
2262
3
    }
2263
2264
    // Deprecated flatten-nested still consumes parse-time subcolumns. Predefined typed paths and
2265
    // parent inverted indexes are handled later by regular doc-value staging: typed paths are
2266
    // forced into the materialized set unless typed-to-sparse is enabled, and materialized dynamic
2267
    // subcolumns inherit parent indexes while sparse payloads stay unindexed.
2268
366
    if (config.deprecated_enable_flatten_nested) {
2269
0
        return ParseConfig::ParseTo::OnlySubcolumns;
2270
0
    }
2271
2272
    // Plain dynamic non-doc VARIANT can avoid eagerly creating thousands of parse-time subcolumns.
2273
    // The segment writer will pick the materialized/sparse split from this doc-value KV staging.
2274
    // Keep a BE switch so tests and rollouts can compare the old parse-time path with staging under
2275
    // the same writer and schema.
2276
366
    switch (config::variant_storage_parse_mode) {
2277
362
    case 0:
2278
364
    case 2:
2279
364
        return ParseConfig::ParseTo::OnlyDocValueColumn;
2280
2
    case 1:
2281
2
        return ParseConfig::ParseTo::OnlySubcolumns;
2282
0
    default:
2283
0
        CHECK(false) << "invalid variant_storage_parse_mode: "
2284
0
                     << config::variant_storage_parse_mode;
2285
0
        return ParseConfig::ParseTo::OnlyDocValueColumn;
2286
366
    }
2287
366
}
2288
2289
} // namespace
2290
2291
/// Parses every VARIANT column referenced by `column_pos` (schema positions),
/// building one ParseConfig per variant column from the tablet schema and the
/// BE configuration. Returns OK right away when none of the columns is a VARIANT.
///
/// NOTE(review): the schema position is also what gets passed on as the block
/// position of each variant column - presumably the block is laid out by schema
/// position; confirm against the callers.
Status parse_and_materialize_variant_columns(Block& block, const TabletSchema& tablet_schema,
                                             const std::vector<uint32_t>& column_pos) {
    std::vector<uint32_t> variant_column_pos;
    std::vector<uint32_t> variant_schema_pos;
    variant_column_pos.reserve(column_pos.size());
    variant_schema_pos.reserve(column_pos.size());
    for (const uint32_t schema_pos : column_pos) {
        if (tablet_schema.column(schema_pos).is_variant_type()) {
            variant_column_pos.push_back(schema_pos);
            variant_schema_pos.push_back(schema_pos);
        }
    }

    if (variant_column_pos.empty()) {
        return Status::OK();
    }

    std::vector<ParseConfig> configs(variant_column_pos.size());
    for (size_t i = 0; i < configs.size(); ++i) {
        ParseConfig& parse_config = configs[i];
        // Deprecated legacy flatten-nested switch. Distinct from variant_enable_nested_group.
        parse_config.deprecated_enable_flatten_nested =
                tablet_schema.deprecated_variant_flatten_nested();
        parse_config.check_duplicate_json_path = config::variant_enable_duplicate_json_path_check;

        const auto& column = tablet_schema.column(variant_schema_pos[i]);
        if (!column.is_variant_type()) {
            return Status::InternalError("column is not variant type, column name: {}",
                                         column.name());
        }
        parse_config.parse_to = select_storage_variant_parse_target(column, parse_config);
    }

    return parse_and_materialize_variant_columns(block, variant_column_pos, configs);
}
2327
2328
} // namespace doris::variant_util