Coverage Report

Created: 2026-05-14 09:45

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exec/common/variant_util.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "exec/common/variant_util.h"
19
20
#include <fmt/format.h>
21
#include <gen_cpp/FrontendService.h>
22
#include <gen_cpp/FrontendService_types.h>
23
#include <gen_cpp/HeartbeatService_types.h>
24
#include <gen_cpp/MasterService_types.h>
25
#include <gen_cpp/Status_types.h>
26
#include <gen_cpp/Types_types.h>
27
#include <glog/logging.h>
28
#include <rapidjson/document.h>
29
#include <rapidjson/stringbuffer.h>
30
#include <rapidjson/writer.h>
31
#include <simdjson/simdjson.h> // IWYU pragma: keep
32
#include <unicode/uchar.h>
33
34
#include <algorithm>
35
#include <cassert>
36
#include <cctype>
37
#include <cstddef>
38
#include <cstdint>
39
#include <cstring>
40
#include <list>
41
#include <map>
42
#include <memory>
43
#include <mutex>
44
#include <optional>
45
#include <ostream>
46
#include <ranges>
47
#include <set>
48
#include <stack>
49
#include <string>
50
#include <string_view>
51
#include <unordered_map>
52
#include <utility>
53
#include <vector>
54
55
#include "common/config.h"
56
#include "common/status.h"
57
#include "core/assert_cast.h"
58
#include "core/block/block.h"
59
#include "core/block/column_numbers.h"
60
#include "core/block/column_with_type_and_name.h"
61
#include "core/column/column.h"
62
#include "core/column/column_array.h"
63
#include "core/column/column_map.h"
64
#include "core/column/column_nullable.h"
65
#include "core/column/column_string.h"
66
#include "core/column/column_variant.h"
67
#include "core/data_type/data_type.h"
68
#include "core/data_type/data_type_array.h"
69
#include "core/data_type/data_type_factory.hpp"
70
#include "core/data_type/data_type_jsonb.h"
71
#include "core/data_type/data_type_nullable.h"
72
#include "core/data_type/data_type_string.h"
73
#include "core/data_type/data_type_variant.h"
74
#include "core/data_type/define_primitive_type.h"
75
#include "core/data_type/get_least_supertype.h"
76
#include "core/data_type/primitive_type.h"
77
#include "core/field.h"
78
#include "core/typeid_cast.h"
79
#include "core/types.h"
80
#include "core/value/bitmap_value.h"
81
#include "exec/common/field_visitors.h"
82
#include "exec/common/sip_hash.h"
83
#include "exprs/function/function.h"
84
#include "exprs/function/simple_function_factory.h"
85
#include "exprs/function_context.h"
86
#include "exprs/json_functions.h"
87
#include "re2/re2.h"
88
#include "runtime/exec_env.h"
89
#include "runtime/runtime_state.h"
90
#include "storage/olap_common.h"
91
#include "storage/rowset/beta_rowset.h"
92
#include "storage/rowset/rowset.h"
93
#include "storage/rowset/rowset_fwd.h"
94
#include "storage/segment/segment_loader.h"
95
#include "storage/segment/variant/nested_group_path.h"
96
#include "storage/segment/variant/variant_column_reader.h"
97
#include "storage/segment/variant/variant_column_writer_impl.h"
98
#include "storage/tablet/tablet.h"
99
#include "storage/tablet/tablet_fwd.h"
100
#include "storage/tablet/tablet_schema.h"
101
#include "util/client_cache.h"
102
#include "util/defer_op.h"
103
#include "util/json/json_parser.h"
104
#include "util/json/path_in_data.h"
105
#include "util/json/simd_json_parser.h"
106
#include "util/jsonb_utils.h"
107
108
namespace doris::variant_util {
109
110
232
inline void append_escaped_regex_char(std::string* regex_output, char ch) {
111
232
    switch (ch) {
112
11
    case '.':
113
13
    case '^':
114
15
    case '$':
115
17
    case '+':
116
22
    case '*':
117
24
    case '?':
118
26
    case '(':
119
28
    case ')':
120
30
    case '|':
121
32
    case '{':
122
34
    case '}':
123
36
    case '[':
124
36
    case ']':
125
40
    case '\\':
126
40
        regex_output->push_back('\\');
127
40
        regex_output->push_back(ch);
128
40
        break;
129
192
    default:
130
192
        regex_output->push_back(ch);
131
192
        break;
132
232
    }
133
232
}
134
135
// Small LRU to cap compiled glob patterns
136
constexpr size_t kGlobRegexCacheCapacity = 256;
137
138
struct GlobRegexCacheEntry {
139
    std::shared_ptr<RE2> re2;
140
    std::list<std::string>::iterator lru_it;
141
};
142
143
static std::mutex g_glob_regex_cache_mutex;
144
static std::list<std::string> g_glob_regex_cache_lru;
145
static std::unordered_map<std::string, GlobRegexCacheEntry> g_glob_regex_cache;
146
147
174
std::shared_ptr<RE2> get_or_build_re2(const std::string& glob_pattern) {
148
174
    {
149
174
        std::lock_guard<std::mutex> lock(g_glob_regex_cache_mutex);
150
174
        auto it = g_glob_regex_cache.find(glob_pattern);
151
174
        if (it != g_glob_regex_cache.end()) {
152
126
            g_glob_regex_cache_lru.splice(g_glob_regex_cache_lru.begin(), g_glob_regex_cache_lru,
153
126
                                          it->second.lru_it);
154
126
            return it->second.re2;
155
126
        }
156
174
    }
157
48
    std::string regex_pattern;
158
48
    Status st = glob_to_regex(glob_pattern, &regex_pattern);
159
48
    if (!st.ok()) {
160
2
        return nullptr;
161
2
    }
162
46
    auto compiled = std::make_shared<RE2>(regex_pattern);
163
46
    if (!compiled->ok()) {
164
3
        return nullptr;
165
3
    }
166
43
    {
167
43
        std::lock_guard<std::mutex> lock(g_glob_regex_cache_mutex);
168
43
        auto it = g_glob_regex_cache.find(glob_pattern);
169
43
        if (it != g_glob_regex_cache.end()) {
170
0
            g_glob_regex_cache_lru.splice(g_glob_regex_cache_lru.begin(), g_glob_regex_cache_lru,
171
0
                                          it->second.lru_it);
172
0
            return it->second.re2;
173
0
        }
174
43
        g_glob_regex_cache_lru.push_front(glob_pattern);
175
43
        g_glob_regex_cache.emplace(glob_pattern,
176
43
                                   GlobRegexCacheEntry {compiled, g_glob_regex_cache_lru.begin()});
177
43
        if (g_glob_regex_cache.size() > kGlobRegexCacheCapacity) {
178
0
            const std::string& evict_key = g_glob_regex_cache_lru.back();
179
0
            g_glob_regex_cache.erase(evict_key);
180
0
            g_glob_regex_cache_lru.pop_back();
181
0
        }
182
43
    }
183
0
    return compiled;
184
43
}
185
186
// Convert a restricted glob pattern into a regex.
187
// Supported: '*', '?', '[...]', '\\' escape. Others are treated as literals.
188
86
Status glob_to_regex(const std::string& glob_pattern, std::string* regex_pattern) {
189
86
    regex_pattern->clear();
190
86
    regex_pattern->append("^");
191
86
    bool is_escaped = false;
192
86
    size_t pattern_length = glob_pattern.size();
193
384
    for (size_t index = 0; index < pattern_length; ++index) {
194
302
        char current_char = glob_pattern[index];
195
302
        if (is_escaped) {
196
9
            append_escaped_regex_char(regex_pattern, current_char);
197
9
            is_escaped = false;
198
9
            continue;
199
9
        }
200
293
        if (current_char == '\\') {
201
13
            is_escaped = true;
202
13
            continue;
203
13
        }
204
280
        if (current_char == '*') {
205
16
            regex_pattern->append(".*");
206
16
            continue;
207
16
        }
208
264
        if (current_char == '?') {
209
13
            regex_pattern->append(".");
210
13
            continue;
211
13
        }
212
251
        if (current_char == '[') {
213
32
            size_t class_index = index + 1;
214
32
            bool class_closed = false;
215
32
            bool is_class_escaped = false;
216
32
            std::string class_buffer;
217
32
            if (class_index < pattern_length &&
218
32
                (glob_pattern[class_index] == '!' || glob_pattern[class_index] == '^')) {
219
9
                class_buffer.push_back('^');
220
9
                ++class_index;
221
9
            }
222
95
            for (; class_index < pattern_length; ++class_index) {
223
91
                char class_char = glob_pattern[class_index];
224
91
                if (is_class_escaped) {
225
10
                    class_buffer.push_back(class_char);
226
10
                    is_class_escaped = false;
227
10
                    continue;
228
10
                }
229
81
                if (class_char == '\\') {
230
10
                    is_class_escaped = true;
231
10
                    continue;
232
10
                }
233
71
                if (class_char == ']') {
234
28
                    class_closed = true;
235
28
                    break;
236
28
                }
237
43
                class_buffer.push_back(class_char);
238
43
            }
239
32
            if (!class_closed) {
240
4
                return Status::InvalidArgument("Unclosed character class in glob pattern: {}",
241
4
                                               glob_pattern);
242
4
            }
243
28
            regex_pattern->append("[");
244
28
            regex_pattern->append(class_buffer);
245
28
            regex_pattern->append("]");
246
28
            index = class_index;
247
28
            continue;
248
32
        }
249
219
        append_escaped_regex_char(regex_pattern, current_char);
250
219
    }
251
82
    if (is_escaped) {
252
4
        append_escaped_regex_char(regex_pattern, '\\');
253
4
    }
254
82
    regex_pattern->append("$");
255
82
    return Status::OK();
256
86
}
257
258
174
bool glob_match_re2(const std::string& glob_pattern, const std::string& candidate_path) {
259
174
    auto compiled = get_or_build_re2(glob_pattern);
260
174
    if (compiled == nullptr) {
261
5
        return false;
262
5
    }
263
169
    return RE2::FullMatch(candidate_path, *compiled);
264
174
}
265
266
// NestedGroup's physical children and offsets are produced by NestedGroupWriteProvider, not by
267
// appending TabletSchema extracted columns here. This predicate keeps only ordinary Variant paths
268
// that are outside the NG tree, for example `v.owner` beside `v.items[*]`.
269
0
bool is_regular_path_outside_nested_group(const PathInData& path) {
270
0
    const std::string& relative_path = path.get_path();
271
0
    return !relative_path.empty() && !path.get_is_typed() && !path.has_nested_part() &&
272
0
           !segment_v2::contains_nested_group_marker(relative_path) &&
273
0
           !segment_v2::is_root_nested_group_path(relative_path) &&
274
0
           relative_path != SPARSE_COLUMN_PATH &&
275
0
           relative_path.find(DOC_VALUE_COLUMN_PATH) == std::string::npos;
276
0
}
277
278
bool should_materialize_nested_group_regular_subcolumns(
279
        const TabletColumnPtr& column,
280
7
        const std::unordered_map<int32_t, VariantExtendedInfo>& uid_to_variant_extended_info) {
281
7
    const auto info_it = uid_to_variant_extended_info.find(column->unique_id());
282
7
    return column->variant_enable_nested_group() ||
283
7
           (info_it != uid_to_variant_extended_info.end() && info_it->second.has_nested_group);
284
7
}
285
286
std::unordered_set<int32_t> collect_nested_group_compaction_root_uids(
287
        const TabletSchemaSPtr& target,
288
41
        const std::unordered_map<int32_t, VariantExtendedInfo>& uid_to_variant_extended_info) {
289
41
    std::unordered_set<int32_t> root_uids;
290
828
    for (const TabletColumnPtr& column : target->columns()) {
291
828
        if (column->is_variant_type() && should_materialize_nested_group_regular_subcolumns(
292
7
                                                 column, uid_to_variant_extended_info)) {
293
1
            root_uids.insert(column->unique_id());
294
1
        }
295
828
    }
296
41
    return root_uids;
297
41
}
298
299
PathToDataTypes collect_regular_types_outside_nested_group(
300
1
        const VariantExtendedInfo& extended_info) {
301
1
    PathToDataTypes regular_path_to_data_types;
302
1
    for (const auto& [path, data_types] : extended_info.path_to_data_types) {
303
0
        if (!is_regular_path_outside_nested_group(path)) {
304
0
            continue;
305
0
        }
306
0
        regular_path_to_data_types.emplace(path, data_types);
307
0
    }
308
1
    return regular_path_to_data_types;
309
1
}
310
311
14
size_t get_number_of_dimensions(const IDataType& type) {
312
14
    if (const auto* type_array = typeid_cast<const DataTypeArray*>(&type)) {
313
4
        return type_array->get_number_of_dimensions();
314
4
    }
315
10
    return 0;
316
14
}
317
3
size_t get_number_of_dimensions(const IColumn& column) {
318
3
    if (const auto* column_array = check_and_get_column<ColumnArray>(column)) {
319
2
        return column_array->get_number_of_dimensions();
320
2
    }
321
1
    return 0;
322
3
}
323
324
980
DataTypePtr get_base_type_of_array(const DataTypePtr& type) {
325
    /// Get raw pointers to avoid extra copying of type pointers.
326
980
    const DataTypeArray* last_array = nullptr;
327
980
    const auto* current_type = type.get();
328
980
    if (const auto* nullable = typeid_cast<const DataTypeNullable*>(current_type)) {
329
976
        current_type = nullable->get_nested_type().get();
330
976
    }
331
998
    while (const auto* type_array = typeid_cast<const DataTypeArray*>(current_type)) {
332
18
        current_type = type_array->get_nested_type().get();
333
18
        last_array = type_array;
334
18
        if (const auto* nullable = typeid_cast<const DataTypeNullable*>(current_type)) {
335
12
            current_type = nullable->get_nested_type().get();
336
12
        }
337
18
    }
338
980
    return last_array ? last_array->get_nested_type() : type;
339
980
}
340
341
49.1k
Status cast_column(const ColumnWithTypeAndName& arg, const DataTypePtr& type, ColumnPtr* result) {
342
49.1k
    ColumnsWithTypeAndName arguments {arg, {nullptr, type, type->get_name()}};
343
344
    // To prevent from null info lost, we should not call function since the function framework will wrap
345
    // nullable to Variant instead of the root of Variant
346
    // correct output: Nullable(Array(int)) -> Nullable(Variant(Nullable(Array(int))))
347
    // incorrect output: Nullable(Array(int)) -> Nullable(Variant(Array(int)))
348
49.1k
    if (type->get_primitive_type() == TYPE_VARIANT) {
349
        // If source column is variant, so the nullable info is different from dst column
350
3
        if (arg.type->get_primitive_type() == TYPE_VARIANT) {
351
1
            *result = type->is_nullable() ? make_nullable(arg.column) : remove_nullable(arg.column);
352
1
            return Status::OK();
353
1
        }
354
        // set variant root column/type to from column/type
355
3
        CHECK(arg.column->is_nullable());
356
2
        auto to_type = remove_nullable(type);
357
2
        const auto& data_type_object = assert_cast<const DataTypeVariant&>(*to_type);
358
2
        auto variant = ColumnVariant::create(data_type_object.variant_max_subcolumns_count(),
359
2
                                             data_type_object.enable_doc_mode());
360
361
2
        variant->create_root(arg.type, arg.column->assume_mutable());
362
2
        ColumnPtr nullable = ColumnNullable::create(
363
2
                variant->get_ptr(),
364
2
                check_and_get_column<ColumnNullable>(arg.column.get())->get_null_map_column_ptr());
365
2
        *result = type->is_nullable() ? nullable : variant->get_ptr();
366
2
        return Status::OK();
367
3
    }
368
369
49.1k
    auto function = SimpleFunctionFactory::instance().get_function("CAST", arguments, type);
370
49.1k
    if (!function) {
371
0
        return Status::InternalError("Not found cast function {} to {}", arg.type->get_name(),
372
0
                                     type->get_name());
373
0
    }
374
49.1k
    Block tmp_block {arguments};
375
49.1k
    uint32_t result_column = cast_set<uint32_t>(tmp_block.columns());
376
49.1k
    RuntimeState state;
377
49.1k
    auto ctx = FunctionContext::create_context(&state, {}, {});
378
379
49.1k
    if (arg.type->get_primitive_type() == INVALID_TYPE) {
380
        // cast from nothing to any type should result in nulls
381
643
        *result = type->create_column_const_with_default_value(arg.column->size())
382
643
                          ->convert_to_full_column_if_const();
383
643
        return Status::OK();
384
643
    }
385
386
    // We convert column string to jsonb type just add a string jsonb field to dst column instead of parse
387
    // each line in original string column.
388
48.4k
    ctx->set_string_as_jsonb_string(true);
389
48.4k
    ctx->set_jsonb_string_as_string(true);
390
48.4k
    tmp_block.insert({nullptr, type, arg.name});
391
    // TODO(lihangyu): we should handle this error in strict mode
392
48.4k
    if (!function->execute(ctx.get(), tmp_block, {0}, result_column, arg.column->size())) {
393
0
        LOG_EVERY_N(WARNING, 100) << fmt::format("cast from {} to {}", arg.type->get_name(),
394
0
                                                 type->get_name());
395
0
        *result = type->create_column_const_with_default_value(arg.column->size())
396
0
                          ->convert_to_full_column_if_const();
397
0
        return Status::OK();
398
0
    }
399
48.4k
    *result = tmp_block.get_by_position(result_column).column->convert_to_full_column_if_const();
400
48.4k
    VLOG_DEBUG << fmt::format("{} before convert {}, after convert {}", arg.name,
401
0
                              arg.column->get_name(), (*result)->get_name());
402
48.4k
    return Status::OK();
403
48.4k
}
404
405
void get_column_by_type(const DataTypePtr& data_type, const std::string& name, TabletColumn& column,
406
2.01k
                        const ExtraInfo& ext_info) {
407
2.01k
    column.set_name(name);
408
2.01k
    column.set_type(data_type->get_storage_field_type());
409
2.01k
    if (ext_info.unique_id >= 0) {
410
4
        column.set_unique_id(ext_info.unique_id);
411
4
    }
412
2.01k
    if (ext_info.parent_unique_id >= 0) {
413
1.00k
        column.set_parent_unique_id(ext_info.parent_unique_id);
414
1.00k
    }
415
2.01k
    if (!ext_info.path_info.empty()) {
416
1.00k
        column.set_path_info(ext_info.path_info);
417
1.00k
    }
418
2.01k
    if (data_type->is_nullable()) {
419
998
        const auto& real_type = static_cast<const DataTypeNullable&>(*data_type);
420
998
        column.set_is_nullable(true);
421
998
        get_column_by_type(real_type.get_nested_type(), name, column, {});
422
998
        return;
423
998
    }
424
1.02k
    if (data_type->get_primitive_type() == PrimitiveType::TYPE_ARRAY) {
425
13
        TabletColumn child;
426
13
        get_column_by_type(assert_cast<const DataTypeArray*>(data_type.get())->get_nested_type(),
427
13
                           "", child, {});
428
13
        column.set_length(TabletColumn::get_field_length_by_type(TPrimitiveType::ARRAY, 0));
429
13
        column.add_sub_column(child);
430
13
        return;
431
13
    }
432
1.00k
    if (data_type->get_primitive_type() == PrimitiveType::TYPE_VARIANT) {
433
0
        const auto* dt_variant = assert_cast<const DataTypeVariant*>(data_type.get());
434
0
        column.set_variant_max_subcolumns_count(dt_variant->variant_max_subcolumns_count());
435
0
        column.set_variant_enable_doc_mode(dt_variant->enable_doc_mode());
436
0
        return;
437
0
    }
438
    // size is not fixed when type is string or json
439
1.00k
    if (is_string_type(data_type->get_primitive_type()) ||
440
1.00k
        data_type->get_primitive_type() == TYPE_JSONB) {
441
366
        column.set_length(INT_MAX);
442
366
        return;
443
366
    }
444
445
641
    PrimitiveType type = data_type->get_primitive_type();
446
641
    if (is_int_or_bool(type) || is_string_type(type) || is_float_or_double(type) || is_ip(type) ||
447
641
        is_date_or_datetime(type) || type == PrimitiveType::TYPE_DATEV2) {
448
638
        column.set_length(cast_set<int32_t>(data_type->get_size_of_value_in_memory()));
449
638
        return;
450
638
    }
451
3
    if (is_decimal(type)) {
452
1
        column.set_precision(data_type->get_precision());
453
1
        column.set_frac(data_type->get_scale());
454
1
        return;
455
1
    }
456
    // datetimev2 needs scale
457
2
    if (type == PrimitiveType::TYPE_DATETIMEV2 || type == PrimitiveType::TYPE_TIMESTAMPTZ) {
458
1
        column.set_precision(-1);
459
1
        column.set_frac(data_type->get_scale());
460
1
        return;
461
1
    }
462
463
1
    throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
464
1
                           "unexcepted data column type: {}, column name is: {}",
465
1
                           data_type->get_name(), name);
466
2
}
467
468
TabletColumn get_column_by_type(const DataTypePtr& data_type, const std::string& name,
469
1.00k
                                const ExtraInfo& ext_info) {
470
1.00k
    TabletColumn result;
471
1.00k
    get_column_by_type(data_type, name, result, ext_info);
472
1.00k
    return result;
473
1.00k
}
474
475
// check if two paths which same prefix have different structure
476
static bool has_different_structure_in_same_path(const PathInData::Parts& lhs,
477
9.00k
                                                 const PathInData::Parts& rhs) {
478
9.00k
    if (lhs.size() != rhs.size()) {
479
1
        return false; // different size means different structure
480
1
    }
481
    // Since we group by path string, lhs and rhs must have the same size and keys
482
    // We only need to check if they have different nested structure
483
35.9k
    for (size_t i = 0; i < lhs.size(); ++i) {
484
26.9k
        if (lhs[i] != rhs[i]) {
485
5
            VLOG_DEBUG << fmt::format(
486
0
                    "Check different structure: {} vs {}, lhs[i].is_nested: {}, rhs[i].is_nested: "
487
0
                    "{}",
488
0
                    lhs[i].key, rhs[i].key, lhs[i].is_nested, rhs[i].is_nested);
489
5
            return true;
490
5
        }
491
26.9k
    }
492
8.99k
    return false;
493
8.99k
}
494
495
3.02k
Status check_variant_has_no_ambiguous_paths(const PathsInData& tuple_paths) {
496
    // Group paths by their string representation to reduce comparisons
497
3.02k
    std::unordered_map<std::string, std::vector<size_t>> path_groups;
498
499
24.0k
    for (size_t i = 0; i < tuple_paths.size(); ++i) {
500
        // same path should have same structure, so we group them by path
501
21.0k
        path_groups[tuple_paths[i].get_path()].push_back(i);
502
        // print part of tuple_paths[i]
503
21.0k
        VLOG_DEBUG << "tuple_paths[i]: " << tuple_paths[i].get_path();
504
21.0k
    }
505
506
    // Only compare paths within the same group
507
12.0k
    for (const auto& [path_str, indices] : path_groups) {
508
12.0k
        if (indices.size() <= 1) {
509
3.02k
            continue; // No conflicts possible
510
3.02k
        }
511
512
        // Compare all pairs within this group
513
26.9k
        for (size_t i = 0; i < indices.size(); ++i) {
514
26.9k
            for (size_t j = 0; j < i; ++j) {
515
9.00k
                if (has_different_structure_in_same_path(tuple_paths[indices[i]].get_parts(),
516
9.00k
                                                         tuple_paths[indices[j]].get_parts())) {
517
5
                    return Status::DataQualityError(
518
5
                            "Ambiguous paths: {} vs {} with different nested part {} vs {}",
519
5
                            tuple_paths[indices[i]].get_path(), tuple_paths[indices[j]].get_path(),
520
5
                            tuple_paths[indices[i]].has_nested_part(),
521
5
                            tuple_paths[indices[j]].has_nested_part());
522
5
                }
523
9.00k
            }
524
18.0k
        }
525
9.00k
    }
526
3.01k
    return Status::OK();
527
3.02k
}
528
529
Status update_least_schema_internal(const std::map<PathInData, DataTypes>& subcolumns_types,
530
                                    TabletSchemaSPtr& common_schema, int32_t variant_col_unique_id,
531
                                    const std::map<std::string, TabletColumnPtr>& typed_columns,
532
8
                                    std::set<PathInData>* path_set) {
533
8
    PathsInData tuple_paths;
534
8
    DataTypes tuple_types;
535
8
    CHECK(common_schema.use_count() == 1);
536
    // Get the least common type for all paths.
537
8
    for (const auto& [key, subtypes] : subcolumns_types) {
538
7
        assert(!subtypes.empty());
539
7
        if (key.get_path() == ColumnVariant::COLUMN_NAME_DUMMY) {
540
0
            continue;
541
0
        }
542
7
        size_t first_dim = get_number_of_dimensions(*subtypes[0]);
543
7
        tuple_paths.emplace_back(key);
544
10
        for (size_t i = 1; i < subtypes.size(); ++i) {
545
4
            if (first_dim != get_number_of_dimensions(*subtypes[i])) {
546
1
                tuple_types.emplace_back(make_nullable(std::make_shared<DataTypeJsonb>()));
547
1
                LOG(INFO) << fmt::format(
548
1
                        "Uncompatible types of subcolumn '{}': {} and {}, cast to JSONB",
549
1
                        key.get_path(), subtypes[0]->get_name(), subtypes[i]->get_name());
550
1
                break;
551
1
            }
552
4
        }
553
7
        if (tuple_paths.size() == tuple_types.size()) {
554
1
            continue;
555
1
        }
556
6
        DataTypePtr common_type;
557
6
        get_least_supertype_jsonb(subtypes, &common_type);
558
6
        if (!common_type->is_nullable()) {
559
3
            common_type = make_nullable(common_type);
560
3
        }
561
6
        tuple_types.emplace_back(common_type);
562
6
    }
563
8
    CHECK_EQ(tuple_paths.size(), tuple_types.size());
564
565
    // Append all common type columns of this variant
566
15
    for (int i = 0; i < tuple_paths.size(); ++i) {
567
7
        TabletColumn common_column;
568
        // typed path not contains root part
569
7
        auto path_without_root = tuple_paths[i].copy_pop_front().get_path();
570
7
        if (typed_columns.contains(path_without_root) && !tuple_paths[i].has_nested_part()) {
571
0
            common_column = *typed_columns.at(path_without_root);
572
            // parent unique id and path may not be init in write path
573
0
            common_column.set_parent_unique_id(variant_col_unique_id);
574
0
            common_column.set_path_info(tuple_paths[i]);
575
0
            common_column.set_name(tuple_paths[i].get_path());
576
7
        } else {
577
            // const std::string& column_name = variant_col_name + "." + tuple_paths[i].get_path();
578
7
            get_column_by_type(tuple_types[i], tuple_paths[i].get_path(), common_column,
579
7
                               ExtraInfo {.unique_id = -1,
580
7
                                          .parent_unique_id = variant_col_unique_id,
581
7
                                          .path_info = tuple_paths[i]});
582
7
        }
583
7
        common_schema->append_column(common_column);
584
7
        if (path_set != nullptr) {
585
4
            path_set->insert(tuple_paths[i]);
586
4
        }
587
7
    }
588
8
    return Status::OK();
589
8
}
590
591
Status update_least_common_schema(const std::vector<TabletSchemaSPtr>& schemas,
592
                                  TabletSchemaSPtr& common_schema, int32_t variant_col_unique_id,
593
7
                                  std::set<PathInData>* path_set) {
594
7
    std::map<std::string, TabletColumnPtr> typed_columns;
595
7
    for (const TabletColumnPtr& col :
596
7
         common_schema->column_by_uid(variant_col_unique_id).get_sub_columns()) {
597
2
        typed_columns[col->name()] = col;
598
2
    }
599
    // Types of subcolumns by path from all tuples.
600
7
    std::map<PathInData, DataTypes> subcolumns_types;
601
602
    // Collect all paths first to enable batch checking
603
7
    std::vector<PathInData> all_paths;
604
605
12
    for (const TabletSchemaSPtr& schema : schemas) {
606
16
        for (const TabletColumnPtr& col : schema->columns()) {
607
            // Get subcolumns of this variant
608
16
            if (col->has_path_info() && col->parent_unique_id() >= 0 &&
609
16
                col->parent_unique_id() == variant_col_unique_id) {
610
6
                subcolumns_types[*col->path_info_ptr()].emplace_back(
611
6
                        DataTypeFactory::instance().create_data_type(*col, col->is_nullable()));
612
6
                all_paths.push_back(*col->path_info_ptr());
613
6
            }
614
16
        }
615
12
    }
616
617
    // Batch check for conflicts
618
7
    RETURN_IF_ERROR(check_variant_has_no_ambiguous_paths(all_paths));
619
620
7
    return update_least_schema_internal(subcolumns_types, common_schema, variant_col_unique_id,
621
7
                                        typed_columns, path_set);
622
7
}
623
624
// Keep variant subcolumn BF support aligned with FE DDL checks.
625
1.01k
bool is_bf_supported_by_fe_for_variant_subcolumn(FieldType type) {
626
1.01k
    switch (type) {
627
0
    case FieldType::OLAP_FIELD_TYPE_SMALLINT:
628
7
    case FieldType::OLAP_FIELD_TYPE_INT:
629
628
    case FieldType::OLAP_FIELD_TYPE_BIGINT:
630
628
    case FieldType::OLAP_FIELD_TYPE_LARGEINT:
631
628
    case FieldType::OLAP_FIELD_TYPE_CHAR:
632
628
    case FieldType::OLAP_FIELD_TYPE_VARCHAR:
633
993
    case FieldType::OLAP_FIELD_TYPE_STRING:
634
993
    case FieldType::OLAP_FIELD_TYPE_DATE:
635
993
    case FieldType::OLAP_FIELD_TYPE_DATETIME:
636
1.00k
    case FieldType::OLAP_FIELD_TYPE_DATEV2:
637
1.00k
    case FieldType::OLAP_FIELD_TYPE_DATETIMEV2:
638
1.00k
    case FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ:
639
1.00k
    case FieldType::OLAP_FIELD_TYPE_DECIMAL:
640
1.00k
    case FieldType::OLAP_FIELD_TYPE_DECIMAL32:
641
1.00k
    case FieldType::OLAP_FIELD_TYPE_DECIMAL64:
642
1.00k
    case FieldType::OLAP_FIELD_TYPE_DECIMAL128I:
643
1.00k
    case FieldType::OLAP_FIELD_TYPE_DECIMAL256:
644
1.00k
    case FieldType::OLAP_FIELD_TYPE_IPV4:
645
1.00k
    case FieldType::OLAP_FIELD_TYPE_IPV6:
646
1.00k
        return true;
647
16
    default:
648
16
        return false;
649
1.01k
    }
650
1.01k
}
651
652
void inherit_column_attributes(const TabletColumn& source, TabletColumn& target,
653
1.01k
                               TabletSchemaSPtr* target_schema) {
654
1.01k
    if (!target.is_extracted_column()) {
655
0
        return;
656
0
    }
657
1.01k
    target.set_aggregation_method(source.aggregation());
658
659
    // 1. bloom filter
660
1.01k
    if (is_bf_supported_by_fe_for_variant_subcolumn(target.type())) {
661
1.00k
        target.set_is_bf_column(source.is_bf_column());
662
1.00k
    }
663
664
1.01k
    if (!target_schema) {
665
1.00k
        return;
666
1.00k
    }
667
668
    // 2. inverted index
669
8
    TabletIndexes indexes_to_add;
670
8
    auto source_indexes = (*target_schema)->inverted_indexs(source.unique_id());
671
    // if target is variant type, we need to inherit all indexes
672
    // because this schema is a read schema from fe
673
8
    if (target.is_variant_type()) {
674
0
        for (auto& index : source_indexes) {
675
0
            auto index_info = std::make_shared<TabletIndex>(*index);
676
0
            index_info->set_escaped_escaped_index_suffix_path(target.path_info_ptr()->get_path());
677
0
            indexes_to_add.emplace_back(std::move(index_info));
678
0
        }
679
8
    } else {
680
8
        inherit_index(source_indexes, indexes_to_add, target);
681
8
    }
682
8
    auto target_indexes = (*target_schema)
683
8
                                  ->inverted_indexs(target.parent_unique_id(),
684
8
                                                    target.path_info_ptr()->get_path());
685
8
    if (target_indexes.empty()) {
686
8
        for (auto& index_info : indexes_to_add) {
687
8
            (*target_schema)->append_index(std::move(*index_info));
688
8
        }
689
8
    }
690
691
    // 3. TODO: gnragm bf index
692
8
}
693
694
6
// Walk every column of `schema` and let each extracted column inherit attributes
// (and missing index meta) from its parent column.
void inherit_column_attributes(TabletSchemaSPtr& schema) {
    for (size_t idx = 0; idx < schema->num_columns(); ++idx) {
        TabletColumn& extracted = schema->mutable_column(idx);
        // Skip regular columns, and extracted columns whose parent is missing
        // (maybe dropped).
        const bool has_parent = extracted.is_extracted_column() &&
                                schema->field_index(extracted.parent_unique_id()) != -1;
        if (has_parent) {
            inherit_column_attributes(schema->column_by_uid(extracted.parent_unique_id()),
                                      extracted, &schema);
        }
    }
}
708
709
// Build `output_schema` as the least common schema across `schemas`.
//
// The base is `base_schema` when provided, otherwise the entry of `schemas` with the
// highest schema version. Extracted columns of the base are dropped, then
// update_least_common_schema() is run once per variant column and attributes are
// inherited by the resulting extracted columns.
//
// Returns:
//   - InternalError when no base schema is given and `schemas` is empty
//     (previously this dereferenced the end iterator of an empty range),
//   - any error from update_least_common_schema(),
//   - DataQualityError when `check_schema_size` is set and the merged schema exceeds
//     config::variant_max_merged_tablet_schema_size columns,
//   - OK otherwise.
Status get_least_common_schema(const std::vector<TabletSchemaSPtr>& schemas,
                               const TabletSchemaSPtr& base_schema, TabletSchemaSPtr& output_schema,
                               bool check_schema_size) {
    std::vector<int32_t> variant_column_unique_id;
    // Construct a schema excluding the extracted columns and gather unique identifiers for variants.
    // Ensure that the output schema also excludes these extracted columns. This approach prevents
    // duplicated paths following the update_least_common_schema process.
    auto build_schema_without_extracted_columns = [&](const TabletSchemaSPtr& source_schema) {
        output_schema = std::make_shared<TabletSchema>();
        // not copy columns but only shadow copy other attributes
        output_schema->shawdow_copy_without_columns(*source_schema);
        // Get all columns without extracted columns and collect variant col unique id
        for (const TabletColumnPtr& col : source_schema->columns()) {
            if (col->is_variant_type()) {
                variant_column_unique_id.push_back(col->unique_id());
            }
            if (!col->is_extracted_column()) {
                output_schema->append_column(*col);
            }
        }
    };
    if (base_schema == nullptr) {
        // std::max_element returns the end iterator for an empty range; dereferencing it
        // is undefined behaviour, so reject that input explicitly.
        if (schemas.empty()) {
            return Status::InternalError(
                    "Cannot build least common schema: no base schema and no input schemas");
        }
        // Pick tablet schema with max schema version
        const auto max_version_it =
                std::max_element(schemas.cbegin(), schemas.cend(),
                                 [](const TabletSchemaSPtr& a, const TabletSchemaSPtr& b) {
                                     return a->schema_version() < b->schema_version();
                                 });
        const TabletSchemaSPtr& max_version_schema = *max_version_it;
        CHECK(max_version_schema);
        build_schema_without_extracted_columns(max_version_schema);
    } else {
        // use input base_schema schema as base schema
        build_schema_without_extracted_columns(base_schema);
    }

    for (int32_t unique_id : variant_column_unique_id) {
        std::set<PathInData> path_set;
        RETURN_IF_ERROR(update_least_common_schema(schemas, output_schema, unique_id, &path_set));
    }

    inherit_column_attributes(output_schema);
    if (check_schema_size &&
        output_schema->columns().size() > config::variant_max_merged_tablet_schema_size) {
        return Status::DataQualityError("Reached max column size limit {}",
                                        config::variant_max_merged_tablet_schema_size);
    }

    return Status::OK();
}
758
759
// sort by paths in lexicographical order
760
613
// Return a copy of `subcolumns` whose entries are ordered by their `path`
// (ascending, using the path type's operator<). The input is not modified.
ColumnVariant::Subcolumns get_sorted_subcolumns(const ColumnVariant::Subcolumns& subcolumns) {
    const auto path_less = [](const auto& left, const auto& right) {
        return left->path < right->path;
    };
    ColumnVariant::Subcolumns ordered = subcolumns;
    std::sort(ordered.begin(), ordered.end(), path_less);
    return ordered;
}
768
769
bool has_schema_index_diff(const TabletSchema* new_schema, const TabletSchema* old_schema,
770
4
                           int32_t new_col_idx, int32_t old_col_idx) {
771
4
    const auto& column_new = new_schema->column(new_col_idx);
772
4
    const auto& column_old = old_schema->column(old_col_idx);
773
774
4
    if (column_new.is_bf_column() != column_old.is_bf_column()) {
775
2
        return true;
776
2
    }
777
778
2
    auto new_schema_inverted_indexs = new_schema->inverted_indexs(column_new);
779
2
    auto old_schema_inverted_indexs = old_schema->inverted_indexs(column_old);
780
781
2
    if (new_schema_inverted_indexs.size() != old_schema_inverted_indexs.size()) {
782
1
        return true;
783
1
    }
784
785
2
    for (size_t i = 0; i < new_schema_inverted_indexs.size(); ++i) {
786
1
        if (!new_schema_inverted_indexs[i]->is_same_except_id(old_schema_inverted_indexs[i])) {
787
0
            return true;
788
0
        }
789
1
    }
790
791
1
    return false;
792
1
}
793
794
601
// Build the sparse column descriptor for `variant`: a MAP column named
// "<variant lower-case name>.<SPARSE_COLUMN_PATH>" with two STRING sub columns,
// parented to the variant and sharing its aggregation method.
TabletColumn create_sparse_column(const TabletColumn& variant) {
    const std::string sparse_name = variant.name_lower_case() + "." + SPARSE_COLUMN_PATH;

    TabletColumn sparse;
    sparse.set_name(sparse_name);
    sparse.set_type(FieldType::OLAP_FIELD_TYPE_MAP);
    sparse.set_aggregation_method(variant.aggregation());
    sparse.set_path_info(PathInData {sparse_name});
    sparse.set_parent_unique_id(variant.unique_id());
    // set default value to "NULL" DefaultColumnIterator will call insert_many_defaults
    sparse.set_default_value("NULL");

    // The same STRING child is registered twice, giving the MAP two STRING sub columns.
    TabletColumn string_child;
    string_child.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
    sparse.add_sub_column(string_child);
    sparse.add_sub_column(string_child);
    return sparse;
}
809
810
29
// Build the descriptor of one sparse shard column of `variant`: a MAP column named
// "<variant lower-case name>.<SPARSE_COLUMN_PATH>.b<bucket_index>" with two STRING
// sub columns, parented to the variant and sharing its aggregation method.
TabletColumn create_sparse_shard_column(const TabletColumn& variant, int bucket_index) {
    const std::string shard_name = variant.name_lower_case() + "." + SPARSE_COLUMN_PATH + ".b" +
                                   std::to_string(bucket_index);

    TabletColumn shard;
    shard.set_name(shard_name);
    shard.set_type(FieldType::OLAP_FIELD_TYPE_MAP);
    shard.set_aggregation_method(variant.aggregation());
    shard.set_parent_unique_id(variant.unique_id());
    shard.set_default_value("NULL");
    PathInData shard_path(shard_name);
    shard.set_path_info(shard_path);

    // The same STRING child is registered twice, giving the MAP two STRING sub columns.
    TabletColumn string_child;
    string_child.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
    shard.add_sub_column(string_child);
    shard.add_sub_column(string_child);
    return shard;
}
827
828
28
// Build the descriptor of one doc-value bucket column of `variant`: a MAP column named
// "<variant lower-case name>.<DOC_VALUE_COLUMN_PATH>.b<bucket_index>" with two STRING
// sub columns, parented to the variant and sharing its aggregation method.
TabletColumn create_doc_value_column(const TabletColumn& variant, int bucket_index) {
    const std::string bucket_name = variant.name_lower_case() + "." + DOC_VALUE_COLUMN_PATH +
                                    ".b" + std::to_string(bucket_index);

    TabletColumn doc_value;
    doc_value.set_name(bucket_name);
    doc_value.set_type(FieldType::OLAP_FIELD_TYPE_MAP);
    doc_value.set_aggregation_method(variant.aggregation());
    doc_value.set_parent_unique_id(variant.unique_id());
    doc_value.set_default_value("NULL");
    doc_value.set_path_info(PathInData {bucket_name});

    // The same STRING child is registered twice, giving the MAP two STRING sub columns.
    TabletColumn string_child;
    string_child.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
    doc_value.add_sub_column(string_child);
    doc_value.add_sub_column(string_child);
    return doc_value;
}
845
846
5.34k
uint32_t variant_binary_shard_of(const StringRef& path, uint32_t bucket_num) {
847
5.34k
    if (bucket_num <= 1) {
848
2
        return 0;
849
2
    }
850
5.34k
    SipHash hash;
851
5.34k
    hash.update(path.data, path.size);
852
5.34k
    uint64_t h = hash.get64();
853
5.34k
    return static_cast<uint32_t>(h % bucket_num);
854
5.34k
}
855
856
// Accumulate, per variant column unique id, the non-null value count of every path
// found in the segments of rowset `rs` into `*uid_to_path_stats`.
// Both the sparse-column stats and the subcolumn stats of each segment are summed.
// Columns that are not variants, have a negative unique id, or fail
// should_check_variant_path_stats() are skipped.
Status VariantCompactionUtil::aggregate_path_to_stats(
        const RowsetSharedPtr& rs,
        std::unordered_map<int32_t, PathToNoneNullValues>* uid_to_path_stats) {
    SegmentCacheHandle segment_cache;
    RETURN_IF_ERROR(SegmentLoader::instance()->load_segments(
            std::static_pointer_cast<BetaRowset>(rs), &segment_cache));

    for (const auto& column : rs->tablet_schema()->columns()) {
        const bool wanted = column->is_variant_type() && column->unique_id() >= 0 &&
                            should_check_variant_path_stats(*column);
        if (!wanted) {
            continue;
        }

        // agg path -> stats
        // The map entry for this column is created lazily, on the first path seen.
        auto merge_sizes = [&](const auto& path_sizes) {
            for (const auto& [path, size] : path_sizes) {
                (*uid_to_path_stats)[column->unique_id()][path] += size;
            }
        };

        for (const auto& segment : segment_cache.get_segments()) {
            std::shared_ptr<ColumnReader> reader;
            OlapReaderStatistics reader_stats;
            RETURN_IF_ERROR(segment->get_column_reader(column->unique_id(), &reader, &reader_stats));
            if (!reader) {
                continue;
            }

            CHECK(reader->get_meta_type() == FieldType::OLAP_FIELD_TYPE_VARIANT);
            auto* variant_reader = assert_cast<segment_v2::VariantColumnReader*>(reader.get());
            // load external meta before getting stats
            RETURN_IF_ERROR(variant_reader->load_external_meta_once());
            const auto* segment_stats = variant_reader->get_stats();
            CHECK(segment_stats);

            merge_sizes(segment_stats->sparse_column_non_null_size);
            merge_sizes(segment_stats->subcolumns_non_null_size);
        }
    }
    return Status::OK();
}
899
900
// Collect, per variant column unique id, the extended information needed to build a
// compaction schema from the segments of rowset `rs`: path statistics, sparse paths,
// per-path data types, typed paths and nested paths.
// For columns with nested group enabled only the data types and typed paths are
// gathered, and `has_nested_group` is set.
Status VariantCompactionUtil::aggregate_variant_extended_info(
        const RowsetSharedPtr& rs,
        std::unordered_map<int32_t, VariantExtendedInfo>* uid_to_variant_extended_info) {
    SegmentCacheHandle segment_cache;
    RETURN_IF_ERROR(SegmentLoader::instance()->load_segments(
            std::static_pointer_cast<BetaRowset>(rs), &segment_cache));

    for (const auto& column : rs->tablet_schema()->columns()) {
        if (!column->is_variant_type()) {
            continue;
        }
        auto& info = (*uid_to_variant_extended_info)[column->unique_id()];
        const bool nested_group_enabled = column->variant_enable_nested_group();
        if (nested_group_enabled) {
            info.has_nested_group = true;
        }

        for (const auto& segment : segment_cache.get_segments()) {
            std::shared_ptr<ColumnReader> reader;
            OlapReaderStatistics reader_stats;
            RETURN_IF_ERROR(segment->get_column_reader(column->unique_id(), &reader, &reader_stats));
            if (!reader) {
                continue;
            }

            CHECK(reader->get_meta_type() == FieldType::OLAP_FIELD_TYPE_VARIANT);
            auto* variant_reader = assert_cast<segment_v2::VariantColumnReader*>(reader.get());
            // load external meta before getting stats
            RETURN_IF_ERROR(variant_reader->load_external_meta_once());
            const auto* segment_stats = variant_reader->get_stats();
            CHECK(segment_stats);

            // 1. agg path -> stats
            if (!nested_group_enabled) {
                // NG roots still need type metadata for regular subpaths such as `v.owner`,
                // but their compaction schema should not be driven by flat path stats.
                for (const auto& [path, size] : segment_stats->sparse_column_non_null_size) {
                    info.path_to_none_null_values[path] += size;
                    info.sparse_paths.emplace(path);
                }
                for (const auto& [path, size] : segment_stats->subcolumns_non_null_size) {
                    info.path_to_none_null_values[path] += size;
                }
            }

            // 2. agg path -> schema
            variant_reader->get_subcolumns_types(&info.path_to_data_types);

            // 3. extract typed paths
            variant_reader->get_typed_paths(&info.typed_paths);

            // 4. extract nested paths
            if (!nested_group_enabled) {
                variant_reader->get_nested_paths(&info.nested_paths);
            }
        }
    }
    return Status::OK();
}
959
960
// get the subpaths and sparse paths for the variant column
961
void VariantCompactionUtil::get_subpaths(int32_t max_subcolumns_count,
962
                                         const PathToNoneNullValues& stats,
963
12
                                         TabletSchema::PathsSetInfo& paths_set_info) {
964
    // max_subcolumns_count is 0 means no limit
965
12
    if (max_subcolumns_count > 0 && stats.size() > max_subcolumns_count) {
966
6
        std::vector<std::pair<size_t, std::string_view>> paths_with_sizes;
967
6
        paths_with_sizes.reserve(stats.size());
968
48
        for (const auto& [path, size] : stats) {
969
48
            paths_with_sizes.emplace_back(size, path);
970
48
        }
971
6
        std::sort(paths_with_sizes.begin(), paths_with_sizes.end(), std::greater());
972
973
        // Select top N paths as subcolumns, remaining paths as sparse columns
974
48
        for (const auto& [size, path] : paths_with_sizes) {
975
48
            if (paths_set_info.sub_path_set.size() < max_subcolumns_count) {
976
18
                paths_set_info.sub_path_set.emplace(path);
977
30
            } else {
978
30
                paths_set_info.sparse_path_set.emplace(path);
979
30
            }
980
48
        }
981
6
        LOG(INFO) << "subpaths " << paths_set_info.sub_path_set.size() << " sparse paths "
982
6
                  << paths_set_info.sparse_path_set.size() << " variant max subcolumns count "
983
6
                  << max_subcolumns_count << " stats size " << paths_with_sizes.size();
984
6
    } else {
985
        // Apply all paths as subcolumns
986
13
        for (const auto& [path, _] : stats) {
987
13
            paths_set_info.sub_path_set.emplace(path);
988
13
        }
989
6
    }
990
12
}
991
992
// Sanity-check that the per-path non-null counts of the compaction `output` rowset are
// consistent with the counts aggregated over the input rowsets (`intputs`).
//
// The check is skipped (OK is returned) when: the output has no variant columns; any
// input or output variant column fails should_check_variant_path_stats(); any input
// schema has extracted columns; the tablet is not DUP_KEYS; or any input rowset carries
// a delete predicate. Outside BE_TEST, an unexpected extracted column in the output
// schema is reported as InternalError.
//
// Returns InternalError on a missing uid/path or a mismatching count, OK otherwise.
Status VariantCompactionUtil::check_path_stats(const std::vector<RowsetSharedPtr>& intputs,
                                               RowsetSharedPtr output, BaseTabletSPtr tablet) {
    if (output->tablet_schema()->num_variant_columns() == 0) {
        return Status::OK();
    }

    // True when `pred` holds for at least one column of any input rowset schema.
    const auto any_input_column = [&intputs](const auto& pred) {
        for (const auto& rowset : intputs) {
            for (const auto& column : rowset->tablet_schema()->columns()) {
                if (pred(*column)) {
                    return true;
                }
            }
        }
        return false;
    };
    const auto skips_path_stats = [](const auto& column) {
        return column.is_variant_type() && !should_check_variant_path_stats(column);
    };

    if (any_input_column(skips_path_stats)) {
        return Status::OK();
    }
    // check no extended schema in input rowsets
    if (any_input_column([](const auto& column) { return column.is_extracted_column(); })) {
        return Status::OK();
    }
#ifndef BE_TEST
    // check no extended schema in output rowset
    for (const auto& column : output->tablet_schema()->columns()) {
        if (!column->is_extracted_column()) {
            continue;
        }
        const auto& name = column->name();
        // Doc-value and sparse binary columns are expected extracted columns.
        const bool is_binary_column =
                name.find("." + DOC_VALUE_COLUMN_PATH + ".") != std::string::npos ||
                name.find("." + SPARSE_COLUMN_PATH + ".") != std::string::npos ||
                name.ends_with("." + SPARSE_COLUMN_PATH);
        if (!is_binary_column) {
            return Status::InternalError("Unexpected extracted column {} in output rowset",
                                         column->name());
        }
    }
#endif
    // only check path stats for dup_keys since the rows may be merged in other models
    if (tablet->keys_type() != KeysType::DUP_KEYS) {
        return Status::OK();
    }
    // if there is a delete predicate in the input rowsets, we skip the path stats check
    const bool has_delete_predicate =
            std::any_of(intputs.begin(), intputs.end(), [](const auto& rowset) {
                return rowset->rowset_meta()->has_delete_predicate();
            });
    if (has_delete_predicate) {
        return Status::OK();
    }
    for (const auto& column : output->tablet_schema()->columns()) {
        if (skips_path_stats(*column)) {
            return Status::OK();
        }
    }

    std::unordered_map<int32_t, PathToNoneNullValues> input_stats_by_uid;
    for (const auto& rs : intputs) {
        RETURN_IF_ERROR(aggregate_path_to_stats(rs, &input_stats_by_uid));
    }
    std::unordered_map<int32_t, PathToNoneNullValues> output_stats_by_uid;
    RETURN_IF_ERROR(aggregate_path_to_stats(output, &output_stats_by_uid));

    for (const auto& [uid, stats] : output_stats_by_uid) {
        if (output->tablet_schema()->column_by_uid(uid).is_variant_type() &&
            output->tablet_schema()->column_by_uid(uid).variant_enable_doc_mode()) {
            continue;
        }
        const auto input_it = input_stats_by_uid.find(uid);
        if (input_it == input_stats_by_uid.end()) {
            return Status::InternalError("Path stats not found for uid {}, tablet_id {}", uid,
                                         tablet->tablet_id());
        }
        auto& input_stats = input_it->second;

        // In input rowsets, some rowsets may have statistics values exceeding the maximum limit,
        // which leads to inaccurate statistics
        const bool input_may_be_inaccurate =
                stats.size() > output->tablet_schema()
                                       ->column_by_uid(uid)
                                       .variant_max_sparse_column_statistics_size();
        if (input_may_be_inaccurate) {
            // When there is only one segment, we can ensure that the size of each path in output stats is accurate
            if (output->num_segments() == 1) {
                for (const auto& [path, size] : stats) {
                    const auto found = input_stats.find(path);
                    if (found == input_stats.end()) {
                        continue;
                    }
                    if (found->second > size) {
                        return Status::InternalError(
                                "Path stats not smaller for uid {} with path `{}`, input size {}, "
                                "output size {}, tablet_id {}",
                                uid, path, found->second, size, tablet->tablet_id());
                    }
                }
            }
        } else {
            // in this case, input stats is accurate, so we check the stats size and stats value
            for (const auto& [path, size] : stats) {
                const auto found = input_stats.find(path);
                if (found == input_stats.end()) {
                    return Status::InternalError(
                            "Path stats not found for uid {}, path {}, tablet_id {}", uid, path,
                            tablet->tablet_id());
                }
                if (found->second != size) {
                    return Status::InternalError(
                            "Path stats not match for uid {} with path `{}`, input size {}, "
                            "output size {}, tablet_id {}",
                            uid, path, found->second, size, tablet->tablet_id());
                }
            }
        }
    }

    return Status::OK();
}
1105
1106
// Append one extracted column to `output_schema` for every path in `typed_paths` and
// record its sub column info in `paths_set_info.typed_path_set`.
// Does nothing when the parent column has variant_enable_typed_paths_to_sparse() set.
// Returns InternalError as soon as generate_sub_column_info() fails for a path.
Status VariantCompactionUtil::get_compaction_typed_columns(
        const TabletSchemaSPtr& target, const std::unordered_set<std::string>& typed_paths,
        const TabletColumnPtr parent_column, TabletSchemaSPtr& output_schema,
        TabletSchema::PathsSetInfo& paths_set_info) {
    if (parent_column->variant_enable_typed_paths_to_sparse()) {
        return Status::OK();
    }
    for (const auto& typed_path : typed_paths) {
        TabletSchema::SubColumnInfo typed_info;
        if (!generate_sub_column_info(*target, parent_column->unique_id(), typed_path,
                                      &typed_info)) {
            return Status::InternalError("Failed to generate sub column info for path {}",
                                         typed_path);
        }
        inherit_column_attributes(*parent_column, typed_info.column);
        output_schema->append_column(typed_info.column);
        paths_set_info.typed_path_set.insert({typed_path, std::move(typed_info)});
        VLOG_DEBUG << "append typed column " << typed_path;
    }
    return Status::OK();
}
1126
1127
// Append one extracted column to `output_schema` for every path in `nested_paths`.
// The column type is the least supertype (via get_least_supertype_jsonb) of the data
// types recorded for the path; the indexes inherited from the parent are stored in
// `paths_set_info.subcolumn_indexes`.
// Returns InternalError when a nested path has no recorded data type.
Status VariantCompactionUtil::get_compaction_nested_columns(
        const std::unordered_set<PathInData, PathInData::Hash>& nested_paths,
        const PathToDataTypes& path_to_data_types, const TabletColumnPtr parent_column,
        TabletSchemaSPtr& output_schema, TabletSchema::PathsSetInfo& paths_set_info) {
    const auto& parent_indexes = output_schema->inverted_indexs(parent_column->unique_id());
    for (const auto& nested_path : nested_paths) {
        const auto types_it = path_to_data_types.find(nested_path);
        const bool has_types =
                types_it != path_to_data_types.end() && !types_it->second.empty();
        if (!has_types) {
            return Status::InternalError("Nested path {} has no data type",
                                         nested_path.get_path());
        }
        DataTypePtr merged_type;
        get_least_supertype_jsonb(types_it->second, &merged_type);

        const std::string nested_name =
                parent_column->name_lower_case() + "." + nested_path.get_path();
        // The full path is assembled part by part: parent name first, then the nested parts.
        PathInDataBuilder path_builder;
        auto full_path = path_builder.append(parent_column->name_lower_case(), false)
                                 .append(nested_path.get_parts(), false)
                                 .build();
        TabletColumn nested_column =
                get_column_by_type(merged_type, nested_name,
                                   ExtraInfo {.unique_id = -1,
                                              .parent_unique_id = parent_column->unique_id(),
                                              .path_info = full_path});
        inherit_column_attributes(*parent_column, nested_column);

        TabletIndexes nested_indexes;
        inherit_index(parent_indexes, nested_indexes, nested_column);
        paths_set_info.subcolumn_indexes.emplace(nested_path.get_path(),
                                                 std::move(nested_indexes));
        output_schema->append_column(nested_column);
        VLOG_DEBUG << "append nested column " << nested_path.get_path();
    }
    return Status::OK();
}
1159
1160
void VariantCompactionUtil::get_compaction_subcolumns_from_subpaths(
1161
        TabletSchema::PathsSetInfo& paths_set_info, const TabletColumnPtr parent_column,
1162
        const TabletSchemaSPtr& target, const PathToDataTypes& path_to_data_types,
1163
13
        const std::unordered_set<std::string>& sparse_paths, TabletSchemaSPtr& output_schema) {
1164
13
    auto& path_set = paths_set_info.sub_path_set;
1165
13
    std::vector<StringRef> sorted_subpaths(path_set.begin(), path_set.end());
1166
13
    std::sort(sorted_subpaths.begin(), sorted_subpaths.end());
1167
13
    const auto& parent_indexes = target->inverted_indexs(parent_column->unique_id());
1168
    // append subcolumns
1169
38
    for (const auto& subpath : sorted_subpaths) {
1170
38
        auto column_name = parent_column->name_lower_case() + "." + subpath.to_string();
1171
38
        auto column_path = PathInData(column_name);
1172
1173
38
        const auto& find_data_types = path_to_data_types.find(PathInData(subpath));
1174
1175
        // some cases: the subcolumn type is variant
1176
        // 1. this path has no data type in segments
1177
        // 2. this path is in sparse paths
1178
        // 3. the sparse paths are too much
1179
38
        TabletSchema::SubColumnInfo sub_column_info;
1180
38
        if (parent_column->variant_enable_typed_paths_to_sparse() &&
1181
38
            generate_sub_column_info(*target, parent_column->unique_id(), std::string(subpath),
1182
16
                                     &sub_column_info)) {
1183
8
            inherit_column_attributes(*parent_column, sub_column_info.column);
1184
8
            output_schema->append_column(sub_column_info.column);
1185
8
            paths_set_info.subcolumn_indexes.emplace(subpath, std::move(sub_column_info.indexes));
1186
8
            VLOG_DEBUG << "append typed column " << subpath;
1187
30
        } else if (find_data_types == path_to_data_types.end() || find_data_types->second.empty() ||
1188
30
                   sparse_paths.find(std::string(subpath)) != sparse_paths.end() ||
1189
30
                   sparse_paths.size() >=
1190
22
                           parent_column->variant_max_sparse_column_statistics_size()) {
1191
12
            TabletColumn subcolumn;
1192
12
            subcolumn.set_name(column_name);
1193
12
            subcolumn.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
1194
12
            subcolumn.set_parent_unique_id(parent_column->unique_id());
1195
12
            subcolumn.set_path_info(column_path);
1196
12
            subcolumn.set_aggregation_method(parent_column->aggregation());
1197
12
            subcolumn.set_variant_max_subcolumns_count(
1198
12
                    parent_column->variant_max_subcolumns_count());
1199
12
            subcolumn.set_variant_enable_doc_mode(parent_column->variant_enable_doc_mode());
1200
12
            subcolumn.set_is_nullable(true);
1201
12
            output_schema->append_column(subcolumn);
1202
12
            VLOG_DEBUG << "append sub column " << subpath << " data type "
1203
0
                       << "VARIANT";
1204
12
        }
1205
        // normal case: the subcolumn type can be calculated from the data types in segments
1206
18
        else {
1207
18
            DataTypePtr data_type;
1208
18
            get_least_supertype_jsonb(find_data_types->second, &data_type);
1209
18
            TabletColumn sub_column =
1210
18
                    get_column_by_type(data_type, column_name,
1211
18
                                       ExtraInfo {.unique_id = -1,
1212
18
                                                  .parent_unique_id = parent_column->unique_id(),
1213
18
                                                  .path_info = column_path});
1214
18
            inherit_column_attributes(*parent_column, sub_column);
1215
18
            TabletIndexes sub_column_indexes;
1216
18
            inherit_index(parent_indexes, sub_column_indexes, sub_column);
1217
18
            paths_set_info.subcolumn_indexes.emplace(subpath, std::move(sub_column_indexes));
1218
18
            output_schema->append_column(sub_column);
1219
18
            VLOG_DEBUG << "append sub column " << subpath << " data type " << data_type->get_name();
1220
18
        }
1221
38
    }
1222
13
}
1223
1224
void VariantCompactionUtil::get_compaction_subcolumns_from_data_types(
1225
        TabletSchema::PathsSetInfo& paths_set_info, const TabletColumnPtr parent_column,
1226
        const TabletSchemaSPtr& target, const PathToDataTypes& path_to_data_types,
1227
3
        TabletSchemaSPtr& output_schema) {
1228
3
    const auto& parent_indexes = target->inverted_indexs(parent_column->unique_id());
1229
4
    for (const auto& [path, data_types] : path_to_data_types) {
1230
        // Typed paths are materialized by get_compaction_typed_columns(); this helper only
1231
        // materializes regular subcolumns inferred from rowset data types.
1232
4
        if (data_types.empty() || path.empty() || path.get_is_typed() || path.has_nested_part()) {
1233
1
            continue;
1234
1
        }
1235
3
        DataTypePtr data_type;
1236
3
        get_least_supertype_jsonb(data_types, &data_type);
1237
3
        auto column_name = parent_column->name_lower_case() + "." + path.get_path();
1238
3
        auto column_path = PathInData(column_name);
1239
3
        TabletColumn sub_column =
1240
3
                get_column_by_type(data_type, column_name,
1241
3
                                   ExtraInfo {.unique_id = -1,
1242
3
                                              .parent_unique_id = parent_column->unique_id(),
1243
3
                                              .path_info = column_path});
1244
3
        inherit_column_attributes(*parent_column, sub_column);
1245
3
        TabletIndexes sub_column_indexes;
1246
3
        inherit_index(parent_indexes, sub_column_indexes, sub_column);
1247
3
        paths_set_info.sub_path_set.emplace(path.get_path());
1248
3
        paths_set_info.subcolumn_indexes.emplace(path.get_path(), std::move(sub_column_indexes));
1249
3
        output_schema->append_column(sub_column);
1250
3
        VLOG_DEBUG << "append sub column " << path.get_path() << " data type "
1251
0
                   << data_type->get_name();
1252
3
    }
1253
3
}
1254
1255
// Build the temporary schema for compaction.
1256
// NestedGroup roots are special: the root VARIANT column owns the NG tree and the streaming NG
1257
// writer handles NG children, while regular non-NG paths beside the arrays are materialized as
1258
// ordinary extracted subcolumns. NG typed paths still use get_compaction_typed_columns(), keeping
1259
// typed-column rules out of the NG-specific regular-path filtering.
1260
Status VariantCompactionUtil::get_extended_compaction_schema(
1261
41
        const std::vector<RowsetSharedPtr>& rowsets, TabletSchemaSPtr& target) {
1262
41
    std::unordered_map<int32_t, VariantExtendedInfo> uid_to_variant_extended_info;
1263
41
    const bool needs_variant_extended_info =
1264
821
            std::ranges::any_of(target->columns(), [](const TabletColumnPtr& column) {
1265
821
                return column->is_variant_type() && (should_check_variant_path_stats(*column) ||
1266
5
                                                     column->variant_enable_nested_group());
1267
821
            });
1268
41
    if (needs_variant_extended_info) {
1269
        // collect path stats from all rowsets and segments
1270
10
        for (const auto& rs : rowsets) {
1271
10
            RETURN_IF_ERROR(aggregate_variant_extended_info(rs, &uid_to_variant_extended_info));
1272
10
        }
1273
5
    }
1274
1275
    // build the output schema
1276
41
    TabletSchemaSPtr output_schema = std::make_shared<TabletSchema>();
1277
41
    output_schema->shawdow_copy_without_columns(*target);
1278
41
    std::unordered_map<int32_t, TabletSchema::PathsSetInfo> uid_to_paths_set_info;
1279
41
    const auto ng_root_uids =
1280
41
            collect_nested_group_compaction_root_uids(target, uid_to_variant_extended_info);
1281
828
    for (const TabletColumnPtr& column : target->columns()) {
1282
828
        if (!column->is_extracted_column()) {
1283
827
            output_schema->append_column(*column);
1284
827
        }
1285
828
        if (!column->is_variant_type()) {
1286
821
            continue;
1287
821
        }
1288
7
        VLOG_DEBUG << "column " << column->name() << " unique id " << column->unique_id();
1289
1290
7
        const auto info_it = uid_to_variant_extended_info.find(column->unique_id());
1291
7
        const VariantExtendedInfo empty_extended_info;
1292
7
        const VariantExtendedInfo& extended_info = info_it == uid_to_variant_extended_info.end()
1293
7
                                                           ? empty_extended_info
1294
7
                                                           : info_it->second;
1295
7
        auto& paths_set_info = uid_to_paths_set_info[column->unique_id()];
1296
7
        const bool use_nested_group_compaction_schema = ng_root_uids.contains(column->unique_id());
1297
1298
7
        if (use_nested_group_compaction_schema) {
1299
            // 1. append typed columns. Keep this shared with the non-NG typed helper; only the
1300
            // regular-path selection below is NG-specific.
1301
1
            RETURN_IF_ERROR(get_compaction_typed_columns(target, extended_info.typed_paths, column,
1302
1
                                                         output_schema, paths_set_info));
1303
1304
            // NG roots do not record path-count stats for ordinary Variant paths, so their regular
1305
            // non-NG subcolumns use the same data-types materialization helper as the
1306
            // all-materialized non-NG branch below.
1307
1
            auto regular_path_to_data_types =
1308
1
                    collect_regular_types_outside_nested_group(extended_info);
1309
1
            get_compaction_subcolumns_from_data_types(paths_set_info, column, target,
1310
1
                                                      regular_path_to_data_types, output_schema);
1311
1
            LOG(INFO) << "Variant column uid=" << column->unique_id()
1312
1
                      << " keeps nested-group root and materializes regular non-NG subcolumns in "
1313
1
                         "compaction schema";
1314
1
            continue;
1315
1
        }
1316
1317
6
        if (column->variant_enable_doc_mode()) {
1318
0
            const int bucket_num = std::max(1, column->variant_doc_hash_shard_count());
1319
0
            for (int b = 0; b < bucket_num; ++b) {
1320
0
                TabletColumn doc_value_bucket_column = create_doc_value_column(*column, b);
1321
0
                doc_value_bucket_column.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
1322
0
                doc_value_bucket_column.set_is_nullable(false);
1323
0
                doc_value_bucket_column.set_variant_enable_doc_mode(true);
1324
0
                output_schema->append_column(doc_value_bucket_column);
1325
0
            }
1326
0
            continue;
1327
0
        }
1328
1329
        // 1. append typed columns
1330
6
        RETURN_IF_ERROR(get_compaction_typed_columns(target, extended_info.typed_paths, column,
1331
6
                                                     output_schema, paths_set_info));
1332
1333
        // 2. append nested columns
1334
6
        RETURN_IF_ERROR(get_compaction_nested_columns(extended_info.nested_paths,
1335
6
                                                      extended_info.path_to_data_types, column,
1336
6
                                                      output_schema, paths_set_info));
1337
1338
        // 3. get the subpaths
1339
6
        get_subpaths(column->variant_max_subcolumns_count(), extended_info.path_to_none_null_values,
1340
6
                     paths_set_info);
1341
1342
        // 4. append subcolumns
1343
6
        if (column->variant_max_subcolumns_count() > 0 || !column->get_sub_columns().empty()) {
1344
5
            get_compaction_subcolumns_from_subpaths(paths_set_info, column, target,
1345
5
                                                    extended_info.path_to_data_types,
1346
5
                                                    extended_info.sparse_paths, output_schema);
1347
5
        }
1348
        // variant_max_subcolumns_count == 0 and no typed paths materialized
1349
        // it means that all subcolumns are materialized, may be from old data
1350
1
        else {
1351
1
            get_compaction_subcolumns_from_data_types(paths_set_info, column, target,
1352
1
                                                      extended_info.path_to_data_types,
1353
1
                                                      output_schema);
1354
1
        }
1355
1356
        // append sparse column(s)
1357
        // If variant uses bucketized sparse columns, append one sparse bucket column per bucket.
1358
        // Otherwise, append the single sparse column.
1359
6
        int bucket_num = std::max(1, column->variant_sparse_hash_shard_count());
1360
6
        if (bucket_num > 1) {
1361
0
            for (int b = 0; b < bucket_num; ++b) {
1362
0
                TabletColumn sparse_bucket_column = create_sparse_shard_column(*column, b);
1363
0
                output_schema->append_column(sparse_bucket_column);
1364
0
            }
1365
6
        } else {
1366
6
            TabletColumn sparse_column = create_sparse_column(*column);
1367
6
            output_schema->append_column(sparse_column);
1368
6
        }
1369
6
    }
1370
1371
41
    target = output_schema;
1372
    // used to merge & filter path to sparse column during reading in compaction
1373
41
    target->set_path_set_info(std::move(uid_to_paths_set_info));
1374
41
    VLOG_DEBUG << "dump schema " << target->dump_full_schema();
1375
41
    return Status::OK();
1376
41
}
1377
1378
// Calculate statistics about variant data paths from the encoded sparse column
1379
void VariantCompactionUtil::calculate_variant_stats(const IColumn& encoded_sparse_column,
1380
                                                    segment_v2::VariantStatisticsPB* stats,
1381
                                                    size_t max_sparse_column_statistics_size,
1382
3
                                                    size_t row_pos, size_t num_rows) {
1383
    // Cast input column to ColumnMap type since sparse column is stored as a map
1384
3
    const auto& map_column = assert_cast<const ColumnMap&>(encoded_sparse_column);
1385
1386
    // Get the keys column which contains the paths as strings
1387
3
    const auto& sparse_data_paths =
1388
3
            assert_cast<const ColumnString*>(map_column.get_keys_ptr().get());
1389
3
    const auto& serialized_sparse_column_offsets = map_column.get_offsets();
1390
3
    auto& count_map = *stats->mutable_sparse_column_non_null_size();
1391
    // Iterate through all paths in the sparse column
1392
11
    for (size_t i = row_pos; i != row_pos + num_rows; ++i) {
1393
8
        size_t offset = serialized_sparse_column_offsets[i - 1];
1394
8
        size_t end = serialized_sparse_column_offsets[i];
1395
10
        for (size_t j = offset; j != end; ++j) {
1396
2
            auto path = sparse_data_paths->get_data_at(j);
1397
1398
2
            const auto& sparse_path = path.to_string();
1399
            // If path already exists in statistics, increment its count
1400
2
            if (auto it = count_map.find(sparse_path); it != count_map.end()) {
1401
0
                ++it->second;
1402
0
            }
1403
            // If path doesn't exist and we haven't hit the max statistics size limit,
1404
            // add it with count 1
1405
2
            else if (count_map.size() < max_sparse_column_statistics_size) {
1406
2
                count_map.emplace(sparse_path, 1);
1407
2
            }
1408
2
        }
1409
8
    }
1410
1411
3
    if (stats->sparse_column_non_null_size().size() > max_sparse_column_statistics_size) {
1412
0
        throw doris::Exception(
1413
0
                ErrorCode::INTERNAL_ERROR,
1414
0
                "Sparse column non null size: {} is greater than max statistics size: {}",
1415
0
                stats->sparse_column_non_null_size().size(), max_sparse_column_statistics_size);
1416
0
    }
1417
3
}
1418
1419
/// Calculates number of dimensions in array field.
1420
/// Returns 0 for scalar fields.
1421
class FieldVisitorToNumberOfDimensions : public StaticVisitor<size_t> {
1422
public:
1423
    FieldVisitorToNumberOfDimensions() = default;
1424
    template <PrimitiveType T>
1425
2.24M
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1426
2.24M
        if constexpr (T == TYPE_ARRAY) {
1427
127k
            const size_t size = x.size();
1428
127k
            size_t dimensions = 0;
1429
873k
            for (size_t i = 0; i < size; ++i) {
1430
745k
                size_t element_dimensions = apply_visitor(*this, x[i]);
1431
745k
                dimensions = std::max(dimensions, element_dimensions);
1432
745k
            }
1433
127k
            return 1 + dimensions;
1434
2.12M
        } else {
1435
2.12M
            return 0;
1436
2.12M
        }
1437
2.24M
    }
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE1EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1425
24.0k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1426
        if constexpr (T == TYPE_ARRAY) {
1427
            const size_t size = x.size();
1428
            size_t dimensions = 0;
1429
            for (size_t i = 0; i < size; ++i) {
1430
                size_t element_dimensions = apply_visitor(*this, x[i]);
1431
                dimensions = std::max(dimensions, element_dimensions);
1432
            }
1433
            return 1 + dimensions;
1434
24.0k
        } else {
1435
24.0k
            return 0;
1436
24.0k
        }
1437
24.0k
    }
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE26EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE42EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE7EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1425
40.9k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1426
        if constexpr (T == TYPE_ARRAY) {
1427
            const size_t size = x.size();
1428
            size_t dimensions = 0;
1429
            for (size_t i = 0; i < size; ++i) {
1430
                size_t element_dimensions = apply_visitor(*this, x[i]);
1431
                dimensions = std::max(dimensions, element_dimensions);
1432
            }
1433
            return 1 + dimensions;
1434
40.9k
        } else {
1435
40.9k
            return 0;
1436
40.9k
        }
1437
40.9k
    }
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE12EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE11EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE25EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE2EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1425
69.6k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1426
        if constexpr (T == TYPE_ARRAY) {
1427
            const size_t size = x.size();
1428
            size_t dimensions = 0;
1429
            for (size_t i = 0; i < size; ++i) {
1430
                size_t element_dimensions = apply_visitor(*this, x[i]);
1431
                dimensions = std::max(dimensions, element_dimensions);
1432
            }
1433
            return 1 + dimensions;
1434
69.6k
        } else {
1435
69.6k
            return 0;
1436
69.6k
        }
1437
69.6k
    }
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE3EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1425
6
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1426
        if constexpr (T == TYPE_ARRAY) {
1427
            const size_t size = x.size();
1428
            size_t dimensions = 0;
1429
            for (size_t i = 0; i < size; ++i) {
1430
                size_t element_dimensions = apply_visitor(*this, x[i]);
1431
                dimensions = std::max(dimensions, element_dimensions);
1432
            }
1433
            return 1 + dimensions;
1434
6
        } else {
1435
6
            return 0;
1436
6
        }
1437
6
    }
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE4EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1425
7
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1426
        if constexpr (T == TYPE_ARRAY) {
1427
            const size_t size = x.size();
1428
            size_t dimensions = 0;
1429
            for (size_t i = 0; i < size; ++i) {
1430
                size_t element_dimensions = apply_visitor(*this, x[i]);
1431
                dimensions = std::max(dimensions, element_dimensions);
1432
            }
1433
            return 1 + dimensions;
1434
7
        } else {
1435
7
            return 0;
1436
7
        }
1437
7
    }
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE5EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1425
958
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1426
        if constexpr (T == TYPE_ARRAY) {
1427
            const size_t size = x.size();
1428
            size_t dimensions = 0;
1429
            for (size_t i = 0; i < size; ++i) {
1430
                size_t element_dimensions = apply_visitor(*this, x[i]);
1431
                dimensions = std::max(dimensions, element_dimensions);
1432
            }
1433
            return 1 + dimensions;
1434
958
        } else {
1435
958
            return 0;
1436
958
        }
1437
958
    }
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE6EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1425
852k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1426
        if constexpr (T == TYPE_ARRAY) {
1427
            const size_t size = x.size();
1428
            size_t dimensions = 0;
1429
            for (size_t i = 0; i < size; ++i) {
1430
                size_t element_dimensions = apply_visitor(*this, x[i]);
1431
                dimensions = std::max(dimensions, element_dimensions);
1432
            }
1433
            return 1 + dimensions;
1434
852k
        } else {
1435
852k
            return 0;
1436
852k
        }
1437
852k
    }
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE38EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE39EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE8EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1425
1
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1426
        if constexpr (T == TYPE_ARRAY) {
1427
            const size_t size = x.size();
1428
            size_t dimensions = 0;
1429
            for (size_t i = 0; i < size; ++i) {
1430
                size_t element_dimensions = apply_visitor(*this, x[i]);
1431
                dimensions = std::max(dimensions, element_dimensions);
1432
            }
1433
            return 1 + dimensions;
1434
1
        } else {
1435
1
            return 0;
1436
1
        }
1437
1
    }
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE27EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE9EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1425
164k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1426
        if constexpr (T == TYPE_ARRAY) {
1427
            const size_t size = x.size();
1428
            size_t dimensions = 0;
1429
            for (size_t i = 0; i < size; ++i) {
1430
                size_t element_dimensions = apply_visitor(*this, x[i]);
1431
                dimensions = std::max(dimensions, element_dimensions);
1432
            }
1433
            return 1 + dimensions;
1434
164k
        } else {
1435
164k
            return 0;
1436
164k
        }
1437
164k
    }
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE36EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE37EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE23EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1425
967k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1426
        if constexpr (T == TYPE_ARRAY) {
1427
            const size_t size = x.size();
1428
            size_t dimensions = 0;
1429
            for (size_t i = 0; i < size; ++i) {
1430
                size_t element_dimensions = apply_visitor(*this, x[i]);
1431
                dimensions = std::max(dimensions, element_dimensions);
1432
            }
1433
            return 1 + dimensions;
1434
967k
        } else {
1435
967k
            return 0;
1436
967k
        }
1437
967k
    }
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE15EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE10EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE41EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE17EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1425
127k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1426
127k
        if constexpr (T == TYPE_ARRAY) {
1427
127k
            const size_t size = x.size();
1428
127k
            size_t dimensions = 0;
1429
873k
            for (size_t i = 0; i < size; ++i) {
1430
745k
                size_t element_dimensions = apply_visitor(*this, x[i]);
1431
745k
                dimensions = std::max(dimensions, element_dimensions);
1432
745k
            }
1433
127k
            return 1 + dimensions;
1434
        } else {
1435
            return 0;
1436
        }
1437
127k
    }
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE16EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1425
1
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1426
        if constexpr (T == TYPE_ARRAY) {
1427
            const size_t size = x.size();
1428
            size_t dimensions = 0;
1429
            for (size_t i = 0; i < size; ++i) {
1430
                size_t element_dimensions = apply_visitor(*this, x[i]);
1431
                dimensions = std::max(dimensions, element_dimensions);
1432
            }
1433
            return 1 + dimensions;
1434
1
        } else {
1435
1
            return 0;
1436
1
        }
1437
1
    }
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE18EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE32EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1425
1
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1426
        if constexpr (T == TYPE_ARRAY) {
1427
            const size_t size = x.size();
1428
            size_t dimensions = 0;
1429
            for (size_t i = 0; i < size; ++i) {
1430
                size_t element_dimensions = apply_visitor(*this, x[i]);
1431
                dimensions = std::max(dimensions, element_dimensions);
1432
            }
1433
            return 1 + dimensions;
1434
1
        } else {
1435
1
            return 0;
1436
1
        }
1437
1
    }
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE28EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE29EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE20EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE30EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE35EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE22EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE19EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE24EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE31EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1425
40
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1426
        if constexpr (T == TYPE_ARRAY) {
1427
            const size_t size = x.size();
1428
            size_t dimensions = 0;
1429
            for (size_t i = 0; i < size; ++i) {
1430
                size_t element_dimensions = apply_visitor(*this, x[i]);
1431
                dimensions = std::max(dimensions, element_dimensions);
1432
            }
1433
            return 1 + dimensions;
1434
40
        } else {
1435
40
            return 0;
1436
40
        }
1437
40
    }
1438
};
1439
1440
// Visitor that allows to get type of scalar field
1441
// but exclude fields contain complex field.This is a faster version
1442
// for FieldVisitorToScalarType which does not support complex field.
1443
class SimpleFieldVisitorToScalarType : public StaticVisitor<size_t> {
1444
public:
1445
    template <PrimitiveType T>
1446
1.42M
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1447
1.42M
        if constexpr (T == TYPE_ARRAY) {
1448
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1449
12.3k
        } else if constexpr (T == TYPE_NULL) {
1450
12.3k
            have_nulls = true;
1451
12.3k
            return 1;
1452
1.41M
        } else {
1453
1.41M
            type = T;
1454
1.41M
            return 1;
1455
1.41M
        }
1456
1.42M
    }
_ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE1EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1446
12.3k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1447
        if constexpr (T == TYPE_ARRAY) {
1448
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1449
12.3k
        } else if constexpr (T == TYPE_NULL) {
1450
12.3k
            have_nulls = true;
1451
12.3k
            return 1;
1452
        } else {
1453
            type = T;
1454
            return 1;
1455
        }
1456
12.3k
    }
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE26EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE42EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE7EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1446
12.3k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1447
        if constexpr (T == TYPE_ARRAY) {
1448
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1449
        } else if constexpr (T == TYPE_NULL) {
1450
            have_nulls = true;
1451
            return 1;
1452
12.3k
        } else {
1453
12.3k
            type = T;
1454
12.3k
            return 1;
1455
12.3k
        }
1456
12.3k
    }
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE12EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE11EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE25EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE2EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1446
12.4k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1447
        if constexpr (T == TYPE_ARRAY) {
1448
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1449
        } else if constexpr (T == TYPE_NULL) {
1450
            have_nulls = true;
1451
            return 1;
1452
12.4k
        } else {
1453
12.4k
            type = T;
1454
12.4k
            return 1;
1455
12.4k
        }
1456
12.4k
    }
_ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE3EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1446
2
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1447
        if constexpr (T == TYPE_ARRAY) {
1448
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1449
        } else if constexpr (T == TYPE_NULL) {
1450
            have_nulls = true;
1451
            return 1;
1452
2
        } else {
1453
2
            type = T;
1454
2
            return 1;
1455
2
        }
1456
2
    }
_ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE4EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1446
7
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1447
        if constexpr (T == TYPE_ARRAY) {
1448
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1449
        } else if constexpr (T == TYPE_NULL) {
1450
            have_nulls = true;
1451
            return 1;
1452
7
        } else {
1453
7
            type = T;
1454
7
            return 1;
1455
7
        }
1456
7
    }
_ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE5EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1446
571
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1447
        if constexpr (T == TYPE_ARRAY) {
1448
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1449
        } else if constexpr (T == TYPE_NULL) {
1450
            have_nulls = true;
1451
            return 1;
1452
571
        } else {
1453
571
            type = T;
1454
571
            return 1;
1455
571
        }
1456
571
    }
_ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE6EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1446
704k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1447
        if constexpr (T == TYPE_ARRAY) {
1448
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1449
        } else if constexpr (T == TYPE_NULL) {
1450
            have_nulls = true;
1451
            return 1;
1452
704k
        } else {
1453
704k
            type = T;
1454
704k
            return 1;
1455
704k
        }
1456
704k
    }
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE38EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE39EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE8EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1446
1
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1447
        if constexpr (T == TYPE_ARRAY) {
1448
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1449
        } else if constexpr (T == TYPE_NULL) {
1450
            have_nulls = true;
1451
            return 1;
1452
1
        } else {
1453
1
            type = T;
1454
1
            return 1;
1455
1
        }
1456
1
    }
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE27EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE9EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1446
12.6k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1447
        if constexpr (T == TYPE_ARRAY) {
1448
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1449
        } else if constexpr (T == TYPE_NULL) {
1450
            have_nulls = true;
1451
            return 1;
1452
12.6k
        } else {
1453
12.6k
            type = T;
1454
12.6k
            return 1;
1455
12.6k
        }
1456
12.6k
    }
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE36EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE37EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE23EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1446
670k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1447
        if constexpr (T == TYPE_ARRAY) {
1448
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1449
        } else if constexpr (T == TYPE_NULL) {
1450
            have_nulls = true;
1451
            return 1;
1452
670k
        } else {
1453
670k
            type = T;
1454
670k
            return 1;
1455
670k
        }
1456
670k
    }
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE15EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE10EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE41EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE17EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE16EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE18EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE32EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE28EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE29EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE20EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE30EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE35EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE22EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE19EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE24EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE31EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1446
14
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1447
        if constexpr (T == TYPE_ARRAY) {
1448
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1449
        } else if constexpr (T == TYPE_NULL) {
1450
            have_nulls = true;
1451
            return 1;
1452
14
        } else {
1453
14
            type = T;
1454
14
            return 1;
1455
14
        }
1456
14
    }
1457
1.42M
    void get_scalar_type(PrimitiveType* data_type) const { *data_type = type; }
1458
1.42M
    bool contain_nulls() const { return have_nulls; }
1459
1460
1.42M
    bool need_convert_field() const { return false; }
1461
1462
private:
1463
    PrimitiveType type = PrimitiveType::INVALID_TYPE;
1464
    bool have_nulls = false;
1465
};
1466
1467
/// Visitor that allows to get type of scalar field
1468
/// or least common type of scalars in array.
1469
/// More optimized version of FieldToDataType.
1470
class FieldVisitorToScalarType : public StaticVisitor<size_t> {
1471
public:
1472
    template <PrimitiveType T>
1473
823k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1474
823k
        if constexpr (T == TYPE_ARRAY) {
1475
127k
            size_t size = x.size();
1476
873k
            for (size_t i = 0; i < size; ++i) {
1477
745k
                apply_visitor(*this, x[i]);
1478
745k
            }
1479
127k
            return 0;
1480
127k
        } else if constexpr (T == TYPE_NULL) {
1481
11.6k
            have_nulls = true;
1482
11.6k
            return 0;
1483
683k
        } else {
1484
683k
            field_types.insert(T);
1485
683k
            type_indexes.insert(T);
1486
683k
            return 0;
1487
683k
        }
1488
823k
    }
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE1EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1473
11.6k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1474
        if constexpr (T == TYPE_ARRAY) {
1475
            size_t size = x.size();
1476
            for (size_t i = 0; i < size; ++i) {
1477
                apply_visitor(*this, x[i]);
1478
            }
1479
            return 0;
1480
11.6k
        } else if constexpr (T == TYPE_NULL) {
1481
11.6k
            have_nulls = true;
1482
11.6k
            return 0;
1483
        } else {
1484
            field_types.insert(T);
1485
            type_indexes.insert(T);
1486
            return 0;
1487
        }
1488
11.6k
    }
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE26EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE42EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE7EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1473
28.6k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1474
        if constexpr (T == TYPE_ARRAY) {
1475
            size_t size = x.size();
1476
            for (size_t i = 0; i < size; ++i) {
1477
                apply_visitor(*this, x[i]);
1478
            }
1479
            return 0;
1480
        } else if constexpr (T == TYPE_NULL) {
1481
            have_nulls = true;
1482
            return 0;
1483
28.6k
        } else {
1484
28.6k
            field_types.insert(T);
1485
28.6k
            type_indexes.insert(T);
1486
28.6k
            return 0;
1487
28.6k
        }
1488
28.6k
    }
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE12EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE11EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE25EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE2EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1473
57.2k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1474
        if constexpr (T == TYPE_ARRAY) {
1475
            size_t size = x.size();
1476
            for (size_t i = 0; i < size; ++i) {
1477
                apply_visitor(*this, x[i]);
1478
            }
1479
            return 0;
1480
        } else if constexpr (T == TYPE_NULL) {
1481
            have_nulls = true;
1482
            return 0;
1483
57.2k
        } else {
1484
57.2k
            field_types.insert(T);
1485
57.2k
            type_indexes.insert(T);
1486
57.2k
            return 0;
1487
57.2k
        }
1488
57.2k
    }
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE3EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1473
4
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1474
        if constexpr (T == TYPE_ARRAY) {
1475
            size_t size = x.size();
1476
            for (size_t i = 0; i < size; ++i) {
1477
                apply_visitor(*this, x[i]);
1478
            }
1479
            return 0;
1480
        } else if constexpr (T == TYPE_NULL) {
1481
            have_nulls = true;
1482
            return 0;
1483
4
        } else {
1484
4
            field_types.insert(T);
1485
4
            type_indexes.insert(T);
1486
4
            return 0;
1487
4
        }
1488
4
    }
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE4EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE5EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1473
387
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1474
        if constexpr (T == TYPE_ARRAY) {
1475
            size_t size = x.size();
1476
            for (size_t i = 0; i < size; ++i) {
1477
                apply_visitor(*this, x[i]);
1478
            }
1479
            return 0;
1480
        } else if constexpr (T == TYPE_NULL) {
1481
            have_nulls = true;
1482
            return 0;
1483
387
        } else {
1484
387
            field_types.insert(T);
1485
387
            type_indexes.insert(T);
1486
387
            return 0;
1487
387
        }
1488
387
    }
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE6EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1473
147k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1474
        if constexpr (T == TYPE_ARRAY) {
1475
            size_t size = x.size();
1476
            for (size_t i = 0; i < size; ++i) {
1477
                apply_visitor(*this, x[i]);
1478
            }
1479
            return 0;
1480
        } else if constexpr (T == TYPE_NULL) {
1481
            have_nulls = true;
1482
            return 0;
1483
147k
        } else {
1484
147k
            field_types.insert(T);
1485
147k
            type_indexes.insert(T);
1486
147k
            return 0;
1487
147k
        }
1488
147k
    }
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE38EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE39EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE8EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE27EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE9EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1473
151k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1474
        if constexpr (T == TYPE_ARRAY) {
1475
            size_t size = x.size();
1476
            for (size_t i = 0; i < size; ++i) {
1477
                apply_visitor(*this, x[i]);
1478
            }
1479
            return 0;
1480
        } else if constexpr (T == TYPE_NULL) {
1481
            have_nulls = true;
1482
            return 0;
1483
151k
        } else {
1484
151k
            field_types.insert(T);
1485
151k
            type_indexes.insert(T);
1486
151k
            return 0;
1487
151k
        }
1488
151k
    }
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE36EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE37EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE23EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1473
297k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1474
        if constexpr (T == TYPE_ARRAY) {
1475
            size_t size = x.size();
1476
            for (size_t i = 0; i < size; ++i) {
1477
                apply_visitor(*this, x[i]);
1478
            }
1479
            return 0;
1480
        } else if constexpr (T == TYPE_NULL) {
1481
            have_nulls = true;
1482
            return 0;
1483
297k
        } else {
1484
297k
            field_types.insert(T);
1485
297k
            type_indexes.insert(T);
1486
297k
            return 0;
1487
297k
        }
1488
297k
    }
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE15EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE10EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE41EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE17EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1473
127k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1474
127k
        if constexpr (T == TYPE_ARRAY) {
1475
127k
            size_t size = x.size();
1476
873k
            for (size_t i = 0; i < size; ++i) {
1477
745k
                apply_visitor(*this, x[i]);
1478
745k
            }
1479
127k
            return 0;
1480
        } else if constexpr (T == TYPE_NULL) {
1481
            have_nulls = true;
1482
            return 0;
1483
        } else {
1484
            field_types.insert(T);
1485
            type_indexes.insert(T);
1486
            return 0;
1487
        }
1488
127k
    }
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE16EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1473
1
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1474
        if constexpr (T == TYPE_ARRAY) {
1475
            size_t size = x.size();
1476
            for (size_t i = 0; i < size; ++i) {
1477
                apply_visitor(*this, x[i]);
1478
            }
1479
            return 0;
1480
        } else if constexpr (T == TYPE_NULL) {
1481
            have_nulls = true;
1482
            return 0;
1483
1
        } else {
1484
1
            field_types.insert(T);
1485
1
            type_indexes.insert(T);
1486
1
            return 0;
1487
1
        }
1488
1
    }
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE18EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE32EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1473
1
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1474
        if constexpr (T == TYPE_ARRAY) {
1475
            size_t size = x.size();
1476
            for (size_t i = 0; i < size; ++i) {
1477
                apply_visitor(*this, x[i]);
1478
            }
1479
            return 0;
1480
        } else if constexpr (T == TYPE_NULL) {
1481
            have_nulls = true;
1482
            return 0;
1483
1
        } else {
1484
1
            field_types.insert(T);
1485
1
            type_indexes.insert(T);
1486
1
            return 0;
1487
1
        }
1488
1
    }
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE28EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE29EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE20EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE30EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE35EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE22EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE19EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE24EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE31EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1473
26
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1474
        if constexpr (T == TYPE_ARRAY) {
1475
            size_t size = x.size();
1476
            for (size_t i = 0; i < size; ++i) {
1477
                apply_visitor(*this, x[i]);
1478
            }
1479
            return 0;
1480
        } else if constexpr (T == TYPE_NULL) {
1481
            have_nulls = true;
1482
            return 0;
1483
26
        } else {
1484
26
            field_types.insert(T);
1485
26
            type_indexes.insert(T);
1486
26
            return 0;
1487
26
        }
1488
26
    }
1489
77.7k
    void get_scalar_type(PrimitiveType* type) const {
1490
77.7k
        if (type_indexes.size() == 1) {
1491
            // Most cases will have only one type
1492
64.5k
            *type = *type_indexes.begin();
1493
64.5k
            return;
1494
64.5k
        }
1495
13.2k
        DataTypePtr data_type;
1496
13.2k
        get_least_supertype_jsonb(type_indexes, &data_type);
1497
13.2k
        *type = data_type->get_primitive_type();
1498
13.2k
    }
1499
77.7k
    bool contain_nulls() const { return have_nulls; }
1500
77.7k
    bool need_convert_field() const { return field_types.size() > 1; }
1501
1502
private:
1503
    phmap::flat_hash_set<PrimitiveType> type_indexes;
1504
    phmap::flat_hash_set<PrimitiveType> field_types;
1505
    bool have_nulls = false;
1506
};
1507
1508
template <typename Visitor>
1509
1.50M
void get_field_info_impl(const Field& field, FieldInfo* info) {
1510
1.50M
    Visitor to_scalar_type_visitor;
1511
1.50M
    apply_visitor(to_scalar_type_visitor, field);
1512
1.50M
    PrimitiveType type_id;
1513
1.50M
    to_scalar_type_visitor.get_scalar_type(&type_id);
1514
    // array item's dimension may missmatch, eg. [1, 2, [1, 2, 3]]
1515
1.50M
    *info = {type_id, to_scalar_type_visitor.contain_nulls(),
1516
1.50M
             to_scalar_type_visitor.need_convert_field(),
1517
1.50M
             apply_visitor(FieldVisitorToNumberOfDimensions(), field)};
1518
1.50M
}
_ZN5doris12variant_util19get_field_info_implINS0_24FieldVisitorToScalarTypeEEEvRKNS_5FieldEPNS_9FieldInfoE
Line
Count
Source
1509
77.7k
void get_field_info_impl(const Field& field, FieldInfo* info) {
1510
77.7k
    Visitor to_scalar_type_visitor;
1511
77.7k
    apply_visitor(to_scalar_type_visitor, field);
1512
77.7k
    PrimitiveType type_id;
1513
77.7k
    to_scalar_type_visitor.get_scalar_type(&type_id);
1514
    // array item's dimension may missmatch, eg. [1, 2, [1, 2, 3]]
1515
77.7k
    *info = {type_id, to_scalar_type_visitor.contain_nulls(),
1516
77.7k
             to_scalar_type_visitor.need_convert_field(),
1517
77.7k
             apply_visitor(FieldVisitorToNumberOfDimensions(), field)};
1518
77.7k
}
_ZN5doris12variant_util19get_field_info_implINS0_30SimpleFieldVisitorToScalarTypeEEEvRKNS_5FieldEPNS_9FieldInfoE
Line
Count
Source
1509
1.42M
void get_field_info_impl(const Field& field, FieldInfo* info) {
1510
1.42M
    Visitor to_scalar_type_visitor;
1511
1.42M
    apply_visitor(to_scalar_type_visitor, field);
1512
1.42M
    PrimitiveType type_id;
1513
1.42M
    to_scalar_type_visitor.get_scalar_type(&type_id);
1514
    // array item's dimension may missmatch, eg. [1, 2, [1, 2, 3]]
1515
1.42M
    *info = {type_id, to_scalar_type_visitor.contain_nulls(),
1516
1.42M
             to_scalar_type_visitor.need_convert_field(),
1517
1.42M
             apply_visitor(FieldVisitorToNumberOfDimensions(), field)};
1518
1.42M
}
1519
1520
1.50M
void get_field_info(const Field& field, FieldInfo* info) {
1521
1.50M
    if (field.is_complex_field()) {
1522
77.7k
        get_field_info_impl<FieldVisitorToScalarType>(field, info);
1523
1.42M
    } else {
1524
1.42M
        get_field_info_impl<SimpleFieldVisitorToScalarType>(field, info);
1525
1.42M
    }
1526
1.50M
}
1527
1528
bool generate_sub_column_info(const TabletSchema& schema, int32_t col_unique_id,
1529
                              const std::string& path,
1530
1.97k
                              TabletSchema::SubColumnInfo* sub_column_info) {
1531
1.97k
    const auto& parent_column = schema.column_by_uid(col_unique_id);
1532
1.97k
    std::function<void(const TabletColumn&, TabletColumn*)> generate_result_column =
1533
1.97k
            [&](const TabletColumn& from_column, TabletColumn* to_column) {
1534
35
                to_column->set_name(parent_column.name_lower_case() + "." + path);
1535
35
                to_column->set_type(from_column.type());
1536
35
                to_column->set_parent_unique_id(parent_column.unique_id());
1537
35
                bool is_typed = !parent_column.variant_enable_typed_paths_to_sparse();
1538
35
                to_column->set_path_info(
1539
35
                        PathInData(parent_column.name_lower_case() + "." + path, is_typed));
1540
35
                to_column->set_aggregation_method(parent_column.aggregation());
1541
35
                to_column->set_is_nullable(true);
1542
35
                to_column->set_parent_unique_id(parent_column.unique_id());
1543
35
                if (from_column.is_decimal()) {
1544
0
                    to_column->set_precision(from_column.precision());
1545
0
                }
1546
35
                to_column->set_frac(from_column.frac());
1547
1548
35
                if (from_column.is_array_type()) {
1549
2
                    TabletColumn nested_column;
1550
2
                    generate_result_column(*from_column.get_sub_columns()[0], &nested_column);
1551
2
                    to_column->add_sub_column(nested_column);
1552
2
                }
1553
35
            };
1554
1555
1.97k
    auto generate_index = [&](const std::string& pattern) {
1556
        // 1. find subcolumn's index
1557
33
        if (const auto& indexes = schema.inverted_index_by_field_pattern(col_unique_id, pattern);
1558
33
            !indexes.empty()) {
1559
6
            for (const auto& index : indexes) {
1560
6
                auto index_ptr = std::make_shared<TabletIndex>(*index);
1561
6
                index_ptr->set_escaped_escaped_index_suffix_path(
1562
6
                        sub_column_info->column.path_info_ptr()->get_path());
1563
6
                sub_column_info->indexes.emplace_back(std::move(index_ptr));
1564
6
            }
1565
6
        }
1566
        // 2. find parent column's index
1567
27
        else if (const auto parent_index = schema.inverted_indexs(col_unique_id);
1568
27
                 !parent_index.empty()) {
1569
0
            inherit_index(parent_index, sub_column_info->indexes, sub_column_info->column);
1570
27
        } else {
1571
27
            sub_column_info->indexes.clear();
1572
27
        }
1573
33
    };
1574
1575
1.97k
    const auto& sub_columns = parent_column.get_sub_columns();
1576
1.97k
    for (const auto& sub_column : sub_columns) {
1577
119
        const char* pattern = sub_column->name().c_str();
1578
119
        switch (sub_column->pattern_type()) {
1579
1
        case PatternTypePB::MATCH_NAME: {
1580
1
            if (strcmp(pattern, path.c_str()) == 0) {
1581
1
                generate_result_column(*sub_column, &sub_column_info->column);
1582
1
                generate_index(sub_column->name());
1583
1
                return true;
1584
1
            }
1585
0
            break;
1586
1
        }
1587
118
        case PatternTypePB::MATCH_NAME_GLOB: {
1588
118
            if (glob_match_re2(pattern, path)) {
1589
32
                generate_result_column(*sub_column, &sub_column_info->column);
1590
32
                generate_index(sub_column->name());
1591
32
                return true;
1592
32
            }
1593
86
            break;
1594
118
        }
1595
86
        default:
1596
0
            break;
1597
119
        }
1598
119
    }
1599
1.94k
    return false;
1600
1.97k
}
1601
1602
TabletSchemaSPtr VariantCompactionUtil::calculate_variant_extended_schema(
1603
0
        const std::vector<RowsetSharedPtr>& rowsets, const TabletSchemaSPtr& base_schema) {
1604
0
    if (rowsets.empty()) {
1605
0
        return nullptr;
1606
0
    }
1607
1608
0
    std::vector<TabletSchemaSPtr> schemas;
1609
0
    for (const auto& rs : rowsets) {
1610
0
        if (rs->num_segments() == 0) {
1611
0
            continue;
1612
0
        }
1613
0
        const auto& tablet_schema = rs->tablet_schema();
1614
0
        SegmentCacheHandle segment_cache;
1615
0
        auto st = SegmentLoader::instance()->load_segments(std::static_pointer_cast<BetaRowset>(rs),
1616
0
                                                           &segment_cache);
1617
0
        if (!st.ok()) {
1618
0
            return base_schema;
1619
0
        }
1620
0
        for (const auto& segment : segment_cache.get_segments()) {
1621
0
            TabletSchemaSPtr schema = tablet_schema->copy_without_variant_extracted_columns();
1622
0
            for (const auto& column : tablet_schema->columns()) {
1623
0
                if (!column->is_variant_type()) {
1624
0
                    continue;
1625
0
                }
1626
0
                std::shared_ptr<ColumnReader> column_reader;
1627
0
                OlapReaderStatistics stats;
1628
0
                st = segment->get_column_reader(column->unique_id(), &column_reader, &stats);
1629
0
                if (!st.ok()) {
1630
0
                    LOG(WARNING) << "Failed to get column reader for column: " << column->name()
1631
0
                                 << " error: " << st.to_string();
1632
0
                    continue;
1633
0
                }
1634
0
                if (!column_reader) {
1635
0
                    continue;
1636
0
                }
1637
1638
0
                CHECK(column_reader->get_meta_type() == FieldType::OLAP_FIELD_TYPE_VARIANT);
1639
0
                auto* variant_column_reader =
1640
0
                        assert_cast<segment_v2::VariantColumnReader*>(column_reader.get());
1641
                // load external meta before getting subcolumn meta info
1642
0
                st = variant_column_reader->load_external_meta_once();
1643
0
                if (!st.ok()) {
1644
0
                    LOG(WARNING) << "Failed to load external meta for column: " << column->name()
1645
0
                                 << " error: " << st.to_string();
1646
0
                    continue;
1647
0
                }
1648
0
                const auto* subcolumn_meta_info = variant_column_reader->get_subcolumns_meta_info();
1649
0
                for (const auto& entry : *subcolumn_meta_info) {
1650
0
                    if (entry->path.empty()) {
1651
0
                        continue;
1652
0
                    }
1653
0
                    const std::string& column_name =
1654
0
                            column->name_lower_case() + "." + entry->path.get_path();
1655
0
                    const DataTypePtr& data_type = entry->data.file_column_type;
1656
0
                    PathInDataBuilder full_path_builder;
1657
0
                    auto full_path = full_path_builder.append(column->name_lower_case(), false)
1658
0
                                             .append(entry->path.get_parts(), false)
1659
0
                                             .build();
1660
0
                    TabletColumn subcolumn =
1661
0
                            get_column_by_type(data_type, column_name,
1662
0
                                               ExtraInfo {.unique_id = -1,
1663
0
                                                          .parent_unique_id = column->unique_id(),
1664
0
                                                          .path_info = full_path});
1665
0
                    schema->append_column(subcolumn);
1666
0
                }
1667
0
            }
1668
0
            schemas.emplace_back(schema);
1669
0
        }
1670
0
    }
1671
0
    TabletSchemaSPtr least_common_schema;
1672
0
    auto st = get_least_common_schema(schemas, base_schema, least_common_schema, false);
1673
0
    if (!st.ok()) {
1674
0
        return base_schema;
1675
0
    }
1676
0
    return least_common_schema;
1677
0
}
1678
1679
bool inherit_index(const std::vector<const TabletIndex*>& parent_indexes,
1680
                   TabletIndexes& subcolumns_indexes, FieldType column_type,
1681
1.00k
                   const std::string& suffix_path, bool is_array_nested_type) {
1682
1.00k
    if (parent_indexes.empty()) {
1683
983
        return false;
1684
983
    }
1685
20
    subcolumns_indexes.clear();
1686
    // bkd index or array index only need to inherit one index
1687
20
    if (field_is_numeric_type(column_type) ||
1688
20
        (is_array_nested_type &&
1689
13
         (field_is_numeric_type(column_type) || field_is_slice_type(column_type)))) {
1690
9
        auto index_ptr = std::make_shared<TabletIndex>(*parent_indexes[0]);
1691
9
        index_ptr->set_escaped_escaped_index_suffix_path(suffix_path);
1692
        // no need parse for bkd index or array index
1693
9
        index_ptr->remove_parser_and_analyzer();
1694
9
        subcolumns_indexes.emplace_back(std::move(index_ptr));
1695
9
        return true;
1696
9
    }
1697
    // string type need to inherit all indexes
1698
11
    else if (field_is_slice_type(column_type) && !is_array_nested_type) {
1699
10
        for (const auto& index : parent_indexes) {
1700
10
            auto index_ptr = std::make_shared<TabletIndex>(*index);
1701
10
            index_ptr->set_escaped_escaped_index_suffix_path(suffix_path);
1702
10
            subcolumns_indexes.emplace_back(std::move(index_ptr));
1703
10
        }
1704
9
        return true;
1705
9
    }
1706
2
    return false;
1707
20
}
1708
1709
bool inherit_index(const std::vector<const TabletIndex*>& parent_indexes,
1710
1.00k
                   TabletIndexes& subcolumns_indexes, const TabletColumn& column) {
1711
1.00k
    if (!column.is_extracted_column()) {
1712
3
        return false;
1713
3
    }
1714
1.00k
    if (column.is_array_type()) {
1715
14
        if (column.get_sub_columns().empty()) {
1716
0
            return false;
1717
0
        }
1718
14
        const TabletColumn* nested = column.get_sub_columns()[0].get();
1719
14
        while (nested != nullptr && nested->is_array_type()) {
1720
0
            if (nested->get_sub_columns().empty()) {
1721
0
                return false;
1722
0
            }
1723
0
            nested = nested->get_sub_columns()[0].get();
1724
0
        }
1725
14
        if (nested == nullptr) {
1726
0
            return false;
1727
0
        }
1728
14
        return inherit_index(parent_indexes, subcolumns_indexes, nested->type(),
1729
14
                             column.path_info_ptr()->get_path(), true);
1730
14
    }
1731
989
    return inherit_index(parent_indexes, subcolumns_indexes, column.type(),
1732
989
                         column.path_info_ptr()->get_path());
1733
1.00k
}
1734
1735
bool inherit_index(const std::vector<const TabletIndex*>& parent_indexes,
1736
0
                   TabletIndexes& subcolumns_indexes, const ColumnMetaPB& column_pb) {
1737
0
    if (!column_pb.has_column_path_info()) {
1738
0
        return false;
1739
0
    }
1740
0
    if (column_pb.type() == (int)FieldType::OLAP_FIELD_TYPE_ARRAY) {
1741
0
        if (column_pb.children_columns_size() == 0) {
1742
0
            return false;
1743
0
        }
1744
0
        const ColumnMetaPB* nested = &column_pb.children_columns(0);
1745
0
        while (nested != nullptr && nested->type() == (int)FieldType::OLAP_FIELD_TYPE_ARRAY) {
1746
0
            if (nested->children_columns_size() == 0) {
1747
0
                return false;
1748
0
            }
1749
0
            nested = &nested->children_columns(0);
1750
0
        }
1751
0
        if (nested == nullptr) {
1752
0
            return false;
1753
0
        }
1754
0
        return inherit_index(parent_indexes, subcolumns_indexes, (FieldType)nested->type(),
1755
0
                             column_pb.column_path_info().path(), true);
1756
0
    }
1757
0
    return inherit_index(parent_indexes, subcolumns_indexes, (FieldType)column_pb.type(),
1758
0
                         column_pb.column_path_info().path());
1759
0
}
1760
1761
// ============ Implementation from parse2column.cpp ============
1762
1763
/** Pool for objects that cannot be used from different threads simultaneously.
1764
  * Allows to create an object for each thread.
1765
  * Pool has unbounded size and objects are not destroyed before destruction of pool.
1766
  *
1767
  * Use it in cases when thread local storage is not appropriate
1768
  *  (when maximum number of simultaneously used objects is less
1769
  *   than number of running/sleeping threads, that has ever used object,
1770
  *   and creation/destruction of objects is expensive).
1771
  */
1772
template <typename T>
1773
class SimpleObjectPool {
1774
protected:
1775
    /// Hold all available objects in stack.
1776
    std::mutex mutex;
1777
    std::stack<std::unique_ptr<T>> stack;
1778
    /// Specialized deleter for std::unique_ptr.
1779
    /// Returns underlying pointer back to stack thus reclaiming its ownership.
1780
    struct Deleter {
1781
        SimpleObjectPool<T>* parent;
1782
12.7k
        Deleter(SimpleObjectPool<T>* parent_ = nullptr) : parent {parent_} {} /// NOLINT
1783
12.7k
        void operator()(T* owning_ptr) const {
1784
12.7k
            std::lock_guard lock {parent->mutex};
1785
12.7k
            parent->stack.emplace(owning_ptr);
1786
12.7k
        }
1787
    };
1788
1789
public:
1790
    using Pointer = std::unique_ptr<T, Deleter>;
1791
    /// Extracts and returns a pointer from the stack if it's not empty,
1792
    ///  creates a new one by calling provided f() otherwise.
1793
    template <typename Factory>
1794
12.7k
    Pointer get(Factory&& f) {
1795
12.7k
        std::unique_lock lock(mutex);
1796
12.7k
        if (stack.empty()) {
1797
1
            return {f(), this};
1798
1
        }
1799
12.7k
        auto object = stack.top().release();
1800
12.7k
        stack.pop();
1801
12.7k
        return std::unique_ptr<T, Deleter>(object, Deleter(this));
1802
12.7k
    }
variant_util.cpp:_ZN5doris12variant_util16SimpleObjectPoolINS_14JSONDataParserINS_14SimdJSONParserEEEE3getIZNS0_21parse_json_to_variantERNS_7IColumnERKNS_9StringRefEPS4_RKNS_11ParseConfigEE3$_0EESt10unique_ptrIS4_NS5_7DeleterEEOT_
Line
Count
Source
1794
12.4k
    Pointer get(Factory&& f) {
1795
12.4k
        std::unique_lock lock(mutex);
1796
12.4k
        if (stack.empty()) {
1797
1
            return {f(), this};
1798
1
        }
1799
12.4k
        auto object = stack.top().release();
1800
12.4k
        stack.pop();
1801
12.4k
        return std::unique_ptr<T, Deleter>(object, Deleter(this));
1802
12.4k
    }
variant_util.cpp:_ZN5doris12variant_util16SimpleObjectPoolINS_14JSONDataParserINS_14SimdJSONParserEEEE3getIZNS0_21parse_json_to_variantERNS_7IColumnERKNS_9ColumnStrIjEERKNS_11ParseConfigEE3$_0EESt10unique_ptrIS4_NS5_7DeleterEEOT_
Line
Count
Source
1794
286
    Pointer get(Factory&& f) {
1795
286
        std::unique_lock lock(mutex);
1796
286
        if (stack.empty()) {
1797
0
            return {f(), this};
1798
0
        }
1799
286
        auto object = stack.top().release();
1800
286
        stack.pop();
1801
286
        return std::unique_ptr<T, Deleter>(object, Deleter(this));
1802
286
    }
1803
    /// Like get(), but creates object using default constructor.
1804
    Pointer getDefault() {
1805
        return get([] { return new T; });
1806
    }
1807
};
1808
1809
SimpleObjectPool<JsonParser> parsers_pool;
1810
1811
// Shorthand for ColumnVariant::Subcolumns::Node.
using Node = typename ColumnVariant::Subcolumns::Node;
1812
1813
11.6k
// Grows `chars` by `size` bytes and copies `size` bytes from `data` into the new tail.
static inline void append_binary_bytes(ColumnString::Chars& chars, const void* data, size_t size) {
    const auto write_pos = chars.size();
    chars.resize(write_pos + size);
    auto* dst = chars.data() + write_pos;
    memcpy(dst, data, size);
}
1818
1819
4.69k
// Appends `type` to `chars` as a single uint8_t tag byte.
static inline void append_binary_type(ColumnString::Chars& chars, FieldType type) {
    const auto tag = static_cast<uint8_t>(type);
    append_binary_bytes(chars, &tag, sizeof(tag));
}
1823
1824
2.23k
// Appends the raw in-memory bytes of a size_t value (sizeof(size_t) bytes) to `chars`.
static inline void append_binary_sizet(ColumnString::Chars& chars, size_t v) {
    append_binary_bytes(chars, &v, sizeof(v));
}
1827
1828
4.69k
static void append_field_to_binary_chars(const Field& field, ColumnString::Chars& chars) {
1829
4.69k
    switch (field.get_type()) {
1830
0
    case PrimitiveType::TYPE_NULL: {
1831
0
        append_binary_type(chars, FieldType::OLAP_FIELD_TYPE_NONE);
1832
0
        return;
1833
0
    }
1834
2
    case PrimitiveType::TYPE_BOOLEAN: {
1835
2
        append_binary_type(chars,
1836
2
                           TabletColumn::get_field_type_by_type(PrimitiveType::TYPE_BOOLEAN));
1837
2
        const auto v = static_cast<UInt8>(field.get<PrimitiveType::TYPE_BOOLEAN>());
1838
2
        append_binary_bytes(chars, &v, sizeof(UInt8));
1839
2
        return;
1840
0
    }
1841
2.45k
    case PrimitiveType::TYPE_BIGINT: {
1842
2.45k
        append_binary_type(chars, TabletColumn::get_field_type_by_type(PrimitiveType::TYPE_BIGINT));
1843
2.45k
        const auto v = field.get<PrimitiveType::TYPE_BIGINT>();
1844
2.45k
        append_binary_bytes(chars, &v, sizeof(Int64));
1845
2.45k
        return;
1846
0
    }
1847
1
    case PrimitiveType::TYPE_LARGEINT: {
1848
1
        append_binary_type(chars,
1849
1
                           TabletColumn::get_field_type_by_type(PrimitiveType::TYPE_LARGEINT));
1850
1
        const auto v = field.get<PrimitiveType::TYPE_LARGEINT>();
1851
1
        append_binary_bytes(chars, &v, sizeof(int128_t));
1852
1
        return;
1853
0
    }
1854
1
    case PrimitiveType::TYPE_DOUBLE: {
1855
1
        append_binary_type(chars, TabletColumn::get_field_type_by_type(PrimitiveType::TYPE_DOUBLE));
1856
1
        const auto v = field.get<PrimitiveType::TYPE_DOUBLE>();
1857
1
        append_binary_bytes(chars, &v, sizeof(Float64));
1858
1
        return;
1859
0
    }
1860
2.22k
    case PrimitiveType::TYPE_STRING: {
1861
2.22k
        append_binary_type(chars, FieldType::OLAP_FIELD_TYPE_STRING);
1862
2.22k
        const auto& v = field.get<PrimitiveType::TYPE_STRING>();
1863
2.22k
        append_binary_sizet(chars, v.size());
1864
2.22k
        append_binary_bytes(chars, v.data(), v.size());
1865
2.22k
        return;
1866
0
    }
1867
0
    case PrimitiveType::TYPE_JSONB: {
1868
0
        append_binary_type(chars, FieldType::OLAP_FIELD_TYPE_JSONB);
1869
0
        const auto& v = field.get<PrimitiveType::TYPE_JSONB>();
1870
0
        append_binary_sizet(chars, v.get_size());
1871
0
        append_binary_bytes(chars, v.get_value(), v.get_size());
1872
0
        return;
1873
0
    }
1874
8
    case PrimitiveType::TYPE_ARRAY: {
1875
8
        append_binary_type(chars, FieldType::OLAP_FIELD_TYPE_ARRAY);
1876
8
        const auto& a = field.get<PrimitiveType::TYPE_ARRAY>();
1877
8
        append_binary_sizet(chars, a.size());
1878
13
        for (const auto& elem : a) {
1879
13
            append_field_to_binary_chars(elem, chars);
1880
13
        }
1881
8
        return;
1882
0
    }
1883
0
    default:
1884
0
        throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Unsupported field type {}",
1885
0
                               field.get_type());
1886
4.69k
    }
1887
4.69k
}
1888
template <typename ParserImpl>
1889
void parse_json_to_variant_impl(IColumn& column, const char* src, size_t length,
1890
80.6k
                                JSONDataParser<ParserImpl>* parser, const ParseConfig& config) {
1891
80.6k
    auto& column_variant = assert_cast<ColumnVariant&>(column);
1892
80.6k
    std::optional<ParseResult> result;
1893
    /// Treat empty string as an empty object
1894
    /// for better CAST from String to Object.
1895
80.6k
    if (length > 0) {
1896
80.6k
        result = parser->parse(src, length, config);
1897
80.6k
    } else {
1898
1
        result = ParseResult {};
1899
1
    }
1900
80.6k
    if (!result) {
1901
11
        VLOG_DEBUG << "failed to parse " << std::string_view(src, length) << ", length= " << length;
1902
11
        if (config::variant_throw_exeception_on_invalid_json) {
1903
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Failed to parse object {}",
1904
0
                                   std::string_view(src, length));
1905
0
        }
1906
        // Treat as string
1907
11
        PathInData root_path;
1908
11
        Field field = Field::create_field<TYPE_STRING>(String(src, length));
1909
11
        result = ParseResult {{root_path}, {field}};
1910
11
    }
1911
80.6k
    auto& [paths, values] = *result;
1912
80.6k
    assert(paths.size() == values.size());
1913
80.1k
    size_t old_num_rows = column_variant.rows();
1914
80.1k
    if (config.deprecated_enable_flatten_nested) {
1915
        // here we should check the paths in variant and paths in result,
1916
        // if two paths which same prefix have different structure, we should throw an exception
1917
3.00k
        std::vector<PathInData> check_paths;
1918
11.9k
        for (const auto& entry : column_variant.get_subcolumns()) {
1919
11.9k
            check_paths.push_back(entry->path);
1920
11.9k
        }
1921
3.00k
        check_paths.insert(check_paths.end(), paths.begin(), paths.end());
1922
3.00k
        THROW_IF_ERROR(check_variant_has_no_ambiguous_paths(check_paths));
1923
3.00k
    }
1924
80.1k
    auto [doc_value_data_paths, doc_value_data_values] =
1925
80.1k
            column_variant.get_doc_value_data_paths_and_values();
1926
80.1k
    auto& doc_value_data_offsets = column_variant.serialized_doc_value_column_offsets();
1927
1928
1.35M
    auto flush_defaults = [](ColumnVariant::Subcolumn* subcolumn) {
1929
1.35M
        const auto num_defaults = subcolumn->cur_num_of_defaults();
1930
1.35M
        if (num_defaults > 0) {
1931
104k
            subcolumn->insert_many_defaults(num_defaults);
1932
104k
            subcolumn->reset_current_num_of_defaults();
1933
104k
        }
1934
1.35M
    };
1935
1936
80.1k
    auto is_plain_path = [](const PathInData& path) {
1937
13
        for (const auto& part : path.get_parts()) {
1938
13
            if (part.is_nested || part.anonymous_array_level != 0) {
1939
0
                return false;
1940
0
            }
1941
13
        }
1942
9
        return true;
1943
9
    };
1944
1945
80.1k
    auto get_or_create_subcolumn = [&](const PathInData& path, size_t index_hint,
1946
1.35M
                                       const FieldInfo& field_info) -> ColumnVariant::Subcolumn* {
1947
1.35M
        auto* subcolumn = column_variant.get_subcolumn(path, index_hint);
1948
1.35M
        if (subcolumn == nullptr) {
1949
2.46k
            if (path.has_nested_part()) {
1950
8
                column_variant.add_nested_subcolumn(path, field_info, old_num_rows);
1951
2.45k
            } else {
1952
2.45k
                column_variant.add_sub_column(path, old_num_rows);
1953
2.45k
            }
1954
2.46k
            subcolumn = column_variant.get_subcolumn(path, index_hint);
1955
2.46k
        }
1956
1.35M
        if (!subcolumn) {
1957
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Failed to find sub column {}",
1958
0
                                   path.get_path());
1959
0
        }
1960
1.35M
        return subcolumn;
1961
1.35M
    };
1962
1963
1.35M
    auto normalize_plain_path = [&](const PathInData& path) {
1964
1.35M
        if (!config.check_duplicate_json_path || path.empty() || !is_plain_path(path)) {
1965
1.35M
            return path;
1966
1.35M
        }
1967
9
        return PathInData(path.get_path());
1968
1.35M
    };
1969
1970
80.1k
    auto insert_into_subcolumn = [&](size_t i,
1971
1.35M
                                     bool check_size_mismatch) -> ColumnVariant::Subcolumn* {
1972
1.35M
        FieldInfo field_info;
1973
1.35M
        get_field_info(values[i], &field_info);
1974
1.35M
        if (field_info.scalar_type_id == PrimitiveType::INVALID_TYPE) {
1975
104
            return nullptr;
1976
104
        }
1977
1.35M
        auto path = normalize_plain_path(paths[i]);
1978
1.35M
        auto* subcolumn = get_or_create_subcolumn(path, i, field_info);
1979
1.35M
        flush_defaults(subcolumn);
1980
1.35M
        if (check_size_mismatch && subcolumn->size() != old_num_rows) {
1981
1
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
1982
1
                                   "subcolumn {} size missmatched, may contains duplicated entry",
1983
1
                                   path.get_path());
1984
1
        }
1985
1.35M
        subcolumn->insert(std::move(values[i]), std::move(field_info));
1986
1.35M
        return subcolumn;
1987
1.35M
    };
1988
1989
80.1k
    switch (config.parse_to) {
1990
79.1k
    case ParseConfig::ParseTo::OnlySubcolumns:
1991
1.43M
        for (size_t i = 0; i < paths.size(); ++i) {
1992
1.35M
            insert_into_subcolumn(i, true);
1993
1.35M
        }
1994
79.1k
        break;
1995
1.02k
    case ParseConfig::ParseTo::OnlyDocValueColumn: {
1996
1.02k
        CHECK(column_variant.enable_doc_mode()) << "OnlyDocValueColumn requires doc mode enabled";
1997
1.02k
        std::vector<size_t> doc_item_indexes;
1998
1.02k
        doc_item_indexes.reserve(paths.size());
1999
1.02k
        phmap::flat_hash_set<StringRef, StringRefHash> seen_paths;
2000
1.02k
        seen_paths.reserve(paths.size());
2001
2002
5.70k
        for (size_t i = 0; i < paths.size(); ++i) {
2003
4.68k
            FieldInfo field_info;
2004
4.68k
            get_field_info(values[i], &field_info);
2005
4.68k
            if (paths[i].empty()) {
2006
0
                auto* subcolumn = column_variant.get_subcolumn(paths[i]);
2007
0
                DCHECK(subcolumn != nullptr);
2008
0
                flush_defaults(subcolumn);
2009
0
                subcolumn->insert(std::move(values[i]), std::move(field_info));
2010
0
                continue;
2011
0
            }
2012
4.68k
            if (field_info.scalar_type_id == PrimitiveType::INVALID_TYPE ||
2013
4.68k
                values[i].get_type() == PrimitiveType::TYPE_NULL) {
2014
0
                continue;
2015
0
            }
2016
4.68k
            const auto& path_str = paths[i].get_path();
2017
4.68k
            StringRef path_ref {path_str.data(), path_str.size()};
2018
4.68k
            if (UNLIKELY(!seen_paths.emplace(path_ref).second)) {
2019
0
                throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
2020
0
                                       "may contains duplicated entry : {}",
2021
0
                                       std::string_view(path_str));
2022
0
            }
2023
4.68k
            doc_item_indexes.push_back(i);
2024
4.68k
        }
2025
2026
1.02k
        std::sort(doc_item_indexes.begin(), doc_item_indexes.end(),
2027
7.37k
                  [&](size_t l, size_t r) { return paths[l].get_path() < paths[r].get_path(); });
2028
4.68k
        for (const auto idx : doc_item_indexes) {
2029
4.68k
            const auto& path_str = paths[idx].get_path();
2030
4.68k
            doc_value_data_paths->insert_data(path_str.data(), path_str.size());
2031
4.68k
            auto& chars = doc_value_data_values->get_chars();
2032
4.68k
            append_field_to_binary_chars(values[idx], chars);
2033
4.68k
            doc_value_data_values->get_offsets().push_back(chars.size());
2034
4.68k
        }
2035
1.02k
    } break;
2036
80.1k
    }
2037
80.1k
    doc_value_data_offsets.push_back(doc_value_data_paths->size());
2038
    // /// Insert default values to missed subcolumns.
2039
80.1k
    const auto& subcolumns = column_variant.get_subcolumns();
2040
1.73M
    for (const auto& entry : subcolumns) {
2041
1.73M
        if (entry->data.size() == old_num_rows) {
2042
            // Handle nested paths differently from simple paths
2043
384k
            if (entry->path.has_nested_part()) {
2044
                // Try to insert default from nested, if failed, insert regular default
2045
0
                bool success = UNLIKELY(column_variant.try_insert_default_from_nested(entry));
2046
0
                if (!success) {
2047
0
                    entry->data.insert_default();
2048
0
                }
2049
384k
            } else {
2050
                // For non-nested paths, increment default counter
2051
384k
                entry->data.increment_default_counter();
2052
384k
            }
2053
384k
        }
2054
1.73M
    }
2055
80.1k
    column_variant.incr_num_rows();
2056
80.1k
    auto sparse_column = column_variant.get_sparse_column();
2057
80.1k
    if (sparse_column->size() == old_num_rows) {
2058
80.1k
        sparse_column->assume_mutable()->insert_default();
2059
80.1k
    }
2060
80.1k
#ifndef NDEBUG
2061
80.1k
    column_variant.check_consistency();
2062
80.1k
#endif
2063
80.1k
}
2064
2065
// exposed interfaces
2066
// Parses a single JSON string into `column`. Uses `parser` when it is non-null; otherwise a
// parser is taken from parsers_pool for the duration of the call.
void parse_json_to_variant(IColumn& column, const StringRef& json, JsonParser* parser,
                           const ParseConfig& config) {
    if (parser != nullptr) {
        parse_json_to_variant_impl(column, json.data, json.size, parser, config);
        return;
    }
    auto borrowed_parser = parsers_pool.get([] { return new JsonParser(); });
    parse_json_to_variant_impl(column, json.data, json.size, borrowed_parser.get(), config);
}
2075
2076
// Parses every row of `raw_json_column` into `column`, reusing one parser taken from
// parsers_pool for all rows, then finalizes `column`.
void parse_json_to_variant(IColumn& column, const ColumnString& raw_json_column,
                           const ParseConfig& config) {
    auto pooled_parser = parsers_pool.get([] { return new JsonParser(); });
    auto* json_parser = pooled_parser.get();
    for (size_t row = 0; row < raw_json_column.size(); ++row) {
        const StringRef json_text = raw_json_column.get_data_at(row);
        parse_json_to_variant_impl(column, json_text.data, json_text.size, json_parser, config);
    }
    column.finalize();
}
2085
2086
// parse the doc snapshot column to subcolumns
2087
0
void materialize_docs_to_subcolumns(ColumnVariant& column_variant) {
2088
0
    auto subcolumns = materialize_docs_to_subcolumns_map(column_variant);
2089
2090
0
    for (auto& entry : subcolumns) {
2091
0
        entry.second.finalize();
2092
0
        if (!column_variant.add_sub_column(PathInData(entry.first),
2093
0
                                           IColumn::mutate(entry.second.get_finalized_column_ptr()),
2094
0
                                           entry.second.get_least_common_type())) {
2095
0
            throw doris::Exception(ErrorCode::INTERNAL_ERROR,
2096
0
                                   "Failed to add subcolumn {}, which is from doc snapshot column",
2097
0
                                   entry.first);
2098
0
        }
2099
0
    }
2100
2101
0
    column_variant.finalize();
2102
0
}
2103
2104
// ============ Implementation from variant_util.cpp ============
2105
2106
// Builds one Subcolumn per distinct path found in the doc-value data of `variant`.
// Rows in which a path does not occur are filled with defaults, so every returned subcolumn
// ends up with one entry per row. The map keys are string_views built from the data returned
// by the key column's get_data_at(), so they do not own their bytes.
// `expected_unique_paths` sizes the map up front; when it is 0 the key count, capped at
// kInitialPathReserve, is used instead.
phmap::flat_hash_map<std::string_view, ColumnVariant::Subcolumn> materialize_docs_to_subcolumns_map(
        const ColumnVariant& variant, size_t expected_unique_paths) {
    constexpr size_t kInitialPathReserve = 8192;
    phmap::flat_hash_map<std::string_view, ColumnVariant::Subcolumn> subcolumns;

    const auto [column_key, column_value] = variant.get_doc_value_data_paths_and_values();
    const auto& column_offsets = variant.serialized_doc_value_column_offsets();
    const size_t num_rows = column_offsets.size();

    DCHECK_EQ(num_rows, variant.size()) << "doc snapshot offsets size mismatch with variant rows";

    size_t reserve_hint = expected_unique_paths;
    if (reserve_hint == 0) {
        reserve_hint = std::min<size_t>(column_key->size(), kInitialPathReserve);
    }
    subcolumns.reserve(reserve_hint);

    for (size_t row = 0; row < num_rows; ++row) {
        // NOTE(review): for row == 0 this indexes column_offsets[-1]; presumably the offsets
        // container is padded so that this reads 0 - confirm against its type.
        const size_t first_entry = column_offsets[row - 1];
        const size_t last_entry = column_offsets[row];
        for (size_t entry = first_entry; entry < last_entry; ++entry) {
            const auto& key = column_key->get_data_at(entry);
            const std::string_view path_sv(key.data, key.size);

            auto [slot, is_new_path] =
                    subcolumns.try_emplace(path_sv, ColumnVariant::Subcolumn {0, true, false});
            auto& target = slot->second;
            if (is_new_path) {
                // First sighting of this path: pad all earlier rows.
                target.insert_many_defaults(row);
            } else if (target.size() != row) {
                // Pad the rows skipped since this path was last seen.
                target.insert_many_defaults(row - target.size());
            }
            target.deserialize_from_binary_column(column_value, entry);
        }
    }

    // Pad paths that are missing from the trailing rows.
    for (auto& [path, target] : subcolumns) {
        if (target.size() == num_rows) {
            continue;
        }
        target.insert_many_defaults(num_rows - target.size());
    }

    return subcolumns;
}
2148
2149
// Bit layout of a 64-bit VARIANT patch-path marker, as used by the marker helpers in this file:
//   bit 63      : marker flag (VARIANT_PATCH_PATH_MARKER_MASK)
//   bit 62      : class bit, 1 = byte marker, 0 = length marker
//   bits 31..61 : column unique id (31 bits)
//   bits 20..30 : path index (11 bits)
//   byte marker   -> bits 8..19 hold the byte position, bits 0..7 the byte value
//   length marker -> bits 0..19 hold the encoded path length
constexpr uint64_t VARIANT_PATCH_PATH_MARKER_MASK = 1ULL << 63;
2150
constexpr uint64_t VARIANT_PATCH_PATH_MARKER_CLASS_SHIFT = 62;
2151
constexpr uint64_t VARIANT_PATCH_PATH_MARKER_UID_BITS = 31;
2152
constexpr uint64_t VARIANT_PATCH_PATH_MARKER_INDEX_BITS = 11;
2153
constexpr uint64_t VARIANT_PATCH_PATH_MARKER_POS_BITS = 12;
2154
constexpr uint64_t VARIANT_PATCH_PATH_MARKER_BYTE_BITS = 8;
2155
// Shifts are derived from the field widths, lowest field first: byte, pos, index, uid.
constexpr uint64_t VARIANT_PATCH_PATH_MARKER_POS_SHIFT = VARIANT_PATCH_PATH_MARKER_BYTE_BITS;
2156
constexpr uint64_t VARIANT_PATCH_PATH_MARKER_INDEX_SHIFT =
2157
        VARIANT_PATCH_PATH_MARKER_POS_SHIFT + VARIANT_PATCH_PATH_MARKER_POS_BITS;
2158
constexpr uint64_t VARIANT_PATCH_PATH_MARKER_UID_SHIFT =
2159
        VARIANT_PATCH_PATH_MARKER_INDEX_SHIFT + VARIANT_PATCH_PATH_MARKER_INDEX_BITS;
2160
// The uid field must end exactly where the class bit begins.
static_assert(VARIANT_PATCH_PATH_MARKER_UID_SHIFT + VARIANT_PATCH_PATH_MARKER_UID_BITS ==
2161
              VARIANT_PATCH_PATH_MARKER_CLASS_SHIFT);
2162
// Masks applied after shifting a field down to bit 0.
constexpr uint64_t VARIANT_PATCH_PATH_MARKER_UID_MASK =
2163
        (1ULL << VARIANT_PATCH_PATH_MARKER_UID_BITS) - 1;
2164
constexpr uint64_t VARIANT_PATCH_PATH_MARKER_INDEX_MASK =
2165
        (1ULL << VARIANT_PATCH_PATH_MARKER_INDEX_BITS) - 1;
2166
constexpr uint64_t VARIANT_PATCH_PATH_MARKER_POS_MASK =
2167
        (1ULL << VARIANT_PATCH_PATH_MARKER_POS_BITS) - 1;
2168
constexpr uint64_t VARIANT_PATCH_PATH_MARKER_BYTE_MASK =
2169
        (1ULL << VARIANT_PATCH_PATH_MARKER_BYTE_BITS) - 1;
2170
// Number of distinct path indexes the index field can represent.
constexpr uint64_t VARIANT_PATCH_PATH_MARKER_MAX_COUNT = 1ULL
2171
                                                         << VARIANT_PATCH_PATH_MARKER_INDEX_BITS;
2172
// Flexible VARIANT partial update keeps exact patch paths in skip bitmap markers.
2173
// The byte position field is the feature-level encoded-path limit.
2174
constexpr uint64_t VARIANT_PATCH_PATH_MARKER_MAX_BYTES = 1ULL << VARIANT_PATCH_PATH_MARKER_POS_BITS;
2175
// Per-row limits enforced by encode_variant_patch_paths(): number of patch paths, and the sum
// of the lengths recorded by all length markers in the row's bitmap.
constexpr uint64_t VARIANT_PATCH_PATH_MAX_COUNT = 256;
2176
constexpr uint64_t VARIANT_PATCH_PATH_MAX_TOTAL_BYTES = 64 * 1024;
2177
2178
// The hidden skip bitmap stores top-level column unique ids, so VARIANT patch metadata uses
2179
// values outside the int32 uid range. Each path is represented by exact, column-scoped byte
2180
// markers with the high marker bit set; this keeps publish-conflict merge deterministic.
2181
104k
bool is_variant_patch_path_marker(uint64_t value) {
2182
104k
    return (value & VARIANT_PATCH_PATH_MARKER_MASK) != 0;
2183
104k
}
2184
2185
namespace {
2186
2187
// Scratch state used while reassembling one encoded patch path from bitmap markers.
struct VariantPatchPathEncoding {
2188
    // Encoded path length taken from the length marker; empty until one has been seen.
    std::optional<uint64_t> length;
2189
    // Byte values indexed by byte position; a slot stays empty until its byte marker is seen.
    std::vector<std::optional<uint8_t>> bytes;
2190
};
2191
2192
// Patch paths keyed by their encoded path key string; std::map keeps them ordered by that key.
using VariantPatchPathMap = std::map<std::string, PathInData>;
2193
2194
1.68k
// Appends `value` to `dst` as four bytes, least significant byte first.
void append_fixed_u32(uint32_t value, std::string* dst) {
    for (unsigned shift = 0; shift < 32; shift += 8) {
        dst->push_back(static_cast<char>((value >> shift) & 0xFF));
    }
}
2200
2201
42
// Reads a little-endian uint32 from `src` at `*offset`.
// On success stores it in `*value`, advances `*offset` by four and returns true.
// Returns false, leaving `*offset` and `*value` untouched, when fewer than four bytes remain.
bool read_fixed_u32(std::string_view src, size_t* offset, uint32_t* value) {
    // Compare against the remaining byte count instead of computing `*offset + 4`: the sum
    // can wrap around for a very large offset and would then pass the bounds check.
    if (*offset > src.size() || src.size() - *offset < sizeof(uint32_t)) {
        return false;
    }
    const auto* data = reinterpret_cast<const uint8_t*>(src.data() + *offset);
    *value = static_cast<uint32_t>(data[0]) | (static_cast<uint32_t>(data[1]) << 8) |
             (static_cast<uint32_t>(data[2]) << 16) | (static_cast<uint32_t>(data[3]) << 24);
    *offset += sizeof(uint32_t);
    return true;
}
2211
2212
824
std::string encode_variant_patch_path_key(const PathInData& path) {
2213
824
    const auto& parts = path.get_parts();
2214
824
    DCHECK(!parts.empty());
2215
824
    std::string encoded;
2216
824
    append_fixed_u32(static_cast<uint32_t>(parts.size()), &encoded);
2217
858
    for (const auto& part : parts) {
2218
858
        append_fixed_u32(static_cast<uint32_t>(part.key.size()), &encoded);
2219
858
        encoded.append(part.key.data(), part.key.size());
2220
858
        encoded.push_back(static_cast<char>(part.is_nested ? 1 : 0));
2221
858
        encoded.push_back(static_cast<char>(part.anonymous_array_level));
2222
858
    }
2223
824
    return encoded;
2224
824
}
2225
2226
19
// Rebuilds a PathInData from an encoded path key.
// Expected layout: a u32 part count (must be non-zero), then for every part a u32 key size,
// the key bytes, one is_nested byte and one anonymous_array_level byte; u32 values are read
// with read_fixed_u32().
// Returns InternalError when the part count is missing or zero, when a part is truncated,
// or when bytes remain after the last part. `*path` is only written on success.
Status decode_variant_patch_path_key(std::string_view encoded, PathInData* path) {
    size_t offset = 0;
    uint32_t part_count = 0;
    if (!read_fixed_u32(encoded, &offset, &part_count) || part_count == 0) {
        return Status::InternalError("Invalid VARIANT patch path marker part count");
    }

    // part_count comes straight from the encoded bytes, so a corrupt key could claim billions
    // of parts. Every part occupies at least a u32 key size plus two flag bytes, so cap the
    // reservation by what the remaining bytes can hold. An oversized count is still rejected
    // by the per-part bounds check below.
    constexpr size_t kMinEncodedPartBytes = sizeof(uint32_t) + 2;
    const size_t max_possible_parts = (encoded.size() - offset) / kMinEncodedPartBytes;

    PathInData::Parts parts;
    parts.reserve(std::min<size_t>(part_count, max_possible_parts));
    for (uint32_t i = 0; i < part_count; ++i) {
        uint32_t key_size = 0;
        // The "+ 2" accounts for the two flag bytes that follow the key.
        if (!read_fixed_u32(encoded, &offset, &key_size) ||
            offset + key_size + 2 > encoded.size()) {
            return Status::InternalError("Invalid VARIANT patch path marker part payload");
        }
        PathInData::Part part;
        part.key = std::string_view(encoded.data() + offset, key_size);
        offset += key_size;
        part.is_nested = encoded[offset++] != 0;
        part.anonymous_array_level = static_cast<UInt8>(encoded[offset++]);
        parts.emplace_back(part);
    }
    if (offset != encoded.size()) {
        return Status::InternalError("Trailing bytes in VARIANT patch path marker");
    }

    *path = PathInData(parts);
    return Status::OK();
}
2255
2256
20
uint64_t variant_patch_path_max_bytes() {
2257
20
    return VARIANT_PATCH_PATH_MARKER_MAX_BYTES;
2258
20
}
2259
2260
206k
uint64_t normalized_variant_col_unique_id(int32_t variant_col_unique_id) {
2261
206k
    CHECK_GE(variant_col_unique_id, 0);
2262
206k
    CHECK_LE(static_cast<uint64_t>(variant_col_unique_id), VARIANT_PATCH_PATH_MARKER_UID_MASK);
2263
206k
    return static_cast<uint64_t>(variant_col_unique_id);
2264
206k
}
2265
2266
68.4k
uint64_t variant_patch_path_marker_uid(uint64_t marker) {
2267
68.4k
    return (marker >> VARIANT_PATCH_PATH_MARKER_UID_SHIFT) & VARIANT_PATCH_PATH_MARKER_UID_MASK;
2268
68.4k
}
2269
2270
68.4k
bool is_variant_patch_path_marker_for_column(uint64_t marker, int32_t variant_col_unique_id) {
2271
68.4k
    return is_variant_patch_path_marker(marker) &&
2272
68.4k
           variant_patch_path_marker_uid(marker) ==
2273
68.4k
                   normalized_variant_col_unique_id(variant_col_unique_id);
2274
68.4k
}
2275
2276
283
uint64_t variant_patch_path_marker_index(uint64_t marker) {
2277
283
    return (marker >> VARIANT_PATCH_PATH_MARKER_INDEX_SHIFT) & VARIANT_PATCH_PATH_MARKER_INDEX_MASK;
2278
283
}
2279
2280
36.3k
bool variant_patch_path_marker_is_byte(uint64_t marker) {
2281
36.3k
    return ((marker >> VARIANT_PATCH_PATH_MARKER_CLASS_SHIFT) & 1ULL) != 0;
2282
36.3k
}
2283
2284
uint64_t variant_patch_path_length_marker(int32_t variant_col_unique_id, uint64_t path_index,
2285
529
                                          uint64_t length) {
2286
529
    DCHECK_LT(path_index, VARIANT_PATCH_PATH_MARKER_MAX_COUNT);
2287
529
    DCHECK_LE(length, VARIANT_PATCH_PATH_MARKER_MAX_BYTES);
2288
529
    return VARIANT_PATCH_PATH_MARKER_MASK |
2289
529
           (normalized_variant_col_unique_id(variant_col_unique_id)
2290
529
            << VARIANT_PATCH_PATH_MARKER_UID_SHIFT) |
2291
529
           (path_index << VARIANT_PATCH_PATH_MARKER_INDEX_SHIFT) | length;
2292
529
}
2293
2294
uint64_t variant_patch_path_byte_marker(int32_t variant_col_unique_id, uint64_t path_index,
2295
137k
                                        uint64_t byte_pos, uint8_t byte) {
2296
137k
    DCHECK_LT(path_index, VARIANT_PATCH_PATH_MARKER_MAX_COUNT);
2297
137k
    DCHECK_LT(byte_pos, VARIANT_PATCH_PATH_MARKER_MAX_BYTES);
2298
137k
    return VARIANT_PATCH_PATH_MARKER_MASK | (1ULL << VARIANT_PATCH_PATH_MARKER_CLASS_SHIFT) |
2299
137k
           (normalized_variant_col_unique_id(variant_col_unique_id)
2300
137k
            << VARIANT_PATCH_PATH_MARKER_UID_SHIFT) |
2301
137k
           (path_index << VARIANT_PATCH_PATH_MARKER_INDEX_SHIFT) |
2302
137k
           (byte_pos << VARIANT_PATCH_PATH_MARKER_POS_SHIFT) | byte;
2303
137k
}
2304
2305
void remove_variant_patch_path_markers_for_column(int32_t variant_col_unique_id,
2306
19
                                                  BitmapValue* bitmap) {
2307
19
    std::vector<uint64_t> markers_to_remove;
2308
34.0k
    for (uint64_t marker : *bitmap) {
2309
34.0k
        if (is_variant_patch_path_marker_for_column(marker, variant_col_unique_id)) {
2310
12
            markers_to_remove.push_back(marker);
2311
12
        }
2312
34.0k
    }
2313
19
    for (uint64_t marker : markers_to_remove) {
2314
12
        bitmap->remove(marker);
2315
12
    }
2316
19
}
2317
2318
2
// Removes every patch-path marker, for any column, from `bitmap`.
void remove_all_variant_patch_path_markers(BitmapValue* bitmap) {
    // Collect first and erase afterwards, so the bitmap is not modified while iterating it.
    std::vector<uint64_t> doomed;
    for (uint64_t candidate : *bitmap) {
        if (!is_variant_patch_path_marker(candidate)) {
            continue;
        }
        doomed.push_back(candidate);
    }
    for (uint64_t value : doomed) {
        bitmap->remove(value);
    }
}
2329
2330
// Reassembles the patch paths stored for `variant_col_unique_id` in `bitmap`.
// `paths` is cleared first and, on success, maps each encoded path key to its decoded path.
// Markers of other columns and non-marker values are ignored.
// Returns InternalError when the markers are inconsistent: an over-long or conflicting
// length, conflicting bytes at one position, bytes without a length, bytes beyond the
// length, missing bytes, or a key that fails to decode.
Status decode_variant_patch_paths(const BitmapValue& bitmap, int32_t variant_col_unique_id,
                                  VariantPatchPathMap* paths) {
    paths->clear();
    // Path index -> what has been gathered for that path so far.
    std::map<uint64_t, VariantPatchPathEncoding> pending;
    for (uint64_t marker : bitmap) {
        if (!is_variant_patch_path_marker_for_column(marker, variant_col_unique_id)) {
            continue;
        }
        auto& slot = pending[variant_patch_path_marker_index(marker)];

        if (variant_patch_path_marker_is_byte(marker)) {
            const uint64_t byte_pos = (marker >> VARIANT_PATCH_PATH_MARKER_POS_SHIFT) &
                                      VARIANT_PATCH_PATH_MARKER_POS_MASK;
            const auto byte = static_cast<uint8_t>(marker & VARIANT_PATCH_PATH_MARKER_BYTE_MASK);
            if (slot.bytes.size() <= byte_pos) {
                slot.bytes.resize(byte_pos + 1);
            }
            auto& stored = slot.bytes[byte_pos];
            if (stored.has_value() && *stored != byte) {
                return Status::InternalError(
                        "Conflicting VARIANT patch path marker byte for column {}",
                        variant_col_unique_id);
            }
            stored = byte;
            continue;
        }

        // Length marker: everything below the index field is the encoded path length.
        const uint64_t length = marker & ((1ULL << VARIANT_PATCH_PATH_MARKER_INDEX_SHIFT) - 1);
        if (length > VARIANT_PATCH_PATH_MARKER_MAX_BYTES) {
            return Status::InternalError(
                    "Invalid VARIANT patch path marker length {} for column {}", length,
                    variant_col_unique_id);
        }
        if (slot.length.has_value() && *slot.length != length) {
            return Status::InternalError(
                    "Conflicting VARIANT patch path marker length for column {}",
                    variant_col_unique_id);
        }
        slot.length = length;
    }

    for (const auto& [_, gathered] : pending) {
        if (!gathered.length.has_value()) {
            if (!gathered.bytes.empty()) {
                return Status::InternalError(
                        "VARIANT patch path marker byte without length for column {}",
                        variant_col_unique_id);
            }
            continue;
        }
        const uint64_t expected_length = *gathered.length;
        if (gathered.bytes.size() > expected_length) {
            return Status::InternalError(
                    "VARIANT patch path marker byte exceeds length for column {}",
                    variant_col_unique_id);
        }
        std::string encoded_path_key;
        encoded_path_key.reserve(expected_length);
        for (uint64_t pos = 0; pos < expected_length; ++pos) {
            const bool present = pos < gathered.bytes.size() && gathered.bytes[pos].has_value();
            if (!present) {
                return Status::InternalError("Incomplete VARIANT patch path marker for column {}",
                                             variant_col_unique_id);
            }
            encoded_path_key.push_back(static_cast<char>(*gathered.bytes[pos]));
        }
        PathInData path;
        RETURN_IF_ERROR(decode_variant_patch_path_key(encoded_path_key, &path));
        paths->insert_or_assign(std::move(encoded_path_key), std::move(path));
    }
    return Status::OK();
}
2397
2398
// Replaces the patch-path markers of `variant_col_unique_id` in `bitmap` with markers that
// encode `paths`: per path one length marker plus one byte marker per key byte.
// Returns NotSupported, leaving `*bitmap` unchanged, when there are too many paths, when a
// single encoded key is too long, or when the lengths recorded by all length markers in the
// resulting bitmap add up to more than VARIANT_PATCH_PATH_MAX_TOTAL_BYTES.
Status encode_variant_patch_paths(int32_t variant_col_unique_id, const VariantPatchPathMap& paths,
                                  BitmapValue* bitmap) {
    if (paths.size() > VARIANT_PATCH_PATH_MAX_COUNT) {
        return Status::NotSupported(
                "VARIANT flexible partial update supports at most {} patch paths per row",
                VARIANT_PATCH_PATH_MAX_COUNT);
    }
    const uint64_t max_encoded_bytes = variant_patch_path_max_bytes();
    for (const auto& [key, _] : paths) {
        if (key.size() > max_encoded_bytes) {
            return Status::NotSupported(
                    "VARIANT flexible partial update encoded patch path exceeds {} bytes, "
                    "actual {} bytes",
                    max_encoded_bytes, key.size());
        }
    }

    // Work on a copy so that `*bitmap` is only replaced once every check has passed.
    BitmapValue staged = *bitmap;
    remove_variant_patch_path_markers_for_column(variant_col_unique_id, &staged);

    uint64_t path_index = 0;
    for (const auto& [key, _] : paths) {
        const uint64_t key_length = key.size();
        staged.add(variant_patch_path_length_marker(variant_col_unique_id, path_index,
                                                    key_length));
        for (uint64_t byte_pos = 0; byte_pos < key.size(); ++byte_pos) {
            const auto byte = static_cast<uint8_t>(static_cast<unsigned char>(key[byte_pos]));
            staged.add(variant_patch_path_byte_marker(variant_col_unique_id, path_index,
                                                      byte_pos, byte));
        }
        ++path_index;
    }

    // The per-row budget counts the length markers of every column present in the bitmap.
    uint64_t row_total_encoded_bytes = 0;
    for (uint64_t marker : staged) {
        const bool is_length_marker =
                is_variant_patch_path_marker(marker) && !variant_patch_path_marker_is_byte(marker);
        if (!is_length_marker) {
            continue;
        }
        row_total_encoded_bytes += marker & ((1ULL << VARIANT_PATCH_PATH_MARKER_INDEX_SHIFT) - 1);
        if (row_total_encoded_bytes > VARIANT_PATCH_PATH_MAX_TOTAL_BYTES) {
            return Status::NotSupported(
                    "VARIANT flexible partial update encoded patch paths exceed {} bytes per row",
                    VARIANT_PATCH_PATH_MAX_TOTAL_BYTES);
        }
    }
    *bitmap = std::move(staged);
    return Status::OK();
}
2445
2446
void collect_variant_patch_marker_column_uids(const BitmapValue& bitmap,
2447
4
                                              std::set<int32_t>* variant_col_unique_ids) {
2448
36
    for (uint64_t marker : bitmap) {
2449
36
        if (is_variant_patch_path_marker(marker)) {
2450
31
            variant_col_unique_ids->insert(
2451
31
                    static_cast<int32_t>(variant_patch_path_marker_uid(marker)));
2452
31
        }
2453
36
    }
2454
4
}
2455
2456
8
// Builds the NotSupported status reported when the patch side of a VARIANT flexible
// partial update is not a JSON object.
Status variant_object_patch_required_status() {
    static constexpr const char* kMessage =
            "VARIANT flexible partial update only supports JSON object patch values";
    return Status::NotSupported(kMessage);
}
2460
2461
6
// Builds the NotSupported status reported when the existing (old) value targeted by a
// VARIANT flexible partial update is not a JSON object.
Status variant_object_base_required_status() {
    static constexpr const char* kMessage =
            "VARIANT flexible partial update only supports patching JSON object old values";
    return Status::NotSupported(kMessage);
}
2465
2466
2
// Builds the NotSupported status reported when a VARIANT flexible partial update is
// attempted on a column that has doc mode enabled.
Status variant_doc_mode_not_supported_status() {
    static constexpr const char* kMessage =
            "VARIANT flexible partial update does not support doc mode in this version";
    return Status::NotSupported(kMessage);
}
2470
2471
214
// Returns the ColumnVariant behind `column` (read-only overload).
// When `column` is nullable the nested column of the ColumnNullable wrapper is
// returned; otherwise `column` itself is cast to ColumnVariant.
const ColumnVariant& get_variant_nested_column(const IColumn& column) {
    if (!column.is_nullable()) {
        return assert_cast<const ColumnVariant&>(column);
    }
    const auto& nullable_column = assert_cast<const ColumnNullable&>(column);
    return assert_cast<const ColumnVariant&>(nullable_column.get_nested_column());
}
2478
2479
17
// Returns the ColumnVariant behind `column` (mutable overload).
// When `column` is nullable the nested column of the ColumnNullable wrapper is
// returned; otherwise `column` itself is cast to ColumnVariant.
ColumnVariant& get_variant_nested_column(IColumn& column) {
    if (!column.is_nullable()) {
        return assert_cast<ColumnVariant&>(column);
    }
    auto& nullable_column = assert_cast<ColumnNullable&>(column);
    return assert_cast<ColumnVariant&>(nullable_column.get_nested_column());
}
2486
2487
50
bool is_path_prefix_of(const PathInData& prefix, const PathInData& path) {
2488
50
    const auto& prefix_parts = prefix.get_parts();
2489
50
    const auto& path_parts = path.get_parts();
2490
50
    if (prefix_parts.size() > path_parts.size()) {
2491
10
        return false;
2492
10
    }
2493
40
    return std::equal(prefix_parts.begin(), prefix_parts.end(), path_parts.begin());
2494
50
}
2495
2496
27
bool paths_conflict(const PathInData& left, const PathInData& right) {
2497
27
    return is_path_prefix_of(left, right) || is_path_prefix_of(right, left);
2498
27
}
2499
2500
bool path_or_prefix_is_variant_patch_path(const PathInData& path,
2501
19
                                          const VariantPatchPathMap& patch_paths) {
2502
19
    PathInData::Parts prefix_parts;
2503
19
    prefix_parts.reserve(path.get_parts().size());
2504
21
    for (const auto& part : path.get_parts()) {
2505
21
        prefix_parts.push_back(part);
2506
21
        if (patch_paths.contains(encode_variant_patch_path_key(PathInData(prefix_parts)))) {
2507
5
            return true;
2508
5
        }
2509
21
    }
2510
14
    return false;
2511
19
}
2512
2513
14
bool path_conflicts_with_any_patch_path(const PathInData& path, const VariantMap& patch_object) {
2514
14
    return std::ranges::any_of(patch_object, [&](const auto& patch_item) {
2515
10
        return paths_conflict(patch_item.first, path);
2516
10
    });
2517
14
}
2518
2519
8
// Returns true when the first character of `text` that is not whitespace (as classified
// by std::isspace) is '{'. Empty or all-whitespace input yields false.
bool starts_with_json_object(std::string_view text) {
    for (const char raw : text) {
        // std::isspace requires a value representable as unsigned char.
        if (std::isspace(static_cast<unsigned char>(raw))) {
            continue;
        }
        return raw == '{';
    }
    return false;
}
2523
2524
16
bool root_jsonb_field_to_json_text(const Field& field, std::string* json_text) {
2525
16
    switch (field.get_type()) {
2526
8
    case PrimitiveType::TYPE_JSONB: {
2527
8
        const auto& jsonb = field.get<PrimitiveType::TYPE_JSONB>();
2528
8
        *json_text = JsonbToJson::jsonb_to_json_string(jsonb.get_value(), jsonb.get_size());
2529
8
        return true;
2530
0
    }
2531
8
    default:
2532
8
        return false;
2533
16
    }
2534
16
}
2535
2536
bool collect_json_object_text_map(std::string_view json_text, bool reject_json_null_value,
2537
8
                                  VariantMap* object) {
2538
8
    if (!starts_with_json_object(json_text)) {
2539
3
        return false;
2540
3
    }
2541
2542
5
    auto parsed = ColumnVariant::create(0, false);
2543
5
    ParseConfig config;
2544
5
    config.parse_to = ParseConfig::ParseTo::OnlySubcolumns;
2545
5
    config.reject_json_null_value = reject_json_null_value;
2546
5
    config.record_empty_object_path = true;
2547
5
    StringRef json_ref {json_text.data(), json_text.size()};
2548
5
    parse_json_to_variant(*parsed, json_ref, nullptr, config);
2549
5
    parsed->finalize();
2550
2551
5
    Field parsed_field;
2552
5
    parsed->get(0, parsed_field);
2553
5
    if (parsed_field.get_type() != PrimitiveType::TYPE_VARIANT) {
2554
0
        return false;
2555
0
    }
2556
5
    const auto& parsed_object = parsed_field.get<PrimitiveType::TYPE_VARIANT>();
2557
5
    if (parsed_object.contains(PathInData())) {
2558
0
        return false;
2559
0
    }
2560
5
    for (const auto& [path, value] : parsed_object) {
2561
3
        if (!path.empty()) {
2562
3
            object->insert_or_assign(path, value);
2563
3
        }
2564
3
    }
2565
5
    return true;
2566
5
}
2567
2568
void collect_materialized_variant_map(const ColumnVariant& variant, size_t row, VariantMap* object,
2569
81
                                      FieldWithDataType* root_field) {
2570
81
    Field field;
2571
81
    variant.get(row, field);
2572
81
    if (field.get_type() == PrimitiveType::TYPE_VARIANT) {
2573
869
        for (const auto& [path, value] : field.get<PrimitiveType::TYPE_VARIANT>()) {
2574
869
            if (path.get_path() == DOC_VALUE_COLUMN_PATH) {
2575
0
                continue;
2576
0
            }
2577
869
            if (path.empty()) {
2578
16
                *root_field = value;
2579
16
                continue;
2580
16
            }
2581
853
            object->insert_or_assign(path, value);
2582
853
        }
2583
80
    }
2584
2585
81
    DCHECK(!variant.has_doc_value_column(row));
2586
81
}
2587
2588
// Collects the patch row `row` of `variant` into `object` and reports through
// `is_object_patch` whether the row can be treated as a JSON object patch.
//
//   * No root value (root field is TYPE_NULL): the materialized sub-paths are kept and
//     `*is_object_patch` is set to true.
//   * Root value that is not JSONB: `*is_object_patch` is set to false.
//   * JSONB root value: `object` is rebuilt from the JSON text (JSON null values are
//     rejected by the parser configuration) and `*is_object_patch` reflects whether
//     that text is a JSON object.
// Always returns Status::OK().
Status collect_variant_patch_map(const ColumnVariant& variant, size_t row, bool* is_object_patch,
                                 VariantMap* object) {
    object->clear();
    FieldWithDataType root_value;
    collect_materialized_variant_map(variant, row, object, &root_value);

    const bool has_root_value = root_value.field.get_type() != PrimitiveType::TYPE_NULL;
    if (!has_root_value) {
        *is_object_patch = true;
        return Status::OK();
    }

    std::string root_json;
    if (root_jsonb_field_to_json_text(root_value.field, &root_json)) {
        // The root JSONB document replaces whatever sub-paths were materialized.
        object->clear();
        *is_object_patch = collect_json_object_text_map(root_json, true, object);
    } else {
        *is_object_patch = false;
    }
    return Status::OK();
}
2607
2608
44
// Collects row `row` of `variant` into `object` for use as the base of a merge.
//
// Returns OK with the materialized sub-paths when the row has no root value. When a
// root value exists it must be JSONB whose text is a JSON object; `object` is then
// rebuilt from that text. Otherwise variant_object_base_required_status() is returned.
Status collect_variant_base_map(const ColumnVariant& variant, size_t row, VariantMap* object) {
    object->clear();
    FieldWithDataType root_value;
    collect_materialized_variant_map(variant, row, object, &root_value);

    const bool has_root_value = root_value.field.get_type() != PrimitiveType::TYPE_NULL;
    if (!has_root_value) {
        return Status::OK();
    }

    std::string root_json;
    const bool root_is_jsonb = root_jsonb_field_to_json_text(root_value.field, &root_json);
    if (!root_is_jsonb) {
        return variant_object_base_required_status();
    }

    // The root JSONB document replaces whatever sub-paths were materialized.
    object->clear();
    const bool root_is_object = collect_json_object_text_map(root_json, false, object);
    if (!root_is_object) {
        return variant_object_base_required_status();
    }
    return Status::OK();
}
2626
2627
17
// Appends `field` as a new row of `dst_column` and returns Status::OK().
// Debug builds assert that the destination VARIANT column does not use doc mode.
Status insert_variant_field(IColumn& dst_column, const Field& field) {
    // Keep the lookup inside DCHECK so release builds do not evaluate it.
    DCHECK(!get_variant_nested_column(dst_column).enable_doc_mode());

    dst_column.insert(field);

    return Status::OK();
}
2632
2633
133
// Returns variant_doc_mode_not_supported_status() when the VARIANT column behind
// `column` has doc mode enabled, and Status::OK() otherwise.
Status check_variant_object_patch_supported(const IColumn& column) {
    const bool doc_mode_enabled = get_variant_nested_column(column).enable_doc_mode();
    return doc_mode_enabled ? variant_doc_mode_not_supported_status() : Status::OK();
}
2639
2640
// Merges `patch_object` on top of row `old_row` of `old_column` and appends the result
// to `dst_column`.
//
// The base object is empty when the old row is NULL; otherwise it is collected with
// collect_variant_base_map(), whose error is propagated. Every base entry whose path
// conflicts with a patch path (one is a prefix of the other) is dropped, then all
// patch entries are moved into the merged object.
Status merge_variant_object_patch(const IColumn& old_column, size_t old_row,
                                  VariantMap&& patch_object, IColumn& dst_column) {
    VariantMap merged_object;
    if (!old_column.is_null_at(old_row)) {
        RETURN_IF_ERROR(collect_variant_base_map(get_variant_nested_column(old_column), old_row,
                                                 &merged_object));
    }

    // Drop every base entry that is shadowed by, or would shadow, a patch path.
    for (auto base_it = merged_object.begin(); base_it != merged_object.end();) {
        if (path_conflicts_with_any_patch_path(base_it->first, patch_object)) {
            base_it = merged_object.erase(base_it);
        } else {
            ++base_it;
        }
    }

    for (auto& [patch_path, patch_value] : patch_object) {
        merged_object.insert_or_assign(patch_path, std::move(patch_value));
    }

    return insert_variant_field(dst_column,
                                Field::create_field<TYPE_VARIANT>(std::move(merged_object)));
}
2663
2664
1
// Appends `patch_object` to `dst_column` as a VARIANT field, without merging it with
// any previous value.
Status insert_variant_object_patch(VariantMap&& patch_object, IColumn& dst_column) {
    return insert_variant_field(dst_column,
                                Field::create_field<TYPE_VARIANT>(std::move(patch_object)));
}
2668
2669
} // namespace
2670
2671
// Records the paths touched by patch row `patch_row` of `patch_column` in
// `patch_path_markers` for the column identified by `variant_col_unique_id`.
//
// The paths already encoded in `patch_path_markers` for that column are decoded, the
// paths of the current patch are added, and the union is encoded back.
// Returns variant_object_patch_required_status() when the patch row is NULL or is not
// a JSON object patch, the doc-mode error when the column uses doc mode, and any error
// from decoding/encoding. Exceptions are converted by RETURN_IF_CATCH_EXCEPTION.
Status mark_variant_patch_paths(const IColumn& patch_column, size_t patch_row,
                                int32_t variant_col_unique_id, BitmapValue* patch_path_markers) {
    RETURN_IF_CATCH_EXCEPTION({
        if (patch_column.is_null_at(patch_row)) {
            return variant_object_patch_required_status();
        }
        RETURN_IF_ERROR(check_variant_object_patch_supported(patch_column));

        VariantMap patch_object;
        bool is_object_patch = false;
        const ColumnVariant& patch_variant = get_variant_nested_column(patch_column);
        RETURN_IF_ERROR(collect_variant_patch_map(patch_variant, patch_row, &is_object_patch,
                                                  &patch_object));
        if (!is_object_patch) {
            return variant_object_patch_required_status();
        }

        VariantPatchPathMap known_paths;
        RETURN_IF_ERROR(decode_variant_patch_paths(*patch_path_markers, variant_col_unique_id,
                                                   &known_paths));
        for (const auto& patch_item : patch_object) {
            known_paths.insert_or_assign(encode_variant_patch_path_key(patch_item.first),
                                         patch_item.first);
        }
        return encode_variant_patch_paths(variant_col_unique_id, known_paths, patch_path_markers);
    });
    return Status::OK();
}
2697
2698
// Combines two marker bitmaps into `*merged`.
//
// The result starts as the intersection of `left` and `right` with every VARIANT
// patch-path marker removed. Then, for each column unique id that has patch-path
// markers in either input, the patch paths decoded from both sides are unioned (an
// entry from `left` wins when both sides carry the same key) and encoded back.
//
// The result is assembled in a local bitmap and only published to `*merged` on success.
// This keeps `*merged` untouched when decoding or encoding fails, and makes the call
// safe when `merged` points at `left` or `right` (both inputs are read after the first
// write to the result).
// Exceptions are converted by RETURN_IF_CATCH_EXCEPTION.
Status merge_variant_patch_path_markers(const BitmapValue& left, const BitmapValue& right,
                                        BitmapValue* merged) {
    RETURN_IF_CATCH_EXCEPTION({
        BitmapValue result = left;
        result &= right;
        remove_all_variant_patch_path_markers(&result);

        std::set<int32_t> variant_col_unique_ids;
        collect_variant_patch_marker_column_uids(left, &variant_col_unique_ids);
        collect_variant_patch_marker_column_uids(right, &variant_col_unique_ids);
        for (int32_t variant_col_unique_id : variant_col_unique_ids) {
            VariantPatchPathMap patch_paths;
            RETURN_IF_ERROR(decode_variant_patch_paths(left, variant_col_unique_id, &patch_paths));
            VariantPatchPathMap right_patch_paths;
            RETURN_IF_ERROR(
                    decode_variant_patch_paths(right, variant_col_unique_id, &right_patch_paths));
            // insert() keeps existing keys, so paths already present from `left` win.
            patch_paths.insert(right_patch_paths.begin(), right_patch_paths.end());
            RETURN_IF_ERROR(
                    encode_variant_patch_paths(variant_col_unique_id, patch_paths, &result));
        }
        *merged = std::move(result);
        return Status::OK();
    });
    return Status::OK();
}
2721
2722
// Applies patch row `patch_row` of `patch_column` to row `old_row` of `old_column` and
// appends the merged VARIANT value to `dst_column`.
//
// Fails with variant_object_patch_required_status() when the patch row is NULL or is
// not a JSON object patch, and with the doc-mode error when any of the three columns
// uses doc mode. Errors from collecting or merging are propagated; exceptions are
// converted by RETURN_IF_CATCH_EXCEPTION.
Status merge_variant_patch(const IColumn& old_column, size_t old_row, const IColumn& patch_column,
                           size_t patch_row, IColumn& dst_column) {
    RETURN_IF_CATCH_EXCEPTION({
        if (patch_column.is_null_at(patch_row)) {
            return variant_object_patch_required_status();
        }
        RETURN_IF_ERROR(check_variant_object_patch_supported(old_column));
        RETURN_IF_ERROR(check_variant_object_patch_supported(patch_column));
        RETURN_IF_ERROR(check_variant_object_patch_supported(dst_column));

        VariantMap patch_object;
        bool is_object_patch = false;
        const ColumnVariant& patch_variant = get_variant_nested_column(patch_column);
        RETURN_IF_ERROR(collect_variant_patch_map(patch_variant, patch_row, &is_object_patch,
                                                  &patch_object));
        if (!is_object_patch) {
            return variant_object_patch_required_status();
        }

        return merge_variant_object_patch(old_column, old_row, std::move(patch_object),
                                          dst_column);
    });
    return Status::OK();
}
2746
2747
// Applies patch row `patch_row` of `patch_column` to row `old_row` of `old_column`,
// using the patch paths encoded in `patch_path_markers` for `variant_col_unique_id` to
// decide which paths belong to the patch, and appends the result to `dst_column`.
//
// Steps:
//   1. Collect the patch row and keep only entries whose encoded path is a marked
//      patch path.
//   2. If `old_row_deleted`, the filtered patch alone is inserted.
//   3. Otherwise collect the old row (empty when NULL), drop every old entry whose
//      path or any of its prefixes is a marked patch path, or which conflicts with a
//      remaining patch entry, and then move the patch entries into the merged object.
//
// Fails with variant_object_patch_required_status() when the patch row is NULL and with
// the doc-mode error when any of the three columns uses doc mode; other errors are
// propagated. Exceptions are converted by RETURN_IF_CATCH_EXCEPTION.
Status merge_variant_patch_by_path_markers(const IColumn& old_column, size_t old_row,
                                           const IColumn& patch_column, size_t patch_row,
                                           int32_t variant_col_unique_id,
                                           const BitmapValue& patch_path_markers,
                                           bool old_row_deleted, IColumn& dst_column) {
    RETURN_IF_CATCH_EXCEPTION({
        if (patch_column.is_null_at(patch_row)) {
            return variant_object_patch_required_status();
        }
        RETURN_IF_ERROR(check_variant_object_patch_supported(old_column));
        RETURN_IF_ERROR(check_variant_object_patch_supported(patch_column));
        RETURN_IF_ERROR(check_variant_object_patch_supported(dst_column));

        VariantMap patch_object;
        RETURN_IF_ERROR(collect_variant_base_map(get_variant_nested_column(patch_column), patch_row,
                                                 &patch_object));
        VariantPatchPathMap marked_paths;
        RETURN_IF_ERROR(decode_variant_patch_paths(patch_path_markers, variant_col_unique_id,
                                                   &marked_paths));

        // Keep only the patch entries whose path was explicitly marked.
        for (auto patch_it = patch_object.begin(); patch_it != patch_object.end();) {
            const bool is_marked =
                    marked_paths.contains(encode_variant_patch_path_key(patch_it->first));
            if (!is_marked) {
                patch_it = patch_object.erase(patch_it);
            } else {
                ++patch_it;
            }
        }

        if (old_row_deleted) {
            // Nothing to merge with: the filtered patch becomes the whole value.
            return insert_variant_object_patch(std::move(patch_object), dst_column);
        }

        VariantMap merged_object;
        if (!old_column.is_null_at(old_row)) {
            RETURN_IF_ERROR(collect_variant_base_map(get_variant_nested_column(old_column), old_row,
                                                     &merged_object));
        }

        // Remove old entries that are replaced by the patch.
        for (auto base_it = merged_object.begin(); base_it != merged_object.end();) {
            const bool replaced_by_patch =
                    path_or_prefix_is_variant_patch_path(base_it->first, marked_paths) ||
                    path_conflicts_with_any_patch_path(base_it->first, patch_object);
            if (replaced_by_patch) {
                base_it = merged_object.erase(base_it);
            } else {
                ++base_it;
            }
        }

        for (auto& [patch_path, patch_value] : patch_object) {
            merged_object.insert_or_assign(patch_path, std::move(patch_value));
        }

        Field merged_field = Field::create_field<TYPE_VARIANT>(std::move(merged_object));
        return insert_variant_field(dst_column, merged_field);
    });
    return Status::OK();
}
2801
2802
// Replaces every not-yet-parsed ("scalar") VARIANT column of `block` listed in
// `variant_pos` with a parsed VARIANT column.
//
// For each position the (possibly nullable-wrapped) ColumnVariant is finalized first.
// Columns that are not scalar variants are left as they are. For scalar variants:
//   * a JSONB root is cast to a string column;
//   * a string root is parsed into a fresh ColumnVariant using `configs[i]`;
//   * any other root type is converted in place via ensure_root_node_type().
// When the source column was nullable, the result is re-wrapped with the original
// null map.
//
// NOTE(review): `configs` is indexed in parallel with `variant_pos` and no size check
// is performed here — callers presumably pass vectors of equal length; confirm.
Status _parse_and_materialize_variant_columns(Block& block,
                                              const std::vector<uint32_t>& variant_pos,
                                              const std::vector<ParseConfig>& configs) {
    for (size_t idx = 0; idx < variant_pos.size(); ++idx) {
        auto source_column = block.get_by_position(variant_pos[idx]).column;
        const bool wrapped_in_nullable = source_column->is_nullable();
        MutableColumnPtr variant_holder = source_column->assume_mutable();
        if (wrapped_in_nullable) {
            const auto& nullable_source = assert_cast<const ColumnNullable&>(*source_column);
            variant_holder = nullable_source.get_nested_column_ptr()->assume_mutable();
        }
        auto& source_variant = assert_cast<ColumnVariant&>(*variant_holder);
        variant_holder->finalize();

        if (!source_variant.is_scalar_variant()) {
            // already parsed
            continue;
        }

        VLOG_DEBUG << "parse scalar variant column: "
                   << source_variant.get_root_type()->get_name();
        ColumnPtr scalar_root;
        const bool root_is_jsonb =
                source_variant.get_root_type()->get_primitive_type() == TYPE_JSONB;
        if (root_is_jsonb) {
            // TODO more efficient way to parse jsonb type, currently we just convert jsonb to
            // json str and parse them into variant
            RETURN_IF_ERROR(
                    cast_column({source_variant.get_root(), source_variant.get_root_type(), ""},
                                source_variant.get_root()->is_nullable()
                                        ? make_nullable(std::make_shared<DataTypeString>())
                                        : std::make_shared<DataTypeString>(),
                                &scalar_root));
            if (scalar_root->is_nullable()) {
                scalar_root = assert_cast<const ColumnNullable*>(scalar_root.get())
                                      ->get_nested_column_ptr();
            }
        } else {
            // Strip a nullable wrapper from the root, if there is one.
            const auto& root = *source_variant.get_root();
            scalar_root = root.is_nullable()
                                  ? assert_cast<const ColumnNullable&>(root).get_nested_column_ptr()
                                  : source_variant.get_root();
        }

        MutableColumnPtr parsed_column;
        if (!scalar_root->is_column_string()) {
            // Root maybe other types rather than string like ColumnVariant(Int32).
            // In this case, we should finalize the root and cast to JSON type
            auto expected_root_type =
                    make_nullable(std::make_shared<ColumnVariant::MostCommonType>());
            source_variant.ensure_root_node_type(expected_root_type);
            parsed_column = source_variant.assume_mutable();
        } else {
            parsed_column = ColumnVariant::create(0, source_variant.enable_doc_mode());
            parse_json_to_variant(*parsed_column.get(),
                                  assert_cast<const ColumnString&>(*scalar_root), configs[idx]);
        }

        // Wrap variant with nullmap if it is nullable
        ColumnPtr result = parsed_column->get_ptr();
        if (wrapped_in_nullable) {
            const auto& null_map =
                    assert_cast<const ColumnNullable&>(*source_column).get_null_map_column_ptr();
            result = ColumnNullable::create(result, null_map);
        }
        block.get_by_position(variant_pos[idx]).column = result;
    }
    return Status::OK();
}
2869
2870
// Exception-safe entry point for _parse_and_materialize_variant_columns(): runs it
// under RETURN_IF_CATCH_EXCEPTION and returns its status.
//
// `variant_pos` lists the block positions of the VARIANT columns to parse and
// `configs` supplies the parse configuration used for each of them.
Status parse_and_materialize_variant_columns(Block& block, const std::vector<uint32_t>& variant_pos,
                                             const std::vector<ParseConfig>& configs) {
    RETURN_IF_CATCH_EXCEPTION(
            { return _parse_and_materialize_variant_columns(block, variant_pos, configs); });
    // Not reached: the guarded statement always returns. Kept so the function has an
    // explicit return on every syntactic path, like the other wrappers in this file.
    return Status::OK();
}
2875
2876
// Parses and materializes the VARIANT columns of `block` that are selected by
// `column_pos`, building one ParseConfig per VARIANT column from `tablet_schema`.
//
// Each entry of `column_pos` is looked up in `tablet_schema`; entries that are VARIANT
// columns are kept. Returns OK immediately when there are none. For every kept column:
//   * the deprecated flatten-nested switch and the duplicate-path check come from the
//     schema / global config;
//   * `reject_json_null_value` drives both reject_json_null_value and
//     record_empty_object_path;
//   * parse_to is OnlyDocValueColumn when the column enables doc mode, otherwise
//     OnlySubcolumns.
//
// NOTE(review): the same schema position is recorded both as the block position and as
// the schema position, so the two vectors are always identical. This presumably relies
// on the block being laid out in schema order — confirm against callers.
Status parse_and_materialize_variant_columns(Block& block, const TabletSchema& tablet_schema,
                                             const std::vector<uint32_t>& column_pos,
                                             bool reject_json_null_value) {
    std::vector<uint32_t> variant_column_pos;
    std::vector<uint32_t> variant_schema_pos;
    variant_column_pos.reserve(column_pos.size());
    variant_schema_pos.reserve(column_pos.size());
    for (const uint32_t schema_pos : column_pos) {
        if (!tablet_schema.column(schema_pos).is_variant_type()) {
            continue;
        }
        variant_column_pos.push_back(schema_pos);
        variant_schema_pos.push_back(schema_pos);
    }

    if (variant_column_pos.empty()) {
        return Status::OK();
    }

    std::vector<ParseConfig> configs(variant_column_pos.size());
    for (size_t idx = 0; idx < variant_column_pos.size(); ++idx) {
        ParseConfig& parse_config = configs[idx];
        // Deprecated legacy flatten-nested switch. Distinct from variant_enable_nested_group.
        parse_config.deprecated_enable_flatten_nested =
                tablet_schema.deprecated_variant_flatten_nested();
        parse_config.check_duplicate_json_path = config::variant_enable_duplicate_json_path_check;
        parse_config.reject_json_null_value = reject_json_null_value;
        parse_config.record_empty_object_path = reject_json_null_value;

        const auto& column = tablet_schema.column(variant_schema_pos[idx]);
        if (!column.is_variant_type()) {
            return Status::InternalError("column is not variant type, column name: {}",
                                         column.name());
        }

        // if doc mode is not enabled, no need to parse to doc value column
        if (column.variant_enable_doc_mode()) {
            parse_config.parse_to = ParseConfig::ParseTo::OnlyDocValueColumn;
        } else {
            parse_config.parse_to = ParseConfig::ParseTo::OnlySubcolumns;
        }
    }

    return parse_and_materialize_variant_columns(block, variant_column_pos, configs);
}
2921
2922
} // namespace doris::variant_util