Coverage Report

Created: 2026-03-15 18:01

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exec/common/variant_util.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "exec/common/variant_util.h"
19
20
#include <assert.h>
21
#include <fmt/format.h>
22
#include <gen_cpp/FrontendService.h>
23
#include <gen_cpp/FrontendService_types.h>
24
#include <gen_cpp/HeartbeatService_types.h>
25
#include <gen_cpp/MasterService_types.h>
26
#include <gen_cpp/Status_types.h>
27
#include <gen_cpp/Types_types.h>
28
#include <glog/logging.h>
29
#include <rapidjson/document.h>
30
#include <rapidjson/stringbuffer.h>
31
#include <rapidjson/writer.h>
32
#include <simdjson/simdjson.h> // IWYU pragma: keep
33
#include <unicode/uchar.h>
34
35
#include <algorithm>
36
#include <cassert>
37
#include <cstddef>
38
#include <cstdint>
39
#include <cstring>
40
#include <list>
41
#include <memory>
42
#include <mutex>
43
#include <optional>
44
#include <ostream>
45
#include <set>
46
#include <stack>
47
#include <string>
48
#include <string_view>
49
#include <unordered_map>
50
#include <utility>
51
#include <vector>
52
53
#include "common/config.h"
54
#include "common/status.h"
55
#include "core/assert_cast.h"
56
#include "core/block/block.h"
57
#include "core/block/column_numbers.h"
58
#include "core/block/column_with_type_and_name.h"
59
#include "core/column/column.h"
60
#include "core/column/column_array.h"
61
#include "core/column/column_map.h"
62
#include "core/column/column_nullable.h"
63
#include "core/column/column_string.h"
64
#include "core/column/column_variant.h"
65
#include "core/data_type/data_type.h"
66
#include "core/data_type/data_type_array.h"
67
#include "core/data_type/data_type_factory.hpp"
68
#include "core/data_type/data_type_jsonb.h"
69
#include "core/data_type/data_type_nullable.h"
70
#include "core/data_type/data_type_string.h"
71
#include "core/data_type/data_type_variant.h"
72
#include "core/data_type/define_primitive_type.h"
73
#include "core/data_type/get_least_supertype.h"
74
#include "core/data_type/primitive_type.h"
75
#include "core/field.h"
76
#include "core/typeid_cast.h"
77
#include "core/types.h"
78
#include "exec/common/field_visitors.h"
79
#include "exec/common/sip_hash.h"
80
#include "exprs/function/function.h"
81
#include "exprs/function/simple_function_factory.h"
82
#include "exprs/function_context.h"
83
#include "exprs/json_functions.h"
84
#include "re2/re2.h"
85
#include "runtime/exec_env.h"
86
#include "runtime/runtime_state.h"
87
#include "storage/olap_common.h"
88
#include "storage/rowset/beta_rowset.h"
89
#include "storage/rowset/rowset.h"
90
#include "storage/rowset/rowset_fwd.h"
91
#include "storage/segment/segment_loader.h"
92
#include "storage/segment/variant/variant_column_reader.h"
93
#include "storage/segment/variant/variant_column_writer_impl.h"
94
#include "storage/tablet/tablet.h"
95
#include "storage/tablet/tablet_fwd.h"
96
#include "storage/tablet/tablet_schema.h"
97
#include "util/client_cache.h"
98
#include "util/defer_op.h"
99
#include "util/json/json_parser.h"
100
#include "util/json/path_in_data.h"
101
#include "util/json/simd_json_parser.h"
102
103
namespace doris::variant_util {
104
#include "common/compile_check_begin.h"
105
106
224
inline void append_escaped_regex_char(std::string* regex_output, char ch) {
107
224
    switch (ch) {
108
10
    case '.':
109
12
    case '^':
110
14
    case '$':
111
16
    case '+':
112
21
    case '*':
113
23
    case '?':
114
25
    case '(':
115
27
    case ')':
116
29
    case '|':
117
31
    case '{':
118
33
    case '}':
119
35
    case '[':
120
35
    case ']':
121
39
    case '\\':
122
39
        regex_output->push_back('\\');
123
39
        regex_output->push_back(ch);
124
39
        break;
125
185
    default:
126
185
        regex_output->push_back(ch);
127
185
        break;
128
224
    }
129
224
}
130
131
// Small LRU to cap compiled glob patterns
132
constexpr size_t kGlobRegexCacheCapacity = 256;
133
134
struct GlobRegexCacheEntry {
135
    std::shared_ptr<RE2> re2;
136
    std::list<std::string>::iterator lru_it;
137
};
138
139
static std::mutex g_glob_regex_cache_mutex;
140
static std::list<std::string> g_glob_regex_cache_lru;
141
static std::unordered_map<std::string, GlobRegexCacheEntry> g_glob_regex_cache;
142
143
147
std::shared_ptr<RE2> get_or_build_re2(const std::string& glob_pattern) {
144
147
    {
145
147
        std::lock_guard<std::mutex> lock(g_glob_regex_cache_mutex);
146
147
        auto it = g_glob_regex_cache.find(glob_pattern);
147
147
        if (it != g_glob_regex_cache.end()) {
148
101
            g_glob_regex_cache_lru.splice(g_glob_regex_cache_lru.begin(), g_glob_regex_cache_lru,
149
101
                                          it->second.lru_it);
150
101
            return it->second.re2;
151
101
        }
152
147
    }
153
46
    std::string regex_pattern;
154
46
    Status st = glob_to_regex(glob_pattern, &regex_pattern);
155
46
    if (!st.ok()) {
156
2
        return nullptr;
157
2
    }
158
44
    auto compiled = std::make_shared<RE2>(regex_pattern);
159
44
    if (!compiled->ok()) {
160
3
        return nullptr;
161
3
    }
162
41
    {
163
41
        std::lock_guard<std::mutex> lock(g_glob_regex_cache_mutex);
164
41
        auto it = g_glob_regex_cache.find(glob_pattern);
165
41
        if (it != g_glob_regex_cache.end()) {
166
0
            g_glob_regex_cache_lru.splice(g_glob_regex_cache_lru.begin(), g_glob_regex_cache_lru,
167
0
                                          it->second.lru_it);
168
0
            return it->second.re2;
169
0
        }
170
41
        g_glob_regex_cache_lru.push_front(glob_pattern);
171
41
        g_glob_regex_cache.emplace(glob_pattern,
172
41
                                   GlobRegexCacheEntry {compiled, g_glob_regex_cache_lru.begin()});
173
41
        if (g_glob_regex_cache.size() > kGlobRegexCacheCapacity) {
174
0
            const std::string& evict_key = g_glob_regex_cache_lru.back();
175
0
            g_glob_regex_cache.erase(evict_key);
176
0
            g_glob_regex_cache_lru.pop_back();
177
0
        }
178
41
    }
179
0
    return compiled;
180
41
}
181
182
// Convert a restricted glob pattern into a regex.
183
// Supported: '*', '?', '[...]', '\\' escape. Others are treated as literals.
184
84
Status glob_to_regex(const std::string& glob_pattern, std::string* regex_pattern) {
185
84
    regex_pattern->clear();
186
84
    regex_pattern->append("^");
187
84
    bool is_escaped = false;
188
84
    size_t pattern_length = glob_pattern.size();
189
373
    for (size_t index = 0; index < pattern_length; ++index) {
190
293
        char current_char = glob_pattern[index];
191
293
        if (is_escaped) {
192
9
            append_escaped_regex_char(regex_pattern, current_char);
193
9
            is_escaped = false;
194
9
            continue;
195
9
        }
196
284
        if (current_char == '\\') {
197
13
            is_escaped = true;
198
13
            continue;
199
13
        }
200
271
        if (current_char == '*') {
201
15
            regex_pattern->append(".*");
202
15
            continue;
203
15
        }
204
256
        if (current_char == '?') {
205
13
            regex_pattern->append(".");
206
13
            continue;
207
13
        }
208
243
        if (current_char == '[') {
209
32
            size_t class_index = index + 1;
210
32
            bool class_closed = false;
211
32
            bool is_class_escaped = false;
212
32
            std::string class_buffer;
213
32
            if (class_index < pattern_length &&
214
32
                (glob_pattern[class_index] == '!' || glob_pattern[class_index] == '^')) {
215
9
                class_buffer.push_back('^');
216
9
                ++class_index;
217
9
            }
218
95
            for (; class_index < pattern_length; ++class_index) {
219
91
                char class_char = glob_pattern[class_index];
220
91
                if (is_class_escaped) {
221
10
                    class_buffer.push_back(class_char);
222
10
                    is_class_escaped = false;
223
10
                    continue;
224
10
                }
225
81
                if (class_char == '\\') {
226
10
                    is_class_escaped = true;
227
10
                    continue;
228
10
                }
229
71
                if (class_char == ']') {
230
28
                    class_closed = true;
231
28
                    break;
232
28
                }
233
43
                class_buffer.push_back(class_char);
234
43
            }
235
32
            if (!class_closed) {
236
4
                return Status::InvalidArgument("Unclosed character class in glob pattern: {}",
237
4
                                               glob_pattern);
238
4
            }
239
28
            regex_pattern->append("[");
240
28
            regex_pattern->append(class_buffer);
241
28
            regex_pattern->append("]");
242
28
            index = class_index;
243
28
            continue;
244
32
        }
245
211
        append_escaped_regex_char(regex_pattern, current_char);
246
211
    }
247
80
    if (is_escaped) {
248
4
        append_escaped_regex_char(regex_pattern, '\\');
249
4
    }
250
80
    regex_pattern->append("$");
251
80
    return Status::OK();
252
84
}
253
254
147
bool glob_match_re2(const std::string& glob_pattern, const std::string& candidate_path) {
255
147
    auto compiled = get_or_build_re2(glob_pattern);
256
147
    if (compiled == nullptr) {
257
5
        return false;
258
5
    }
259
142
    return RE2::FullMatch(candidate_path, *compiled);
260
147
}
261
262
14
size_t get_number_of_dimensions(const IDataType& type) {
263
14
    if (const auto* type_array = typeid_cast<const DataTypeArray*>(&type)) {
264
4
        return type_array->get_number_of_dimensions();
265
4
    }
266
10
    return 0;
267
14
}
268
3
size_t get_number_of_dimensions(const IColumn& column) {
269
3
    if (const auto* column_array = check_and_get_column<ColumnArray>(column)) {
270
2
        return column_array->get_number_of_dimensions();
271
2
    }
272
1
    return 0;
273
3
}
274
275
1.00k
DataTypePtr get_base_type_of_array(const DataTypePtr& type) {
276
    /// Get raw pointers to avoid extra copying of type pointers.
277
1.00k
    const DataTypeArray* last_array = nullptr;
278
1.00k
    const auto* current_type = type.get();
279
1.00k
    if (const auto* nullable = typeid_cast<const DataTypeNullable*>(current_type)) {
280
1.00k
        current_type = nullable->get_nested_type().get();
281
1.00k
    }
282
1.01k
    while (const auto* type_array = typeid_cast<const DataTypeArray*>(current_type)) {
283
11
        current_type = type_array->get_nested_type().get();
284
11
        last_array = type_array;
285
11
        if (const auto* nullable = typeid_cast<const DataTypeNullable*>(current_type)) {
286
8
            current_type = nullable->get_nested_type().get();
287
8
        }
288
11
    }
289
1.00k
    return last_array ? last_array->get_nested_type() : type;
290
1.00k
}
291
292
49.1k
Status cast_column(const ColumnWithTypeAndName& arg, const DataTypePtr& type, ColumnPtr* result) {
293
49.1k
    ColumnsWithTypeAndName arguments {arg, {nullptr, type, type->get_name()}};
294
295
    // To prevent from null info lost, we should not call function since the function framework will wrap
296
    // nullable to Variant instead of the root of Variant
297
    // correct output: Nullable(Array(int)) -> Nullable(Variant(Nullable(Array(int))))
298
    // incorrect output: Nullable(Array(int)) -> Nullable(Variant(Array(int)))
299
49.1k
    if (type->get_primitive_type() == TYPE_VARIANT) {
300
        // If source column is variant, so the nullable info is different from dst column
301
3
        if (arg.type->get_primitive_type() == TYPE_VARIANT) {
302
1
            *result = type->is_nullable() ? make_nullable(arg.column) : remove_nullable(arg.column);
303
1
            return Status::OK();
304
1
        }
305
        // set variant root column/type to from column/type
306
3
        CHECK(arg.column->is_nullable());
307
2
        auto to_type = remove_nullable(type);
308
2
        const auto& data_type_object = assert_cast<const DataTypeVariant&>(*to_type);
309
2
        auto variant = ColumnVariant::create(data_type_object.variant_max_subcolumns_count());
310
311
2
        variant->create_root(arg.type, arg.column->assume_mutable());
312
2
        ColumnPtr nullable = ColumnNullable::create(
313
2
                variant->get_ptr(),
314
2
                check_and_get_column<ColumnNullable>(arg.column.get())->get_null_map_column_ptr());
315
2
        *result = type->is_nullable() ? nullable : variant->get_ptr();
316
2
        return Status::OK();
317
3
    }
318
319
49.1k
    auto function = SimpleFunctionFactory::instance().get_function("CAST", arguments, type);
320
49.1k
    if (!function) {
321
0
        return Status::InternalError("Not found cast function {} to {}", arg.type->get_name(),
322
0
                                     type->get_name());
323
0
    }
324
49.1k
    Block tmp_block {arguments};
325
49.1k
    uint32_t result_column = cast_set<uint32_t>(tmp_block.columns());
326
49.1k
    RuntimeState state;
327
49.1k
    auto ctx = FunctionContext::create_context(&state, {}, {});
328
329
49.1k
    if (arg.type->get_primitive_type() == INVALID_TYPE) {
330
        // cast from nothing to any type should result in nulls
331
652
        *result = type->create_column_const_with_default_value(arg.column->size())
332
652
                          ->convert_to_full_column_if_const();
333
652
        return Status::OK();
334
652
    }
335
336
    // We convert column string to jsonb type just add a string jsonb field to dst column instead of parse
337
    // each line in original string column.
338
48.4k
    ctx->set_string_as_jsonb_string(true);
339
48.4k
    ctx->set_jsonb_string_as_string(true);
340
48.4k
    tmp_block.insert({nullptr, type, arg.name});
341
    // TODO(lihangyu): we should handle this error in strict mode
342
48.4k
    if (!function->execute(ctx.get(), tmp_block, {0}, result_column, arg.column->size())) {
343
0
        LOG_EVERY_N(WARNING, 100) << fmt::format("cast from {} to {}", arg.type->get_name(),
344
0
                                                 type->get_name());
345
0
        *result = type->create_column_const_with_default_value(arg.column->size())
346
0
                          ->convert_to_full_column_if_const();
347
0
        return Status::OK();
348
0
    }
349
48.4k
    *result = tmp_block.get_by_position(result_column).column->convert_to_full_column_if_const();
350
48.4k
    VLOG_DEBUG << fmt::format("{} before convert {}, after convert {}", arg.name,
351
0
                              arg.column->get_name(), (*result)->get_name());
352
48.4k
    return Status::OK();
353
48.4k
}
354
355
void get_column_by_type(const DataTypePtr& data_type, const std::string& name, TabletColumn& column,
356
2.05k
                        const ExtraInfo& ext_info) {
357
2.05k
    column.set_name(name);
358
2.05k
    column.set_type(data_type->get_storage_field_type());
359
2.05k
    if (ext_info.unique_id >= 0) {
360
4
        column.set_unique_id(ext_info.unique_id);
361
4
    }
362
2.05k
    if (ext_info.parent_unique_id >= 0) {
363
1.02k
        column.set_parent_unique_id(ext_info.parent_unique_id);
364
1.02k
    }
365
2.05k
    if (!ext_info.path_info.empty()) {
366
1.02k
        column.set_path_info(ext_info.path_info);
367
1.02k
    }
368
2.05k
    if (data_type->is_nullable()) {
369
1.02k
        const auto& real_type = static_cast<const DataTypeNullable&>(*data_type);
370
1.02k
        column.set_is_nullable(true);
371
1.02k
        get_column_by_type(real_type.get_nested_type(), name, column, {});
372
1.02k
        return;
373
1.02k
    }
374
1.03k
    if (data_type->get_primitive_type() == PrimitiveType::TYPE_ARRAY) {
375
9
        TabletColumn child;
376
9
        get_column_by_type(assert_cast<const DataTypeArray*>(data_type.get())->get_nested_type(),
377
9
                           "", child, {});
378
9
        column.set_length(TabletColumn::get_field_length_by_type(TPrimitiveType::ARRAY, 0));
379
9
        column.add_sub_column(child);
380
9
        return;
381
9
    }
382
    // size is not fixed when type is string or json
383
1.02k
    if (is_string_type(data_type->get_primitive_type()) ||
384
1.02k
        data_type->get_primitive_type() == TYPE_JSONB) {
385
367
        column.set_length(INT_MAX);
386
367
        return;
387
367
    }
388
389
662
    PrimitiveType type = data_type->get_primitive_type();
390
662
    if (is_int_or_bool(type) || is_string_type(type) || is_float_or_double(type) || is_ip(type) ||
391
662
        is_date_or_datetime(type) || type == PrimitiveType::TYPE_DATEV2) {
392
659
        column.set_length(cast_set<int32_t>(data_type->get_size_of_value_in_memory()));
393
659
        return;
394
659
    }
395
3
    if (is_decimal(type)) {
396
1
        column.set_precision(data_type->get_precision());
397
1
        column.set_frac(data_type->get_scale());
398
1
        return;
399
1
    }
400
    // datetimev2 needs scale
401
2
    if (type == PrimitiveType::TYPE_DATETIMEV2 || type == PrimitiveType::TYPE_TIMESTAMPTZ) {
402
1
        column.set_precision(-1);
403
1
        column.set_frac(data_type->get_scale());
404
1
        return;
405
1
    }
406
407
1
    throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
408
1
                           "unexcepted data column type: {}, column name is: {}",
409
1
                           data_type->get_name(), name);
410
2
}
411
412
TabletColumn get_column_by_type(const DataTypePtr& data_type, const std::string& name,
413
1.02k
                                const ExtraInfo& ext_info) {
414
1.02k
    TabletColumn result;
415
1.02k
    get_column_by_type(data_type, name, result, ext_info);
416
1.02k
    return result;
417
1.02k
}
418
419
// check if two paths which same prefix have different structure
420
static bool has_different_structure_in_same_path(const PathInData::Parts& lhs,
421
9.00k
                                                 const PathInData::Parts& rhs) {
422
9.00k
    if (lhs.size() != rhs.size()) {
423
1
        return false; // different size means different structure
424
1
    }
425
    // Since we group by path string, lhs and rhs must have the same size and keys
426
    // We only need to check if they have different nested structure
427
35.9k
    for (size_t i = 0; i < lhs.size(); ++i) {
428
26.9k
        if (lhs[i] != rhs[i]) {
429
5
            VLOG_DEBUG << fmt::format(
430
0
                    "Check different structure: {} vs {}, lhs[i].is_nested: {}, rhs[i].is_nested: "
431
0
                    "{}",
432
0
                    lhs[i].key, rhs[i].key, lhs[i].is_nested, rhs[i].is_nested);
433
5
            return true;
434
5
        }
435
26.9k
    }
436
8.99k
    return false;
437
8.99k
}
438
439
3.02k
Status check_variant_has_no_ambiguous_paths(const PathsInData& tuple_paths) {
440
    // Group paths by their string representation to reduce comparisons
441
3.02k
    std::unordered_map<std::string, std::vector<size_t>> path_groups;
442
443
24.0k
    for (size_t i = 0; i < tuple_paths.size(); ++i) {
444
        // same path should have same structure, so we group them by path
445
21.0k
        path_groups[tuple_paths[i].get_path()].push_back(i);
446
        // print part of tuple_paths[i]
447
21.0k
        VLOG_DEBUG << "tuple_paths[i]: " << tuple_paths[i].get_path();
448
21.0k
    }
449
450
    // Only compare paths within the same group
451
12.0k
    for (const auto& [path_str, indices] : path_groups) {
452
12.0k
        if (indices.size() <= 1) {
453
3.02k
            continue; // No conflicts possible
454
3.02k
        }
455
456
        // Compare all pairs within this group
457
26.9k
        for (size_t i = 0; i < indices.size(); ++i) {
458
26.9k
            for (size_t j = 0; j < i; ++j) {
459
9.00k
                if (has_different_structure_in_same_path(tuple_paths[indices[i]].get_parts(),
460
9.00k
                                                         tuple_paths[indices[j]].get_parts())) {
461
5
                    return Status::DataQualityError(
462
5
                            "Ambiguous paths: {} vs {} with different nested part {} vs {}",
463
5
                            tuple_paths[indices[i]].get_path(), tuple_paths[indices[j]].get_path(),
464
5
                            tuple_paths[indices[i]].has_nested_part(),
465
5
                            tuple_paths[indices[j]].has_nested_part());
466
5
                }
467
9.00k
            }
468
18.0k
        }
469
9.00k
    }
470
3.01k
    return Status::OK();
471
3.02k
}
472
473
Status update_least_schema_internal(const std::map<PathInData, DataTypes>& subcolumns_types,
474
                                    TabletSchemaSPtr& common_schema, int32_t variant_col_unique_id,
475
                                    const std::map<std::string, TabletColumnPtr>& typed_columns,
476
8
                                    std::set<PathInData>* path_set) {
477
8
    PathsInData tuple_paths;
478
8
    DataTypes tuple_types;
479
8
    CHECK(common_schema.use_count() == 1);
480
    // Get the least common type for all paths.
481
8
    for (const auto& [key, subtypes] : subcolumns_types) {
482
7
        assert(!subtypes.empty());
483
7
        if (key.get_path() == ColumnVariant::COLUMN_NAME_DUMMY) {
484
0
            continue;
485
0
        }
486
7
        size_t first_dim = get_number_of_dimensions(*subtypes[0]);
487
7
        tuple_paths.emplace_back(key);
488
10
        for (size_t i = 1; i < subtypes.size(); ++i) {
489
4
            if (first_dim != get_number_of_dimensions(*subtypes[i])) {
490
1
                tuple_types.emplace_back(make_nullable(std::make_shared<DataTypeJsonb>()));
491
1
                LOG(INFO) << fmt::format(
492
1
                        "Uncompatible types of subcolumn '{}': {} and {}, cast to JSONB",
493
1
                        key.get_path(), subtypes[0]->get_name(), subtypes[i]->get_name());
494
1
                break;
495
1
            }
496
4
        }
497
7
        if (tuple_paths.size() == tuple_types.size()) {
498
1
            continue;
499
1
        }
500
6
        DataTypePtr common_type;
501
6
        get_least_supertype_jsonb(subtypes, &common_type);
502
6
        if (!common_type->is_nullable()) {
503
3
            common_type = make_nullable(common_type);
504
3
        }
505
6
        tuple_types.emplace_back(common_type);
506
6
    }
507
8
    CHECK_EQ(tuple_paths.size(), tuple_types.size());
508
509
    // Append all common type columns of this variant
510
15
    for (int i = 0; i < tuple_paths.size(); ++i) {
511
7
        TabletColumn common_column;
512
        // typed path not contains root part
513
7
        auto path_without_root = tuple_paths[i].copy_pop_front().get_path();
514
7
        if (typed_columns.contains(path_without_root) && !tuple_paths[i].has_nested_part()) {
515
0
            common_column = *typed_columns.at(path_without_root);
516
            // parent unique id and path may not be init in write path
517
0
            common_column.set_parent_unique_id(variant_col_unique_id);
518
0
            common_column.set_path_info(tuple_paths[i]);
519
0
            common_column.set_name(tuple_paths[i].get_path());
520
7
        } else {
521
            // const std::string& column_name = variant_col_name + "." + tuple_paths[i].get_path();
522
7
            get_column_by_type(tuple_types[i], tuple_paths[i].get_path(), common_column,
523
7
                               ExtraInfo {.unique_id = -1,
524
7
                                          .parent_unique_id = variant_col_unique_id,
525
7
                                          .path_info = tuple_paths[i]});
526
7
        }
527
7
        common_schema->append_column(common_column);
528
7
        if (path_set != nullptr) {
529
4
            path_set->insert(tuple_paths[i]);
530
4
        }
531
7
    }
532
8
    return Status::OK();
533
8
}
534
535
Status update_least_common_schema(const std::vector<TabletSchemaSPtr>& schemas,
536
                                  TabletSchemaSPtr& common_schema, int32_t variant_col_unique_id,
537
7
                                  std::set<PathInData>* path_set) {
538
7
    std::map<std::string, TabletColumnPtr> typed_columns;
539
7
    for (const TabletColumnPtr& col :
540
7
         common_schema->column_by_uid(variant_col_unique_id).get_sub_columns()) {
541
2
        typed_columns[col->name()] = col;
542
2
    }
543
    // Types of subcolumns by path from all tuples.
544
7
    std::map<PathInData, DataTypes> subcolumns_types;
545
546
    // Collect all paths first to enable batch checking
547
7
    std::vector<PathInData> all_paths;
548
549
12
    for (const TabletSchemaSPtr& schema : schemas) {
550
16
        for (const TabletColumnPtr& col : schema->columns()) {
551
            // Get subcolumns of this variant
552
16
            if (col->has_path_info() && col->parent_unique_id() > 0 &&
553
16
                col->parent_unique_id() == variant_col_unique_id) {
554
6
                subcolumns_types[*col->path_info_ptr()].emplace_back(
555
6
                        DataTypeFactory::instance().create_data_type(*col, col->is_nullable()));
556
6
                all_paths.push_back(*col->path_info_ptr());
557
6
            }
558
16
        }
559
12
    }
560
561
    // Batch check for conflicts
562
7
    RETURN_IF_ERROR(check_variant_has_no_ambiguous_paths(all_paths));
563
564
7
    return update_least_schema_internal(subcolumns_types, common_schema, variant_col_unique_id,
565
7
                                        typed_columns, path_set);
566
7
}
567
568
// Keep variant subcolumn BF support aligned with FE DDL checks.
569
1.04k
bool is_bf_supported_by_fe_for_variant_subcolumn(FieldType type) {
570
1.04k
    switch (type) {
571
0
    case FieldType::OLAP_FIELD_TYPE_SMALLINT:
572
6
    case FieldType::OLAP_FIELD_TYPE_INT:
573
649
    case FieldType::OLAP_FIELD_TYPE_BIGINT:
574
649
    case FieldType::OLAP_FIELD_TYPE_LARGEINT:
575
649
    case FieldType::OLAP_FIELD_TYPE_CHAR:
576
649
    case FieldType::OLAP_FIELD_TYPE_VARCHAR:
577
1.02k
    case FieldType::OLAP_FIELD_TYPE_STRING:
578
1.02k
    case FieldType::OLAP_FIELD_TYPE_DATE:
579
1.02k
    case FieldType::OLAP_FIELD_TYPE_DATETIME:
580
1.03k
    case FieldType::OLAP_FIELD_TYPE_DATEV2:
581
1.03k
    case FieldType::OLAP_FIELD_TYPE_DATETIMEV2:
582
1.03k
    case FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ:
583
1.03k
    case FieldType::OLAP_FIELD_TYPE_DECIMAL:
584
1.03k
    case FieldType::OLAP_FIELD_TYPE_DECIMAL32:
585
1.03k
    case FieldType::OLAP_FIELD_TYPE_DECIMAL64:
586
1.03k
    case FieldType::OLAP_FIELD_TYPE_DECIMAL128I:
587
1.03k
    case FieldType::OLAP_FIELD_TYPE_DECIMAL256:
588
1.03k
    case FieldType::OLAP_FIELD_TYPE_IPV4:
589
1.03k
    case FieldType::OLAP_FIELD_TYPE_IPV6:
590
1.03k
        return true;
591
13
    default:
592
13
        return false;
593
1.04k
    }
594
1.04k
}
595
596
void inherit_column_attributes(const TabletColumn& source, TabletColumn& target,
597
1.04k
                               TabletSchemaSPtr* target_schema) {
598
1.04k
    if (!target.is_extracted_column()) {
599
0
        return;
600
0
    }
601
1.04k
    target.set_aggregation_method(source.aggregation());
602
603
    // 1. bloom filter
604
1.04k
    if (is_bf_supported_by_fe_for_variant_subcolumn(target.type())) {
605
1.03k
        target.set_is_bf_column(source.is_bf_column());
606
1.03k
    }
607
608
1.04k
    if (!target_schema) {
609
1.03k
        return;
610
1.03k
    }
611
612
    // 2. inverted index
613
8
    TabletIndexes indexes_to_add;
614
8
    auto source_indexes = (*target_schema)->inverted_indexs(source.unique_id());
615
    // if target is variant type, we need to inherit all indexes
616
    // because this schema is a read schema from fe
617
8
    if (target.is_variant_type()) {
618
0
        for (auto& index : source_indexes) {
619
0
            auto index_info = std::make_shared<TabletIndex>(*index);
620
0
            index_info->set_escaped_escaped_index_suffix_path(target.path_info_ptr()->get_path());
621
0
            indexes_to_add.emplace_back(std::move(index_info));
622
0
        }
623
8
    } else {
624
8
        inherit_index(source_indexes, indexes_to_add, target);
625
8
    }
626
8
    auto target_indexes = (*target_schema)
627
8
                                  ->inverted_indexs(target.parent_unique_id(),
628
8
                                                    target.path_info_ptr()->get_path());
629
8
    if (target_indexes.empty()) {
630
8
        for (auto& index_info : indexes_to_add) {
631
8
            (*target_schema)->append_index(std::move(*index_info));
632
8
        }
633
8
    }
634
635
    // 3. TODO: gnragm bf index
636
8
}
637
638
6
void inherit_column_attributes(TabletSchemaSPtr& schema) {
639
    // Add index meta if extracted column is missing index meta
640
23
    for (size_t i = 0; i < schema->num_columns(); ++i) {
641
17
        TabletColumn& col = schema->mutable_column(i);
642
17
        if (!col.is_extracted_column()) {
643
9
            continue;
644
9
        }
645
8
        if (schema->field_index(col.parent_unique_id()) == -1) {
646
            // parent column is missing, maybe dropped
647
0
            continue;
648
0
        }
649
8
        inherit_column_attributes(schema->column_by_uid(col.parent_unique_id()), col, &schema);
650
8
    }
651
6
}
652
653
Status get_least_common_schema(const std::vector<TabletSchemaSPtr>& schemas,
654
                               const TabletSchemaSPtr& base_schema, TabletSchemaSPtr& output_schema,
655
4
                               bool check_schema_size) {
656
4
    std::vector<int32_t> variant_column_unique_id;
657
    // Construct a schema excluding the extracted columns and gather unique identifiers for variants.
658
    // Ensure that the output schema also excludes these extracted columns. This approach prevents
659
    // duplicated paths following the update_least_common_schema process.
660
4
    auto build_schema_without_extracted_columns = [&](const TabletSchemaSPtr& base_schema) {
661
4
        output_schema = std::make_shared<TabletSchema>();
662
        // not copy columns but only shadow copy other attributes
663
4
        output_schema->shawdow_copy_without_columns(*base_schema);
664
        // Get all columns without extracted columns and collect variant col unique id
665
7
        for (const TabletColumnPtr& col : base_schema->columns()) {
666
7
            if (col->is_variant_type()) {
667
4
                variant_column_unique_id.push_back(col->unique_id());
668
4
            }
669
7
            if (!col->is_extracted_column()) {
670
4
                output_schema->append_column(*col);
671
4
            }
672
7
        }
673
4
    };
674
4
    if (base_schema == nullptr) {
675
        // Pick tablet schema with max schema version
676
4
        auto max_version_schema =
677
4
                *std::max_element(schemas.cbegin(), schemas.cend(),
678
4
                                  [](const TabletSchemaSPtr a, const TabletSchemaSPtr b) {
679
2
                                      return a->schema_version() < b->schema_version();
680
2
                                  });
681
4
        CHECK(max_version_schema);
682
4
        build_schema_without_extracted_columns(max_version_schema);
683
4
    } else {
684
        // use input base_schema schema as base schema
685
0
        build_schema_without_extracted_columns(base_schema);
686
0
    }
687
688
4
    for (int32_t unique_id : variant_column_unique_id) {
689
4
        std::set<PathInData> path_set;
690
4
        RETURN_IF_ERROR(update_least_common_schema(schemas, output_schema, unique_id, &path_set));
691
4
    }
692
693
4
    inherit_column_attributes(output_schema);
694
4
    if (check_schema_size &&
695
4
        output_schema->columns().size() > config::variant_max_merged_tablet_schema_size) {
696
0
        return Status::DataQualityError("Reached max column size limit {}",
697
0
                                        config::variant_max_merged_tablet_schema_size);
698
0
    }
699
700
4
    return Status::OK();
701
4
}
702
703
// sort by paths in lexicographical order
704
626
ColumnVariant::Subcolumns get_sorted_subcolumns(const ColumnVariant::Subcolumns& subcolumns) {
705
    // sort by paths in lexicographical order
706
626
    ColumnVariant::Subcolumns sorted = subcolumns;
707
4.09k
    std::sort(sorted.begin(), sorted.end(), [](const auto& lhsItem, const auto& rhsItem) {
708
4.09k
        return lhsItem->path < rhsItem->path;
709
4.09k
    });
710
626
    return sorted;
711
626
}
712
713
bool has_schema_index_diff(const TabletSchema* new_schema, const TabletSchema* old_schema,
714
4
                           int32_t new_col_idx, int32_t old_col_idx) {
715
4
    const auto& column_new = new_schema->column(new_col_idx);
716
4
    const auto& column_old = old_schema->column(old_col_idx);
717
718
4
    if (column_new.is_bf_column() != column_old.is_bf_column()) {
719
2
        return true;
720
2
    }
721
722
2
    auto new_schema_inverted_indexs = new_schema->inverted_indexs(column_new);
723
2
    auto old_schema_inverted_indexs = old_schema->inverted_indexs(column_old);
724
725
2
    if (new_schema_inverted_indexs.size() != old_schema_inverted_indexs.size()) {
726
1
        return true;
727
1
    }
728
729
2
    for (size_t i = 0; i < new_schema_inverted_indexs.size(); ++i) {
730
1
        if (!new_schema_inverted_indexs[i]->is_same_except_id(old_schema_inverted_indexs[i])) {
731
0
            return true;
732
0
        }
733
1
    }
734
735
1
    return false;
736
1
}
737
738
622
TabletColumn create_sparse_column(const TabletColumn& variant) {
739
622
    TabletColumn res;
740
622
    res.set_name(variant.name_lower_case() + "." + SPARSE_COLUMN_PATH);
741
622
    res.set_type(FieldType::OLAP_FIELD_TYPE_MAP);
742
622
    res.set_aggregation_method(variant.aggregation());
743
622
    res.set_path_info(PathInData {variant.name_lower_case() + "." + SPARSE_COLUMN_PATH});
744
622
    res.set_parent_unique_id(variant.unique_id());
745
    // set default value to "NULL" DefaultColumnIterator will call insert_many_defaults
746
622
    res.set_default_value("NULL");
747
622
    TabletColumn child_tcolumn;
748
622
    child_tcolumn.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
749
622
    res.add_sub_column(child_tcolumn);
750
622
    res.add_sub_column(child_tcolumn);
751
622
    return res;
752
622
}
753
754
31
TabletColumn create_sparse_shard_column(const TabletColumn& variant, int bucket_index) {
755
31
    TabletColumn res;
756
31
    std::string name = variant.name_lower_case() + "." + SPARSE_COLUMN_PATH + ".b" +
757
31
                       std::to_string(bucket_index);
758
31
    res.set_name(name);
759
31
    res.set_type(FieldType::OLAP_FIELD_TYPE_MAP);
760
31
    res.set_aggregation_method(variant.aggregation());
761
31
    res.set_parent_unique_id(variant.unique_id());
762
31
    res.set_default_value("NULL");
763
31
    PathInData path(name);
764
31
    res.set_path_info(path);
765
31
    TabletColumn child_tcolumn;
766
31
    child_tcolumn.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
767
31
    res.add_sub_column(child_tcolumn);
768
31
    res.add_sub_column(child_tcolumn);
769
31
    return res;
770
31
}
771
772
28
TabletColumn create_doc_value_column(const TabletColumn& variant, int bucket_index) {
773
28
    TabletColumn res;
774
28
    std::string name = variant.name_lower_case() + "." + DOC_VALUE_COLUMN_PATH + ".b" +
775
28
                       std::to_string(bucket_index);
776
28
    res.set_name(name);
777
28
    res.set_type(FieldType::OLAP_FIELD_TYPE_MAP);
778
28
    res.set_aggregation_method(variant.aggregation());
779
28
    res.set_parent_unique_id(variant.unique_id());
780
28
    res.set_default_value("NULL");
781
28
    res.set_path_info(PathInData {name});
782
783
28
    TabletColumn child_tcolumn;
784
28
    child_tcolumn.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
785
28
    res.add_sub_column(child_tcolumn);
786
28
    res.add_sub_column(child_tcolumn);
787
28
    return res;
788
28
}
789
790
5.34k
uint32_t variant_binary_shard_of(const StringRef& path, uint32_t bucket_num) {
791
5.34k
    if (bucket_num <= 1) return 0;
792
5.34k
    SipHash hash;
793
5.34k
    hash.update(path.data, path.size);
794
5.34k
    uint64_t h = hash.get64();
795
5.34k
    return static_cast<uint32_t>(h % bucket_num);
796
5.34k
}
797
798
Status VariantCompactionUtil::aggregate_path_to_stats(
799
        const RowsetSharedPtr& rs,
800
19
        std::unordered_map<int32_t, PathToNoneNullValues>* uid_to_path_stats) {
801
19
    SegmentCacheHandle segment_cache;
802
19
    RETURN_IF_ERROR(SegmentLoader::instance()->load_segments(
803
19
            std::static_pointer_cast<BetaRowset>(rs), &segment_cache));
804
805
95
    for (const auto& column : rs->tablet_schema()->columns()) {
806
95
        if (!column->is_variant_type() || column->unique_id() < 0) {
807
57
            continue;
808
57
        }
809
810
194
        for (const auto& segment : segment_cache.get_segments()) {
811
194
            std::shared_ptr<ColumnReader> column_reader;
812
194
            OlapReaderStatistics stats;
813
194
            RETURN_IF_ERROR(
814
194
                    segment->get_column_reader(column->unique_id(), &column_reader, &stats));
815
194
            if (!column_reader) {
816
0
                continue;
817
0
            }
818
819
194
            CHECK(column_reader->get_meta_type() == FieldType::OLAP_FIELD_TYPE_VARIANT);
820
194
            auto* variant_column_reader =
821
194
                    assert_cast<segment_v2::VariantColumnReader*>(column_reader.get());
822
            // load external meta before getting stats
823
194
            RETURN_IF_ERROR(variant_column_reader->load_external_meta_once());
824
194
            const auto* source_stats = variant_column_reader->get_stats();
825
194
            CHECK(source_stats);
826
827
            // agg path -> stats
828
1.35k
            for (const auto& [path, size] : source_stats->sparse_column_non_null_size) {
829
1.35k
                (*uid_to_path_stats)[column->unique_id()][path] += size;
830
1.35k
            }
831
832
572
            for (const auto& [path, size] : source_stats->subcolumns_non_null_size) {
833
572
                (*uid_to_path_stats)[column->unique_id()][path] += size;
834
572
            }
835
194
        }
836
38
    }
837
19
    return Status::OK();
838
19
}
839
840
Status VariantCompactionUtil::aggregate_variant_extended_info(
841
        const RowsetSharedPtr& rs,
842
363
        std::unordered_map<int32_t, VariantExtendedInfo>* uid_to_variant_extended_info) {
843
363
    SegmentCacheHandle segment_cache;
844
363
    RETURN_IF_ERROR(SegmentLoader::instance()->load_segments(
845
363
            std::static_pointer_cast<BetaRowset>(rs), &segment_cache));
846
847
7.46k
    for (const auto& column : rs->tablet_schema()->columns()) {
848
7.46k
        if (!column->is_variant_type()) {
849
7.44k
            continue;
850
7.44k
        }
851
60
        for (const auto& segment : segment_cache.get_segments()) {
852
60
            std::shared_ptr<ColumnReader> column_reader;
853
60
            OlapReaderStatistics stats;
854
60
            RETURN_IF_ERROR(
855
60
                    segment->get_column_reader(column->unique_id(), &column_reader, &stats));
856
60
            if (!column_reader) {
857
0
                continue;
858
0
            }
859
860
60
            CHECK(column_reader->get_meta_type() == FieldType::OLAP_FIELD_TYPE_VARIANT);
861
60
            auto* variant_column_reader =
862
60
                    assert_cast<segment_v2::VariantColumnReader*>(column_reader.get());
863
            // load external meta before getting stats
864
60
            RETURN_IF_ERROR(variant_column_reader->load_external_meta_once());
865
60
            const auto* source_stats = variant_column_reader->get_stats();
866
60
            CHECK(source_stats);
867
868
            // 1. agg path -> stats
869
420
            for (const auto& [path, size] : source_stats->sparse_column_non_null_size) {
870
420
                (*uid_to_variant_extended_info)[column->unique_id()]
871
420
                        .path_to_none_null_values[path] += size;
872
420
                (*uid_to_variant_extended_info)[column->unique_id()].sparse_paths.emplace(path);
873
420
            }
874
875
170
            for (const auto& [path, size] : source_stats->subcolumns_non_null_size) {
876
170
                (*uid_to_variant_extended_info)[column->unique_id()]
877
170
                        .path_to_none_null_values[path] += size;
878
170
            }
879
880
            //2. agg path -> schema
881
60
            auto& paths_types =
882
60
                    (*uid_to_variant_extended_info)[column->unique_id()].path_to_data_types;
883
60
            variant_column_reader->get_subcolumns_types(&paths_types);
884
885
            // 3. extract typed paths
886
60
            auto& typed_paths = (*uid_to_variant_extended_info)[column->unique_id()].typed_paths;
887
60
            variant_column_reader->get_typed_paths(&typed_paths);
888
889
            // 4. extract nested paths
890
60
            auto& nested_paths = (*uid_to_variant_extended_info)[column->unique_id()].nested_paths;
891
60
            variant_column_reader->get_nested_paths(&nested_paths);
892
893
            // 5. check if has nested group from stats
894
60
            if (source_stats->has_nested_group) {
895
0
                (*uid_to_variant_extended_info)[column->unique_id()].has_nested_group = true;
896
0
            }
897
60
        }
898
14
    }
899
362
    return Status::OK();
900
362
}
901
902
// get the subpaths and sparse paths for the variant column
903
void VariantCompactionUtil::get_subpaths(int32_t max_subcolumns_count,
904
                                         const PathToNoneNullValues& stats,
905
11
                                         TabletSchema::PathsSetInfo& paths_set_info) {
906
    // max_subcolumns_count is 0 means no limit
907
11
    if (max_subcolumns_count > 0 && stats.size() > max_subcolumns_count) {
908
6
        std::vector<std::pair<size_t, std::string_view>> paths_with_sizes;
909
6
        paths_with_sizes.reserve(stats.size());
910
48
        for (const auto& [path, size] : stats) {
911
48
            paths_with_sizes.emplace_back(size, path);
912
48
        }
913
6
        std::sort(paths_with_sizes.begin(), paths_with_sizes.end(), std::greater());
914
915
        // Select top N paths as subcolumns, remaining paths as sparse columns
916
48
        for (const auto& [size, path] : paths_with_sizes) {
917
48
            if (paths_set_info.sub_path_set.size() < max_subcolumns_count) {
918
18
                paths_set_info.sub_path_set.emplace(path);
919
30
            } else {
920
30
                paths_set_info.sparse_path_set.emplace(path);
921
30
            }
922
48
        }
923
6
        LOG(INFO) << "subpaths " << paths_set_info.sub_path_set.size() << " sparse paths "
924
6
                  << paths_set_info.sparse_path_set.size() << " variant max subcolumns count "
925
6
                  << max_subcolumns_count << " stats size " << paths_with_sizes.size();
926
6
    } else {
927
        // Apply all paths as subcolumns
928
11
        for (const auto& [path, _] : stats) {
929
11
            paths_set_info.sub_path_set.emplace(path);
930
11
        }
931
5
    }
932
11
}
933
934
Status VariantCompactionUtil::check_path_stats(const std::vector<RowsetSharedPtr>& intputs,
935
49
                                               RowsetSharedPtr output, BaseTabletSPtr tablet) {
936
49
    if (output->tablet_schema()->num_variant_columns() == 0) {
937
49
        return Status::OK();
938
49
    }
939
    // check no extended schema in input rowsets
940
0
    for (const auto& rowset : intputs) {
941
0
        for (const auto& column : rowset->tablet_schema()->columns()) {
942
0
            if (column->is_extracted_column()) {
943
0
                return Status::OK();
944
0
            }
945
0
        }
946
0
    }
947
0
#ifndef BE_TEST
948
    // check no extended schema in output rowset
949
0
    for (const auto& column : output->tablet_schema()->columns()) {
950
0
        if (column->is_extracted_column()) {
951
0
            const auto& name = column->name();
952
0
            if (name.find("." + DOC_VALUE_COLUMN_PATH + ".") != std::string::npos ||
953
0
                name.find("." + SPARSE_COLUMN_PATH + ".") != std::string::npos ||
954
0
                name.ends_with("." + SPARSE_COLUMN_PATH)) {
955
0
                continue;
956
0
            }
957
0
            return Status::InternalError("Unexpected extracted column {} in output rowset",
958
0
                                         column->name());
959
0
        }
960
0
    }
961
0
#endif
962
    // only check path stats for dup_keys since the rows may be merged in other models
963
0
    if (tablet->keys_type() != KeysType::DUP_KEYS) {
964
0
        return Status::OK();
965
0
    }
966
    // if there is a delete predicate in the input rowsets, we skip the path stats check
967
0
    for (auto& rowset : intputs) {
968
0
        if (rowset->rowset_meta()->has_delete_predicate()) {
969
0
            return Status::OK();
970
0
        }
971
0
    }
972
0
    std::unordered_map<int32_t, PathToNoneNullValues> original_uid_to_path_stats;
973
0
    for (const auto& rs : intputs) {
974
0
        RETURN_IF_ERROR(aggregate_path_to_stats(rs, &original_uid_to_path_stats));
975
0
    }
976
0
    std::unordered_map<int32_t, PathToNoneNullValues> output_uid_to_path_stats;
977
0
    RETURN_IF_ERROR(aggregate_path_to_stats(output, &output_uid_to_path_stats));
978
0
    for (const auto& [uid, stats] : output_uid_to_path_stats) {
979
0
        if (output->tablet_schema()->column_by_uid(uid).is_variant_type() &&
980
0
            output->tablet_schema()->column_by_uid(uid).variant_enable_doc_mode()) {
981
0
            continue;
982
0
        }
983
0
        if (original_uid_to_path_stats.find(uid) == original_uid_to_path_stats.end()) {
984
0
            return Status::InternalError("Path stats not found for uid {}, tablet_id {}", uid,
985
0
                                         tablet->tablet_id());
986
0
        }
987
988
        // In input rowsets, some rowsets may have statistics values exceeding the maximum limit,
989
        // which leads to inaccurate statistics
990
0
        if (stats.size() > output->tablet_schema()
991
0
                                   ->column_by_uid(uid)
992
0
                                   .variant_max_sparse_column_statistics_size()) {
993
            // When there is only one segment, we can ensure that the size of each path in output stats is accurate
994
0
            if (output->num_segments() == 1) {
995
0
                for (const auto& [path, size] : stats) {
996
0
                    if (original_uid_to_path_stats.at(uid).find(path) ==
997
0
                        original_uid_to_path_stats.at(uid).end()) {
998
0
                        continue;
999
0
                    }
1000
0
                    if (original_uid_to_path_stats.at(uid).at(path) > size) {
1001
0
                        return Status::InternalError(
1002
0
                                "Path stats not smaller for uid {} with path `{}`, input size {}, "
1003
0
                                "output "
1004
0
                                "size {}, "
1005
0
                                "tablet_id {}",
1006
0
                                uid, path, original_uid_to_path_stats.at(uid).at(path), size,
1007
0
                                tablet->tablet_id());
1008
0
                    }
1009
0
                }
1010
0
            }
1011
0
        }
1012
        // in this case, input stats is accurate, so we check the stats size and stats value
1013
0
        else {
1014
0
            for (const auto& [path, size] : stats) {
1015
0
                if (original_uid_to_path_stats.at(uid).find(path) ==
1016
0
                    original_uid_to_path_stats.at(uid).end()) {
1017
0
                    return Status::InternalError(
1018
0
                            "Path stats not found for uid {}, path {}, tablet_id {}", uid, path,
1019
0
                            tablet->tablet_id());
1020
0
                }
1021
0
                if (original_uid_to_path_stats.at(uid).at(path) != size) {
1022
0
                    return Status::InternalError(
1023
0
                            "Path stats not match for uid {} with path `{}`, input size {}, output "
1024
0
                            "size {}, "
1025
0
                            "tablet_id {}",
1026
0
                            uid, path, original_uid_to_path_stats.at(uid).at(path), size,
1027
0
                            tablet->tablet_id());
1028
0
                }
1029
0
            }
1030
0
        }
1031
0
    }
1032
1033
0
    return Status::OK();
1034
0
}
1035
1036
Status VariantCompactionUtil::get_compaction_typed_columns(
1037
        const TabletSchemaSPtr& target, const std::unordered_set<std::string>& typed_paths,
1038
        const TabletColumnPtr parent_column, TabletSchemaSPtr& output_schema,
1039
7
        TabletSchema::PathsSetInfo& paths_set_info) {
1040
7
    if (parent_column->variant_enable_typed_paths_to_sparse()) {
1041
0
        return Status::OK();
1042
0
    }
1043
7
    for (const auto& path : typed_paths) {
1044
4
        TabletSchema::SubColumnInfo sub_column_info;
1045
4
        if (generate_sub_column_info(*target, parent_column->unique_id(), path, &sub_column_info)) {
1046
3
            inherit_column_attributes(*parent_column, sub_column_info.column);
1047
3
            output_schema->append_column(sub_column_info.column);
1048
3
            paths_set_info.typed_path_set.insert({path, std::move(sub_column_info)});
1049
3
            VLOG_DEBUG << "append typed column " << path;
1050
3
        } else {
1051
1
            return Status::InternalError("Failed to generate sub column info for path {}", path);
1052
1
        }
1053
4
    }
1054
6
    return Status::OK();
1055
7
}
1056
1057
Status VariantCompactionUtil::get_compaction_nested_columns(
1058
        const std::unordered_set<PathInData, PathInData::Hash>& nested_paths,
1059
        const PathToDataTypes& path_to_data_types, const TabletColumnPtr parent_column,
1060
7
        TabletSchemaSPtr& output_schema, TabletSchema::PathsSetInfo& paths_set_info) {
1061
7
    const auto& parent_indexes = output_schema->inverted_indexs(parent_column->unique_id());
1062
7
    for (const auto& path : nested_paths) {
1063
3
        const auto& find_data_types = path_to_data_types.find(path);
1064
3
        if (find_data_types == path_to_data_types.end() || find_data_types->second.empty()) {
1065
1
            return Status::InternalError("Nested path {} has no data type", path.get_path());
1066
1
        }
1067
2
        DataTypePtr data_type;
1068
2
        get_least_supertype_jsonb(find_data_types->second, &data_type);
1069
1070
2
        const std::string& column_name = parent_column->name_lower_case() + "." + path.get_path();
1071
2
        PathInDataBuilder full_path_builder;
1072
2
        auto full_path = full_path_builder.append(parent_column->name_lower_case(), false)
1073
2
                                 .append(path.get_parts(), false)
1074
2
                                 .build();
1075
2
        TabletColumn nested_column =
1076
2
                get_column_by_type(data_type, column_name,
1077
2
                                   ExtraInfo {.unique_id = -1,
1078
2
                                              .parent_unique_id = parent_column->unique_id(),
1079
2
                                              .path_info = full_path});
1080
2
        inherit_column_attributes(*parent_column, nested_column);
1081
2
        TabletIndexes sub_column_indexes;
1082
2
        inherit_index(parent_indexes, sub_column_indexes, nested_column);
1083
2
        paths_set_info.subcolumn_indexes.emplace(path.get_path(), std::move(sub_column_indexes));
1084
2
        output_schema->append_column(nested_column);
1085
2
        VLOG_DEBUG << "append nested column " << path.get_path();
1086
2
    }
1087
6
    return Status::OK();
1088
7
}
1089
1090
void VariantCompactionUtil::get_compaction_subcolumns_from_subpaths(
1091
        TabletSchema::PathsSetInfo& paths_set_info, const TabletColumnPtr parent_column,
1092
        const TabletSchemaSPtr& target, const PathToDataTypes& path_to_data_types,
1093
12
        const std::unordered_set<std::string>& sparse_paths, TabletSchemaSPtr& output_schema) {
1094
12
    auto& path_set = paths_set_info.sub_path_set;
1095
12
    std::vector<StringRef> sorted_subpaths(path_set.begin(), path_set.end());
1096
12
    std::sort(sorted_subpaths.begin(), sorted_subpaths.end());
1097
12
    const auto& parent_indexes = target->inverted_indexs(parent_column->unique_id());
1098
    // append subcolumns
1099
36
    for (const auto& subpath : sorted_subpaths) {
1100
36
        auto column_name = parent_column->name_lower_case() + "." + subpath.to_string();
1101
36
        auto column_path = PathInData(column_name);
1102
1103
36
        const auto& find_data_types = path_to_data_types.find(PathInData(subpath));
1104
1105
        // some cases: the subcolumn type is variant
1106
        // 1. this path has no data type in segments
1107
        // 2. this path is in sparse paths
1108
        // 3. the sparse paths are too much
1109
36
        TabletSchema::SubColumnInfo sub_column_info;
1110
36
        if (parent_column->variant_enable_typed_paths_to_sparse() &&
1111
36
            generate_sub_column_info(*target, parent_column->unique_id(), std::string(subpath),
1112
16
                                     &sub_column_info)) {
1113
8
            inherit_column_attributes(*parent_column, sub_column_info.column);
1114
8
            output_schema->append_column(sub_column_info.column);
1115
8
            paths_set_info.subcolumn_indexes.emplace(subpath, std::move(sub_column_info.indexes));
1116
8
            VLOG_DEBUG << "append typed column " << subpath;
1117
28
        } else if (find_data_types == path_to_data_types.end() || find_data_types->second.empty() ||
1118
28
                   sparse_paths.find(std::string(subpath)) != sparse_paths.end() ||
1119
28
                   sparse_paths.size() >=
1120
20
                           parent_column->variant_max_sparse_column_statistics_size()) {
1121
12
            TabletColumn subcolumn;
1122
12
            subcolumn.set_name(column_name);
1123
12
            subcolumn.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
1124
12
            subcolumn.set_parent_unique_id(parent_column->unique_id());
1125
12
            subcolumn.set_path_info(column_path);
1126
12
            subcolumn.set_aggregation_method(parent_column->aggregation());
1127
12
            subcolumn.set_variant_max_subcolumns_count(
1128
12
                    parent_column->variant_max_subcolumns_count());
1129
12
            subcolumn.set_is_nullable(true);
1130
12
            output_schema->append_column(subcolumn);
1131
12
            VLOG_DEBUG << "append sub column " << subpath << " data type "
1132
0
                       << "VARIANT";
1133
12
        }
1134
        // normal case: the subcolumn type can be calculated from the data types in segments
1135
16
        else {
1136
16
            DataTypePtr data_type;
1137
16
            get_least_supertype_jsonb(find_data_types->second, &data_type);
1138
16
            TabletColumn sub_column =
1139
16
                    get_column_by_type(data_type, column_name,
1140
16
                                       ExtraInfo {.unique_id = -1,
1141
16
                                                  .parent_unique_id = parent_column->unique_id(),
1142
16
                                                  .path_info = column_path});
1143
16
            inherit_column_attributes(*parent_column, sub_column);
1144
16
            TabletIndexes sub_column_indexes;
1145
16
            inherit_index(parent_indexes, sub_column_indexes, sub_column);
1146
16
            paths_set_info.subcolumn_indexes.emplace(subpath, std::move(sub_column_indexes));
1147
16
            output_schema->append_column(sub_column);
1148
16
            VLOG_DEBUG << "append sub column " << subpath << " data type " << data_type->get_name();
1149
16
        }
1150
36
    }
1151
12
}
1152
1153
void VariantCompactionUtil::get_compaction_subcolumns_from_data_types(
1154
        TabletSchema::PathsSetInfo& paths_set_info, const TabletColumnPtr parent_column,
1155
        const TabletSchemaSPtr& target, const PathToDataTypes& path_to_data_types,
1156
2
        TabletSchemaSPtr& output_schema) {
1157
2
    const auto& parent_indexes = target->inverted_indexs(parent_column->unique_id());
1158
2
    for (const auto& [path, data_types] : path_to_data_types) {
1159
2
        if (data_types.empty() || path.empty() || path.has_nested_part()) {
1160
0
            continue;
1161
0
        }
1162
2
        DataTypePtr data_type;
1163
2
        get_least_supertype_jsonb(data_types, &data_type);
1164
2
        auto column_name = parent_column->name_lower_case() + "." + path.get_path();
1165
2
        auto column_path = PathInData(column_name);
1166
2
        TabletColumn sub_column =
1167
2
                get_column_by_type(data_type, column_name,
1168
2
                                   ExtraInfo {.unique_id = -1,
1169
2
                                              .parent_unique_id = parent_column->unique_id(),
1170
2
                                              .path_info = column_path});
1171
2
        inherit_column_attributes(*parent_column, sub_column);
1172
2
        TabletIndexes sub_column_indexes;
1173
2
        inherit_index(parent_indexes, sub_column_indexes, sub_column);
1174
2
        paths_set_info.subcolumn_indexes.emplace(path.get_path(), std::move(sub_column_indexes));
1175
2
        output_schema->append_column(sub_column);
1176
2
        VLOG_DEBUG << "append sub column " << path.get_path() << " data type "
1177
0
                   << data_type->get_name();
1178
2
    }
1179
2
}
1180
1181
// Build the temporary schema for compaction
1182
// 1. aggregate path stats and data types from all rowsets
1183
// 2. append typed columns and nested columns to the output schema
1184
// 3. sort the subpaths and sparse paths for each unique id
1185
// 4. append the subpaths and sparse paths to the output schema
1186
// 5. set the path set info for each unique id
1187
// 6. return the output schema
1188
Status VariantCompactionUtil::get_extended_compaction_schema(
1189
86
        const std::vector<RowsetSharedPtr>& rowsets, TabletSchemaSPtr& target) {
1190
86
    std::unordered_map<int32_t, VariantExtendedInfo> uid_to_variant_extended_info;
1191
    // collect path stats from all rowsets and segments
1192
363
    for (const auto& rs : rowsets) {
1193
363
        RETURN_IF_ERROR(aggregate_variant_extended_info(rs, &uid_to_variant_extended_info));
1194
363
    }
1195
1196
    // If any variant column has nested group, skip extended schema and use normal compaction.
1197
    // Nested groups require special handling that is not yet supported in extended schema compaction.
1198
85
    for (const auto& [uid, info] : uid_to_variant_extended_info) {
1199
4
        if (info.has_nested_group) {
1200
0
            LOG(INFO) << "Variant column uid=" << uid
1201
0
                      << " has nested group, skip extended schema compaction";
1202
0
            return Status::OK();
1203
0
        }
1204
4
    }
1205
1206
    // build the output schema
1207
85
    TabletSchemaSPtr output_schema = std::make_shared<TabletSchema>();
1208
85
    output_schema->shawdow_copy_without_columns(*target);
1209
85
    std::unordered_map<int32_t, TabletSchema::PathsSetInfo> uid_to_paths_set_info;
1210
1.70k
    for (const TabletColumnPtr& column : target->columns()) {
1211
1.70k
        if (!column->is_extracted_column()) {
1212
1.70k
            output_schema->append_column(*column);
1213
1.70k
        }
1214
1.70k
        if (!column->is_variant_type()) {
1215
1.69k
            continue;
1216
1.69k
        }
1217
7
        VLOG_DEBUG << "column " << column->name() << " unique id " << column->unique_id();
1218
1219
7
        if (column->variant_enable_doc_mode()) {
1220
0
            const int bucket_num = std::max(1, column->variant_doc_hash_shard_count());
1221
0
            for (int b = 0; b < bucket_num; ++b) {
1222
0
                TabletColumn doc_value_bucket_column = create_doc_value_column(*column, b);
1223
0
                doc_value_bucket_column.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
1224
0
                doc_value_bucket_column.set_is_nullable(false);
1225
0
                output_schema->append_column(doc_value_bucket_column);
1226
0
            }
1227
0
            continue;
1228
0
        }
1229
1230
        // 1. append typed columns
1231
7
        RETURN_IF_ERROR(get_compaction_typed_columns(
1232
7
                target, uid_to_variant_extended_info[column->unique_id()].typed_paths, column,
1233
7
                output_schema, uid_to_paths_set_info[column->unique_id()]));
1234
        // 2. append nested columns
1235
7
        RETURN_IF_ERROR(get_compaction_nested_columns(
1236
7
                uid_to_variant_extended_info[column->unique_id()].nested_paths,
1237
7
                uid_to_variant_extended_info[column->unique_id()].path_to_data_types, column,
1238
7
                output_schema, uid_to_paths_set_info[column->unique_id()]));
1239
1240
        // 3. get the subpaths
1241
7
        get_subpaths(column->variant_max_subcolumns_count(),
1242
7
                     uid_to_variant_extended_info[column->unique_id()].path_to_none_null_values,
1243
7
                     uid_to_paths_set_info[column->unique_id()]);
1244
1245
        // 4. append subcolumns
1246
7
        if (column->variant_max_subcolumns_count() > 0 || !column->get_sub_columns().empty()) {
1247
4
            get_compaction_subcolumns_from_subpaths(
1248
4
                    uid_to_paths_set_info[column->unique_id()], column, target,
1249
4
                    uid_to_variant_extended_info[column->unique_id()].path_to_data_types,
1250
4
                    uid_to_variant_extended_info[column->unique_id()].sparse_paths, output_schema);
1251
4
        }
1252
        // variant_max_subcolumns_count == 0 and no typed paths materialized
1253
        // it means that all subcolumns are materialized, may be from old data
1254
3
        else {
1255
3
            get_compaction_subcolumns_from_data_types(
1256
3
                    uid_to_paths_set_info[column->unique_id()], column, target,
1257
3
                    uid_to_variant_extended_info[column->unique_id()].path_to_data_types,
1258
3
                    output_schema);
1259
3
        }
1260
1261
        // append sparse column(s)
1262
        // If variant uses bucketized sparse columns, append one sparse bucket column per bucket.
1263
        // Otherwise, append the single sparse column.
1264
7
        int bucket_num = std::max(1, column->variant_sparse_hash_shard_count());
1265
7
        if (bucket_num > 1) {
1266
0
            for (int b = 0; b < bucket_num; ++b) {
1267
0
                TabletColumn sparse_bucket_column = create_sparse_shard_column(*column, b);
1268
0
                output_schema->append_column(sparse_bucket_column);
1269
0
            }
1270
7
        } else {
1271
7
            TabletColumn sparse_column = create_sparse_column(*column);
1272
7
            output_schema->append_column(sparse_column);
1273
7
        }
1274
7
    }
1275
1276
85
    target = output_schema;
1277
    // used to merge & filter path to sparse column during reading in compaction
1278
85
    target->set_path_set_info(std::move(uid_to_paths_set_info));
1279
85
    VLOG_DEBUG << "dump schema " << target->dump_full_schema();
1280
85
    return Status::OK();
1281
85
}
1282
1283
// Calculate statistics about variant data paths from the encoded sparse column
1284
void VariantCompactionUtil::calculate_variant_stats(const IColumn& encoded_sparse_column,
1285
                                                    segment_v2::VariantStatisticsPB* stats,
1286
                                                    size_t max_sparse_column_statistics_size,
1287
2
                                                    size_t row_pos, size_t num_rows) {
1288
    // Cast input column to ColumnMap type since sparse column is stored as a map
1289
2
    const auto& map_column = assert_cast<const ColumnMap&>(encoded_sparse_column);
1290
1291
    // Get the keys column which contains the paths as strings
1292
2
    const auto& sparse_data_paths =
1293
2
            assert_cast<const ColumnString*>(map_column.get_keys_ptr().get());
1294
2
    const auto& serialized_sparse_column_offsets =
1295
2
            assert_cast<const ColumnArray::Offsets64&>(map_column.get_offsets());
1296
2
    auto& count_map = *stats->mutable_sparse_column_non_null_size();
1297
    // Iterate through all paths in the sparse column
1298
6
    for (size_t i = row_pos; i != row_pos + num_rows; ++i) {
1299
4
        size_t offset = serialized_sparse_column_offsets[i - 1];
1300
4
        size_t end = serialized_sparse_column_offsets[i];
1301
6
        for (size_t j = offset; j != end; ++j) {
1302
2
            auto path = sparse_data_paths->get_data_at(j);
1303
1304
2
            const auto& sparse_path = path.to_string();
1305
            // If path already exists in statistics, increment its count
1306
2
            if (auto it = count_map.find(sparse_path); it != count_map.end()) {
1307
0
                ++it->second;
1308
0
            }
1309
            // If path doesn't exist and we haven't hit the max statistics size limit,
1310
            // add it with count 1
1311
2
            else if (count_map.size() < max_sparse_column_statistics_size) {
1312
2
                count_map.emplace(sparse_path, 1);
1313
2
            }
1314
2
        }
1315
4
    }
1316
1317
2
    if (stats->sparse_column_non_null_size().size() > max_sparse_column_statistics_size) {
1318
0
        throw doris::Exception(
1319
0
                ErrorCode::INTERNAL_ERROR,
1320
0
                "Sparse column non null size: {} is greater than max statistics size: {}",
1321
0
                stats->sparse_column_non_null_size().size(), max_sparse_column_statistics_size);
1322
0
    }
1323
2
}
1324
1325
/// Calculates number of dimensions in array field.
1326
/// Returns 0 for scalar fields.
1327
class FieldVisitorToNumberOfDimensions : public StaticVisitor<size_t> {
1328
public:
1329
    FieldVisitorToNumberOfDimensions() = default;
1330
    template <PrimitiveType T>
1331
2.24M
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1332
2.24M
        if constexpr (T == TYPE_ARRAY) {
1333
127k
            const size_t size = x.size();
1334
127k
            size_t dimensions = 0;
1335
873k
            for (size_t i = 0; i < size; ++i) {
1336
745k
                size_t element_dimensions = apply_visitor(*this, x[i]);
1337
745k
                dimensions = std::max(dimensions, element_dimensions);
1338
745k
            }
1339
127k
            return 1 + dimensions;
1340
2.11M
        } else {
1341
2.11M
            return 0;
1342
2.11M
        }
1343
2.24M
    }
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE1EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1331
24.0k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1332
        if constexpr (T == TYPE_ARRAY) {
1333
            const size_t size = x.size();
1334
            size_t dimensions = 0;
1335
            for (size_t i = 0; i < size; ++i) {
1336
                size_t element_dimensions = apply_visitor(*this, x[i]);
1337
                dimensions = std::max(dimensions, element_dimensions);
1338
            }
1339
            return 1 + dimensions;
1340
24.0k
        } else {
1341
24.0k
            return 0;
1342
24.0k
        }
1343
24.0k
    }
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE26EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE42EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE7EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1331
40.9k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1332
        if constexpr (T == TYPE_ARRAY) {
1333
            const size_t size = x.size();
1334
            size_t dimensions = 0;
1335
            for (size_t i = 0; i < size; ++i) {
1336
                size_t element_dimensions = apply_visitor(*this, x[i]);
1337
                dimensions = std::max(dimensions, element_dimensions);
1338
            }
1339
            return 1 + dimensions;
1340
40.9k
        } else {
1341
40.9k
            return 0;
1342
40.9k
        }
1343
40.9k
    }
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE12EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE11EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE25EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE2EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1331
69.6k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1332
        if constexpr (T == TYPE_ARRAY) {
1333
            const size_t size = x.size();
1334
            size_t dimensions = 0;
1335
            for (size_t i = 0; i < size; ++i) {
1336
                size_t element_dimensions = apply_visitor(*this, x[i]);
1337
                dimensions = std::max(dimensions, element_dimensions);
1338
            }
1339
            return 1 + dimensions;
1340
69.6k
        } else {
1341
69.6k
            return 0;
1342
69.6k
        }
1343
69.6k
    }
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE3EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1331
6
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1332
        if constexpr (T == TYPE_ARRAY) {
1333
            const size_t size = x.size();
1334
            size_t dimensions = 0;
1335
            for (size_t i = 0; i < size; ++i) {
1336
                size_t element_dimensions = apply_visitor(*this, x[i]);
1337
                dimensions = std::max(dimensions, element_dimensions);
1338
            }
1339
            return 1 + dimensions;
1340
6
        } else {
1341
6
            return 0;
1342
6
        }
1343
6
    }
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE4EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1331
7
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1332
        if constexpr (T == TYPE_ARRAY) {
1333
            const size_t size = x.size();
1334
            size_t dimensions = 0;
1335
            for (size_t i = 0; i < size; ++i) {
1336
                size_t element_dimensions = apply_visitor(*this, x[i]);
1337
                dimensions = std::max(dimensions, element_dimensions);
1338
            }
1339
            return 1 + dimensions;
1340
7
        } else {
1341
7
            return 0;
1342
7
        }
1343
7
    }
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE5EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1331
951
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1332
        if constexpr (T == TYPE_ARRAY) {
1333
            const size_t size = x.size();
1334
            size_t dimensions = 0;
1335
            for (size_t i = 0; i < size; ++i) {
1336
                size_t element_dimensions = apply_visitor(*this, x[i]);
1337
                dimensions = std::max(dimensions, element_dimensions);
1338
            }
1339
            return 1 + dimensions;
1340
951
        } else {
1341
951
            return 0;
1342
951
        }
1343
951
    }
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE6EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1331
851k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1332
        if constexpr (T == TYPE_ARRAY) {
1333
            const size_t size = x.size();
1334
            size_t dimensions = 0;
1335
            for (size_t i = 0; i < size; ++i) {
1336
                size_t element_dimensions = apply_visitor(*this, x[i]);
1337
                dimensions = std::max(dimensions, element_dimensions);
1338
            }
1339
            return 1 + dimensions;
1340
851k
        } else {
1341
851k
            return 0;
1342
851k
        }
1343
851k
    }
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE38EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE39EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE8EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1331
1
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1332
        if constexpr (T == TYPE_ARRAY) {
1333
            const size_t size = x.size();
1334
            size_t dimensions = 0;
1335
            for (size_t i = 0; i < size; ++i) {
1336
                size_t element_dimensions = apply_visitor(*this, x[i]);
1337
                dimensions = std::max(dimensions, element_dimensions);
1338
            }
1339
            return 1 + dimensions;
1340
1
        } else {
1341
1
            return 0;
1342
1
        }
1343
1
    }
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE27EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE9EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1331
164k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1332
        if constexpr (T == TYPE_ARRAY) {
1333
            const size_t size = x.size();
1334
            size_t dimensions = 0;
1335
            for (size_t i = 0; i < size; ++i) {
1336
                size_t element_dimensions = apply_visitor(*this, x[i]);
1337
                dimensions = std::max(dimensions, element_dimensions);
1338
            }
1339
            return 1 + dimensions;
1340
164k
        } else {
1341
164k
            return 0;
1342
164k
        }
1343
164k
    }
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE36EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE37EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE23EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1331
967k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1332
        if constexpr (T == TYPE_ARRAY) {
1333
            const size_t size = x.size();
1334
            size_t dimensions = 0;
1335
            for (size_t i = 0; i < size; ++i) {
1336
                size_t element_dimensions = apply_visitor(*this, x[i]);
1337
                dimensions = std::max(dimensions, element_dimensions);
1338
            }
1339
            return 1 + dimensions;
1340
967k
        } else {
1341
967k
            return 0;
1342
967k
        }
1343
967k
    }
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE15EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE10EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE41EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE17EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1331
127k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1332
127k
        if constexpr (T == TYPE_ARRAY) {
1333
127k
            const size_t size = x.size();
1334
127k
            size_t dimensions = 0;
1335
873k
            for (size_t i = 0; i < size; ++i) {
1336
745k
                size_t element_dimensions = apply_visitor(*this, x[i]);
1337
745k
                dimensions = std::max(dimensions, element_dimensions);
1338
745k
            }
1339
127k
            return 1 + dimensions;
1340
        } else {
1341
            return 0;
1342
        }
1343
127k
    }
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE16EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1331
1
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1332
        if constexpr (T == TYPE_ARRAY) {
1333
            const size_t size = x.size();
1334
            size_t dimensions = 0;
1335
            for (size_t i = 0; i < size; ++i) {
1336
                size_t element_dimensions = apply_visitor(*this, x[i]);
1337
                dimensions = std::max(dimensions, element_dimensions);
1338
            }
1339
            return 1 + dimensions;
1340
1
        } else {
1341
1
            return 0;
1342
1
        }
1343
1
    }
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE18EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE32EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1331
1
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1332
        if constexpr (T == TYPE_ARRAY) {
1333
            const size_t size = x.size();
1334
            size_t dimensions = 0;
1335
            for (size_t i = 0; i < size; ++i) {
1336
                size_t element_dimensions = apply_visitor(*this, x[i]);
1337
                dimensions = std::max(dimensions, element_dimensions);
1338
            }
1339
            return 1 + dimensions;
1340
1
        } else {
1341
1
            return 0;
1342
1
        }
1343
1
    }
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE28EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE29EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE20EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE30EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE35EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE22EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE19EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE24EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util32FieldVisitorToNumberOfDimensions5applyILNS_13PrimitiveTypeE31EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1331
27
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1332
        if constexpr (T == TYPE_ARRAY) {
1333
            const size_t size = x.size();
1334
            size_t dimensions = 0;
1335
            for (size_t i = 0; i < size; ++i) {
1336
                size_t element_dimensions = apply_visitor(*this, x[i]);
1337
                dimensions = std::max(dimensions, element_dimensions);
1338
            }
1339
            return 1 + dimensions;
1340
27
        } else {
1341
27
            return 0;
1342
27
        }
1343
27
    }
1344
};
1345
1346
// Visitor that allows to get type of scalar field
1347
// but exclude fields contain complex field.This is a faster version
1348
// for FieldVisitorToScalarType which does not support complex field.
1349
class SimpleFieldVisitorToScalarType : public StaticVisitor<size_t> {
1350
public:
1351
    template <PrimitiveType T>
1352
1.42M
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1353
1.42M
        if constexpr (T == TYPE_ARRAY) {
1354
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1355
12.3k
        } else if constexpr (T == TYPE_NULL) {
1356
12.3k
            have_nulls = true;
1357
12.3k
            return 1;
1358
1.41M
        } else {
1359
1.41M
            type = T;
1360
1.41M
            return 1;
1361
1.41M
        }
1362
1.42M
    }
_ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE1EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1352
12.3k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1353
        if constexpr (T == TYPE_ARRAY) {
1354
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1355
12.3k
        } else if constexpr (T == TYPE_NULL) {
1356
12.3k
            have_nulls = true;
1357
12.3k
            return 1;
1358
        } else {
1359
            type = T;
1360
            return 1;
1361
        }
1362
12.3k
    }
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE26EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE42EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE7EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1352
12.3k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1353
        if constexpr (T == TYPE_ARRAY) {
1354
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1355
        } else if constexpr (T == TYPE_NULL) {
1356
            have_nulls = true;
1357
            return 1;
1358
12.3k
        } else {
1359
12.3k
            type = T;
1360
12.3k
            return 1;
1361
12.3k
        }
1362
12.3k
    }
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE12EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE11EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE25EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE2EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1352
12.4k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1353
        if constexpr (T == TYPE_ARRAY) {
1354
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1355
        } else if constexpr (T == TYPE_NULL) {
1356
            have_nulls = true;
1357
            return 1;
1358
12.4k
        } else {
1359
12.4k
            type = T;
1360
12.4k
            return 1;
1361
12.4k
        }
1362
12.4k
    }
_ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE3EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1352
2
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1353
        if constexpr (T == TYPE_ARRAY) {
1354
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1355
        } else if constexpr (T == TYPE_NULL) {
1356
            have_nulls = true;
1357
            return 1;
1358
2
        } else {
1359
2
            type = T;
1360
2
            return 1;
1361
2
        }
1362
2
    }
_ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE4EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1352
7
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1353
        if constexpr (T == TYPE_ARRAY) {
1354
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1355
        } else if constexpr (T == TYPE_NULL) {
1356
            have_nulls = true;
1357
            return 1;
1358
7
        } else {
1359
7
            type = T;
1360
7
            return 1;
1361
7
        }
1362
7
    }
_ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE5EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1352
570
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1353
        if constexpr (T == TYPE_ARRAY) {
1354
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1355
        } else if constexpr (T == TYPE_NULL) {
1356
            have_nulls = true;
1357
            return 1;
1358
570
        } else {
1359
570
            type = T;
1360
570
            return 1;
1361
570
        }
1362
570
    }
_ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE6EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1352
703k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1353
        if constexpr (T == TYPE_ARRAY) {
1354
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1355
        } else if constexpr (T == TYPE_NULL) {
1356
            have_nulls = true;
1357
            return 1;
1358
703k
        } else {
1359
703k
            type = T;
1360
703k
            return 1;
1361
703k
        }
1362
703k
    }
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE38EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE39EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE8EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1352
1
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1353
        if constexpr (T == TYPE_ARRAY) {
1354
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1355
        } else if constexpr (T == TYPE_NULL) {
1356
            have_nulls = true;
1357
            return 1;
1358
1
        } else {
1359
1
            type = T;
1360
1
            return 1;
1361
1
        }
1362
1
    }
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE27EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE9EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1352
12.6k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1353
        if constexpr (T == TYPE_ARRAY) {
1354
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1355
        } else if constexpr (T == TYPE_NULL) {
1356
            have_nulls = true;
1357
            return 1;
1358
12.6k
        } else {
1359
12.6k
            type = T;
1360
12.6k
            return 1;
1361
12.6k
        }
1362
12.6k
    }
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE36EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE37EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE23EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1352
670k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1353
        if constexpr (T == TYPE_ARRAY) {
1354
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1355
        } else if constexpr (T == TYPE_NULL) {
1356
            have_nulls = true;
1357
            return 1;
1358
670k
        } else {
1359
670k
            type = T;
1360
670k
            return 1;
1361
670k
        }
1362
670k
    }
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE15EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE10EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE41EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE17EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE16EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE18EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE32EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE28EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE29EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE20EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE30EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE35EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE22EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE19EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE24EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util30SimpleFieldVisitorToScalarType5applyILNS_13PrimitiveTypeE31EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1352
1
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1353
        if constexpr (T == TYPE_ARRAY) {
1354
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
1355
        } else if constexpr (T == TYPE_NULL) {
1356
            have_nulls = true;
1357
            return 1;
1358
1
        } else {
1359
1
            type = T;
1360
1
            return 1;
1361
1
        }
1362
1
    }
1363
1.42M
    void get_scalar_type(PrimitiveType* data_type) const { *data_type = type; }
1364
1.42M
    bool contain_nulls() const { return have_nulls; }
1365
1366
1.42M
    bool need_convert_field() const { return false; }
1367
1368
private:
1369
    PrimitiveType type = PrimitiveType::INVALID_TYPE;
1370
    bool have_nulls = false;
1371
};
1372
1373
/// Visitor that allows to get type of scalar field
1374
/// or least common type of scalars in array.
1375
/// More optimized version of FieldToDataType.
1376
class FieldVisitorToScalarType : public StaticVisitor<size_t> {
1377
public:
1378
    template <PrimitiveType T>
1379
823k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1380
823k
        if constexpr (T == TYPE_ARRAY) {
1381
127k
            size_t size = x.size();
1382
873k
            for (size_t i = 0; i < size; ++i) {
1383
745k
                apply_visitor(*this, x[i]);
1384
745k
            }
1385
127k
            return 0;
1386
127k
        } else if constexpr (T == TYPE_NULL) {
1387
11.6k
            have_nulls = true;
1388
11.6k
            return 0;
1389
683k
        } else {
1390
683k
            field_types.insert(T);
1391
683k
            type_indexes.insert(T);
1392
683k
            return 0;
1393
683k
        }
1394
823k
    }
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE1EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1379
11.6k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1380
        if constexpr (T == TYPE_ARRAY) {
1381
            size_t size = x.size();
1382
            for (size_t i = 0; i < size; ++i) {
1383
                apply_visitor(*this, x[i]);
1384
            }
1385
            return 0;
1386
11.6k
        } else if constexpr (T == TYPE_NULL) {
1387
11.6k
            have_nulls = true;
1388
11.6k
            return 0;
1389
        } else {
1390
            field_types.insert(T);
1391
            type_indexes.insert(T);
1392
            return 0;
1393
        }
1394
11.6k
    }
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE26EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE42EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE7EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1379
28.6k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1380
        if constexpr (T == TYPE_ARRAY) {
1381
            size_t size = x.size();
1382
            for (size_t i = 0; i < size; ++i) {
1383
                apply_visitor(*this, x[i]);
1384
            }
1385
            return 0;
1386
        } else if constexpr (T == TYPE_NULL) {
1387
            have_nulls = true;
1388
            return 0;
1389
28.6k
        } else {
1390
28.6k
            field_types.insert(T);
1391
28.6k
            type_indexes.insert(T);
1392
28.6k
            return 0;
1393
28.6k
        }
1394
28.6k
    }
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE12EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE11EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE25EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE2EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1379
57.2k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1380
        if constexpr (T == TYPE_ARRAY) {
1381
            size_t size = x.size();
1382
            for (size_t i = 0; i < size; ++i) {
1383
                apply_visitor(*this, x[i]);
1384
            }
1385
            return 0;
1386
        } else if constexpr (T == TYPE_NULL) {
1387
            have_nulls = true;
1388
            return 0;
1389
57.2k
        } else {
1390
57.2k
            field_types.insert(T);
1391
57.2k
            type_indexes.insert(T);
1392
57.2k
            return 0;
1393
57.2k
        }
1394
57.2k
    }
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE3EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1379
4
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1380
        if constexpr (T == TYPE_ARRAY) {
1381
            size_t size = x.size();
1382
            for (size_t i = 0; i < size; ++i) {
1383
                apply_visitor(*this, x[i]);
1384
            }
1385
            return 0;
1386
        } else if constexpr (T == TYPE_NULL) {
1387
            have_nulls = true;
1388
            return 0;
1389
4
        } else {
1390
4
            field_types.insert(T);
1391
4
            type_indexes.insert(T);
1392
4
            return 0;
1393
4
        }
1394
4
    }
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE4EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE5EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1379
381
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1380
        if constexpr (T == TYPE_ARRAY) {
1381
            size_t size = x.size();
1382
            for (size_t i = 0; i < size; ++i) {
1383
                apply_visitor(*this, x[i]);
1384
            }
1385
            return 0;
1386
        } else if constexpr (T == TYPE_NULL) {
1387
            have_nulls = true;
1388
            return 0;
1389
381
        } else {
1390
381
            field_types.insert(T);
1391
381
            type_indexes.insert(T);
1392
381
            return 0;
1393
381
        }
1394
381
    }
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE6EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1379
147k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1380
        if constexpr (T == TYPE_ARRAY) {
1381
            size_t size = x.size();
1382
            for (size_t i = 0; i < size; ++i) {
1383
                apply_visitor(*this, x[i]);
1384
            }
1385
            return 0;
1386
        } else if constexpr (T == TYPE_NULL) {
1387
            have_nulls = true;
1388
            return 0;
1389
147k
        } else {
1390
147k
            field_types.insert(T);
1391
147k
            type_indexes.insert(T);
1392
147k
            return 0;
1393
147k
        }
1394
147k
    }
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE38EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE39EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE8EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE27EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE9EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1379
151k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1380
        if constexpr (T == TYPE_ARRAY) {
1381
            size_t size = x.size();
1382
            for (size_t i = 0; i < size; ++i) {
1383
                apply_visitor(*this, x[i]);
1384
            }
1385
            return 0;
1386
        } else if constexpr (T == TYPE_NULL) {
1387
            have_nulls = true;
1388
            return 0;
1389
151k
        } else {
1390
151k
            field_types.insert(T);
1391
151k
            type_indexes.insert(T);
1392
151k
            return 0;
1393
151k
        }
1394
151k
    }
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE36EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE37EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE23EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1379
297k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1380
        if constexpr (T == TYPE_ARRAY) {
1381
            size_t size = x.size();
1382
            for (size_t i = 0; i < size; ++i) {
1383
                apply_visitor(*this, x[i]);
1384
            }
1385
            return 0;
1386
        } else if constexpr (T == TYPE_NULL) {
1387
            have_nulls = true;
1388
            return 0;
1389
297k
        } else {
1390
297k
            field_types.insert(T);
1391
297k
            type_indexes.insert(T);
1392
297k
            return 0;
1393
297k
        }
1394
297k
    }
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE15EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE10EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE41EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE17EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1379
127k
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1380
127k
        if constexpr (T == TYPE_ARRAY) {
1381
127k
            size_t size = x.size();
1382
873k
            for (size_t i = 0; i < size; ++i) {
1383
745k
                apply_visitor(*this, x[i]);
1384
745k
            }
1385
127k
            return 0;
1386
        } else if constexpr (T == TYPE_NULL) {
1387
            have_nulls = true;
1388
            return 0;
1389
        } else {
1390
            field_types.insert(T);
1391
            type_indexes.insert(T);
1392
            return 0;
1393
        }
1394
127k
    }
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE16EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1379
1
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1380
        if constexpr (T == TYPE_ARRAY) {
1381
            size_t size = x.size();
1382
            for (size_t i = 0; i < size; ++i) {
1383
                apply_visitor(*this, x[i]);
1384
            }
1385
            return 0;
1386
        } else if constexpr (T == TYPE_NULL) {
1387
            have_nulls = true;
1388
            return 0;
1389
1
        } else {
1390
1
            field_types.insert(T);
1391
1
            type_indexes.insert(T);
1392
1
            return 0;
1393
1
        }
1394
1
    }
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE18EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE32EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1379
1
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1380
        if constexpr (T == TYPE_ARRAY) {
1381
            size_t size = x.size();
1382
            for (size_t i = 0; i < size; ++i) {
1383
                apply_visitor(*this, x[i]);
1384
            }
1385
            return 0;
1386
        } else if constexpr (T == TYPE_NULL) {
1387
            have_nulls = true;
1388
            return 0;
1389
1
        } else {
1390
1
            field_types.insert(T);
1391
1
            type_indexes.insert(T);
1392
1
            return 0;
1393
1
        }
1394
1
    }
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE28EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE29EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE20EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE30EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE35EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE22EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE19EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Unexecuted instantiation: _ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE24EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
_ZN5doris12variant_util24FieldVisitorToScalarType5applyILNS_13PrimitiveTypeE31EEEmRKNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
1379
26
    size_t apply(const typename PrimitiveTypeTraits<T>::CppType& x) {
1380
        if constexpr (T == TYPE_ARRAY) {
1381
            size_t size = x.size();
1382
            for (size_t i = 0; i < size; ++i) {
1383
                apply_visitor(*this, x[i]);
1384
            }
1385
            return 0;
1386
        } else if constexpr (T == TYPE_NULL) {
1387
            have_nulls = true;
1388
            return 0;
1389
26
        } else {
1390
26
            field_types.insert(T);
1391
26
            type_indexes.insert(T);
1392
26
            return 0;
1393
26
        }
1394
26
    }
1395
77.7k
    void get_scalar_type(PrimitiveType* type) const {
1396
77.7k
        if (type_indexes.size() == 1) {
1397
            // Most cases will have only one type
1398
64.4k
            *type = *type_indexes.begin();
1399
64.4k
            return;
1400
64.4k
        }
1401
13.2k
        DataTypePtr data_type;
1402
13.2k
        get_least_supertype_jsonb(type_indexes, &data_type);
1403
13.2k
        *type = data_type->get_primitive_type();
1404
13.2k
    }
1405
77.7k
    bool contain_nulls() const { return have_nulls; }
1406
77.7k
    bool need_convert_field() const { return field_types.size() > 1; }
1407
1408
private:
1409
    phmap::flat_hash_set<PrimitiveType> type_indexes;
1410
    phmap::flat_hash_set<PrimitiveType> field_types;
1411
    bool have_nulls = false;
1412
};
1413
1414
template <typename Visitor>
1415
1.50M
void get_field_info_impl(const Field& field, FieldInfo* info) {
1416
1.50M
    Visitor to_scalar_type_visitor;
1417
1.50M
    apply_visitor(to_scalar_type_visitor, field);
1418
1.50M
    PrimitiveType type_id;
1419
1.50M
    to_scalar_type_visitor.get_scalar_type(&type_id);
1420
    // array item's dimension may missmatch, eg. [1, 2, [1, 2, 3]]
1421
1.50M
    *info = {type_id, to_scalar_type_visitor.contain_nulls(),
1422
1.50M
             to_scalar_type_visitor.need_convert_field(),
1423
1.50M
             apply_visitor(FieldVisitorToNumberOfDimensions(), field)};
1424
1.50M
}
_ZN5doris12variant_util19get_field_info_implINS0_24FieldVisitorToScalarTypeEEEvRKNS_5FieldEPNS_9FieldInfoE
Line
Count
Source
1415
77.7k
void get_field_info_impl(const Field& field, FieldInfo* info) {
1416
77.7k
    Visitor to_scalar_type_visitor;
1417
77.7k
    apply_visitor(to_scalar_type_visitor, field);
1418
77.7k
    PrimitiveType type_id;
1419
77.7k
    to_scalar_type_visitor.get_scalar_type(&type_id);
1420
    // array item's dimension may missmatch, eg. [1, 2, [1, 2, 3]]
1421
77.7k
    *info = {type_id, to_scalar_type_visitor.contain_nulls(),
1422
77.7k
             to_scalar_type_visitor.need_convert_field(),
1423
77.7k
             apply_visitor(FieldVisitorToNumberOfDimensions(), field)};
1424
77.7k
}
_ZN5doris12variant_util19get_field_info_implINS0_30SimpleFieldVisitorToScalarTypeEEEvRKNS_5FieldEPNS_9FieldInfoE
Line
Count
Source
1415
1.42M
void get_field_info_impl(const Field& field, FieldInfo* info) {
1416
1.42M
    Visitor to_scalar_type_visitor;
1417
1.42M
    apply_visitor(to_scalar_type_visitor, field);
1418
1.42M
    PrimitiveType type_id;
1419
1.42M
    to_scalar_type_visitor.get_scalar_type(&type_id);
1420
    // array item's dimension may missmatch, eg. [1, 2, [1, 2, 3]]
1421
1.42M
    *info = {type_id, to_scalar_type_visitor.contain_nulls(),
1422
1.42M
             to_scalar_type_visitor.need_convert_field(),
1423
1.42M
             apply_visitor(FieldVisitorToNumberOfDimensions(), field)};
1424
1.42M
}
1425
1426
1.50M
void get_field_info(const Field& field, FieldInfo* info) {
1427
1.50M
    if (field.is_complex_field()) {
1428
77.7k
        get_field_info_impl<FieldVisitorToScalarType>(field, info);
1429
1.42M
    } else {
1430
1.42M
        get_field_info_impl<SimpleFieldVisitorToScalarType>(field, info);
1431
1.42M
    }
1432
1.50M
}
1433
1434
bool generate_sub_column_info(const TabletSchema& schema, int32_t col_unique_id,
1435
                              const std::string& path,
1436
1.05k
                              TabletSchema::SubColumnInfo* sub_column_info) {
1437
1.05k
    const auto& parent_column = schema.column_by_uid(col_unique_id);
1438
1.05k
    std::function<void(const TabletColumn&, TabletColumn*)> generate_result_column =
1439
1.05k
            [&](const TabletColumn& from_column, TabletColumn* to_column) {
1440
31
                to_column->set_name(parent_column.name_lower_case() + "." + path);
1441
31
                to_column->set_type(from_column.type());
1442
31
                to_column->set_parent_unique_id(parent_column.unique_id());
1443
31
                bool is_typed = !parent_column.variant_enable_typed_paths_to_sparse();
1444
31
                to_column->set_path_info(
1445
31
                        PathInData(parent_column.name_lower_case() + "." + path, is_typed));
1446
31
                to_column->set_aggregation_method(parent_column.aggregation());
1447
31
                to_column->set_is_nullable(true);
1448
31
                to_column->set_parent_unique_id(parent_column.unique_id());
1449
31
                if (from_column.is_decimal()) {
1450
0
                    to_column->set_precision(from_column.precision());
1451
0
                }
1452
31
                to_column->set_frac(from_column.frac());
1453
1454
31
                if (from_column.is_array_type()) {
1455
2
                    TabletColumn nested_column;
1456
2
                    generate_result_column(*from_column.get_sub_columns()[0], &nested_column);
1457
2
                    to_column->add_sub_column(nested_column);
1458
2
                }
1459
31
            };
1460
1461
1.05k
    auto generate_index = [&](const std::string& pattern) {
1462
        // 1. find subcolumn's index
1463
29
        if (const auto& indexes = schema.inverted_index_by_field_pattern(col_unique_id, pattern);
1464
29
            !indexes.empty()) {
1465
2
            for (const auto& index : indexes) {
1466
2
                auto index_ptr = std::make_shared<TabletIndex>(*index);
1467
2
                index_ptr->set_escaped_escaped_index_suffix_path(
1468
2
                        sub_column_info->column.path_info_ptr()->get_path());
1469
2
                sub_column_info->indexes.emplace_back(std::move(index_ptr));
1470
2
            }
1471
2
        }
1472
        // 2. find parent column's index
1473
27
        else if (const auto parent_index = schema.inverted_indexs(col_unique_id);
1474
27
                 !parent_index.empty()) {
1475
0
            inherit_index(parent_index, sub_column_info->indexes, sub_column_info->column);
1476
27
        } else {
1477
27
            sub_column_info->indexes.clear();
1478
27
        }
1479
29
    };
1480
1481
1.05k
    const auto& sub_columns = parent_column.get_sub_columns();
1482
1.05k
    for (const auto& sub_column : sub_columns) {
1483
91
        const char* pattern = sub_column->name().c_str();
1484
91
        switch (sub_column->pattern_type()) {
1485
0
        case PatternTypePB::MATCH_NAME: {
1486
0
            if (strcmp(pattern, path.c_str()) == 0) {
1487
0
                generate_result_column(*sub_column, &sub_column_info->column);
1488
0
                generate_index(sub_column->name());
1489
0
                return true;
1490
0
            }
1491
0
            break;
1492
0
        }
1493
91
        case PatternTypePB::MATCH_NAME_GLOB: {
1494
91
            if (glob_match_re2(pattern, path)) {
1495
29
                generate_result_column(*sub_column, &sub_column_info->column);
1496
29
                generate_index(sub_column->name());
1497
29
                return true;
1498
29
            }
1499
62
            break;
1500
91
        }
1501
62
        default:
1502
0
            break;
1503
91
        }
1504
91
    }
1505
1.02k
    return false;
1506
1.05k
}
1507
1508
TabletSchemaSPtr VariantCompactionUtil::calculate_variant_extended_schema(
1509
0
        const std::vector<RowsetSharedPtr>& rowsets, const TabletSchemaSPtr& base_schema) {
1510
0
    if (rowsets.empty()) {
1511
0
        return nullptr;
1512
0
    }
1513
1514
0
    std::vector<TabletSchemaSPtr> schemas;
1515
0
    for (const auto& rs : rowsets) {
1516
0
        if (rs->num_segments() == 0) {
1517
0
            continue;
1518
0
        }
1519
0
        const auto& tablet_schema = rs->tablet_schema();
1520
0
        SegmentCacheHandle segment_cache;
1521
0
        auto st = SegmentLoader::instance()->load_segments(std::static_pointer_cast<BetaRowset>(rs),
1522
0
                                                           &segment_cache);
1523
0
        if (!st.ok()) {
1524
0
            return base_schema;
1525
0
        }
1526
0
        for (const auto& segment : segment_cache.get_segments()) {
1527
0
            TabletSchemaSPtr schema = tablet_schema->copy_without_variant_extracted_columns();
1528
0
            for (const auto& column : tablet_schema->columns()) {
1529
0
                if (!column->is_variant_type()) {
1530
0
                    continue;
1531
0
                }
1532
0
                std::shared_ptr<ColumnReader> column_reader;
1533
0
                OlapReaderStatistics stats;
1534
0
                st = segment->get_column_reader(column->unique_id(), &column_reader, &stats);
1535
0
                if (!st.ok()) {
1536
0
                    LOG(WARNING) << "Failed to get column reader for column: " << column->name()
1537
0
                                 << " error: " << st.to_string();
1538
0
                    continue;
1539
0
                }
1540
0
                if (!column_reader) {
1541
0
                    continue;
1542
0
                }
1543
1544
0
                CHECK(column_reader->get_meta_type() == FieldType::OLAP_FIELD_TYPE_VARIANT);
1545
0
                auto* variant_column_reader =
1546
0
                        assert_cast<segment_v2::VariantColumnReader*>(column_reader.get());
1547
                // load external meta before getting subcolumn meta info
1548
0
                st = variant_column_reader->load_external_meta_once();
1549
0
                if (!st.ok()) {
1550
0
                    LOG(WARNING) << "Failed to load external meta for column: " << column->name()
1551
0
                                 << " error: " << st.to_string();
1552
0
                    continue;
1553
0
                }
1554
0
                const auto* subcolumn_meta_info = variant_column_reader->get_subcolumns_meta_info();
1555
0
                for (const auto& entry : *subcolumn_meta_info) {
1556
0
                    if (entry->path.empty()) {
1557
0
                        continue;
1558
0
                    }
1559
0
                    const std::string& column_name =
1560
0
                            column->name_lower_case() + "." + entry->path.get_path();
1561
0
                    const DataTypePtr& data_type = entry->data.file_column_type;
1562
0
                    PathInDataBuilder full_path_builder;
1563
0
                    auto full_path = full_path_builder.append(column->name_lower_case(), false)
1564
0
                                             .append(entry->path.get_parts(), false)
1565
0
                                             .build();
1566
0
                    TabletColumn subcolumn =
1567
0
                            get_column_by_type(data_type, column_name,
1568
0
                                               ExtraInfo {.unique_id = -1,
1569
0
                                                          .parent_unique_id = column->unique_id(),
1570
0
                                                          .path_info = full_path});
1571
0
                    schema->append_column(subcolumn);
1572
0
                }
1573
0
            }
1574
0
            schemas.emplace_back(schema);
1575
0
        }
1576
0
    }
1577
0
    TabletSchemaSPtr least_common_schema;
1578
0
    auto st = get_least_common_schema(schemas, base_schema, least_common_schema, false);
1579
0
    if (!st.ok()) {
1580
0
        return base_schema;
1581
0
    }
1582
0
    return least_common_schema;
1583
0
}
1584
1585
bool inherit_index(const std::vector<const TabletIndex*>& parent_indexes,
1586
                   TabletIndexes& subcolumns_indexes, FieldType column_type,
1587
58
                   const std::string& suffix_path, bool is_array_nested_type) {
1588
58
    if (parent_indexes.empty()) {
1589
44
        return false;
1590
44
    }
1591
14
    subcolumns_indexes.clear();
1592
    // bkd index or array index only need to inherit one index
1593
14
    if (field_is_numeric_type(column_type) ||
1594
14
        (is_array_nested_type &&
1595
8
         (field_is_numeric_type(column_type) || field_is_slice_type(column_type)))) {
1596
7
        auto index_ptr = std::make_shared<TabletIndex>(*parent_indexes[0]);
1597
7
        index_ptr->set_escaped_escaped_index_suffix_path(suffix_path);
1598
        // no need parse for bkd index or array index
1599
7
        index_ptr->remove_parser_and_analyzer();
1600
7
        subcolumns_indexes.emplace_back(std::move(index_ptr));
1601
7
        return true;
1602
7
    }
1603
    // string type need to inherit all indexes
1604
7
    else if (field_is_slice_type(column_type) && !is_array_nested_type) {
1605
6
        for (const auto& index : parent_indexes) {
1606
6
            auto index_ptr = std::make_shared<TabletIndex>(*index);
1607
6
            index_ptr->set_escaped_escaped_index_suffix_path(suffix_path);
1608
6
            subcolumns_indexes.emplace_back(std::move(index_ptr));
1609
6
        }
1610
5
        return true;
1611
5
    }
1612
2
    return false;
1613
14
}
1614
1615
bool inherit_index(const std::vector<const TabletIndex*>& parent_indexes,
1616
61
                   TabletIndexes& subcolumns_indexes, const TabletColumn& column) {
1617
61
    if (!column.is_extracted_column()) {
1618
3
        return false;
1619
3
    }
1620
58
    if (column.is_array_type()) {
1621
4
        if (column.get_sub_columns().empty()) {
1622
0
            return false;
1623
0
        }
1624
4
        const TabletColumn* nested = column.get_sub_columns()[0].get();
1625
4
        while (nested != nullptr && nested->is_array_type()) {
1626
0
            if (nested->get_sub_columns().empty()) {
1627
0
                return false;
1628
0
            }
1629
0
            nested = nested->get_sub_columns()[0].get();
1630
0
        }
1631
4
        if (nested == nullptr) {
1632
0
            return false;
1633
0
        }
1634
4
        return inherit_index(parent_indexes, subcolumns_indexes, nested->type(),
1635
4
                             column.path_info_ptr()->get_path(), true);
1636
4
    }
1637
54
    return inherit_index(parent_indexes, subcolumns_indexes, column.type(),
1638
54
                         column.path_info_ptr()->get_path());
1639
58
}
1640
1641
bool inherit_index(const std::vector<const TabletIndex*>& parent_indexes,
1642
0
                   TabletIndexes& subcolumns_indexes, const ColumnMetaPB& column_pb) {
1643
0
    if (!column_pb.has_column_path_info()) {
1644
0
        return false;
1645
0
    }
1646
0
    if (column_pb.type() == (int)FieldType::OLAP_FIELD_TYPE_ARRAY) {
1647
0
        if (column_pb.children_columns_size() == 0) {
1648
0
            return false;
1649
0
        }
1650
0
        const ColumnMetaPB* nested = &column_pb.children_columns(0);
1651
0
        while (nested != nullptr && nested->type() == (int)FieldType::OLAP_FIELD_TYPE_ARRAY) {
1652
0
            if (nested->children_columns_size() == 0) {
1653
0
                return false;
1654
0
            }
1655
0
            nested = &nested->children_columns(0);
1656
0
        }
1657
0
        if (nested == nullptr) {
1658
0
            return false;
1659
0
        }
1660
0
        return inherit_index(parent_indexes, subcolumns_indexes, (FieldType)nested->type(),
1661
0
                             column_pb.column_path_info().path(), true);
1662
0
    }
1663
0
    return inherit_index(parent_indexes, subcolumns_indexes, (FieldType)column_pb.type(),
1664
0
                         column_pb.column_path_info().path());
1665
0
}
1666
1667
// ============ Implementation from parse2column.cpp ============
1668
1669
/** Pool for objects that cannot be used from different threads simultaneously.
1670
  * Allows to create an object for each thread.
1671
  * Pool has unbounded size and objects are not destroyed before destruction of pool.
1672
  *
1673
  * Use it in cases when thread local storage is not appropriate
1674
  *  (when maximum number of simultaneously used objects is less
1675
  *   than number of running/sleeping threads, that has ever used object,
1676
  *   and creation/destruction of objects is expensive).
1677
  */
1678
template <typename T>
1679
class SimpleObjectPool {
1680
protected:
1681
    /// Hold all available objects in stack.
1682
    std::mutex mutex;
1683
    std::stack<std::unique_ptr<T>> stack;
1684
    /// Specialized deleter for std::unique_ptr.
1685
    /// Returns underlying pointer back to stack thus reclaiming its ownership.
1686
    struct Deleter {
1687
        SimpleObjectPool<T>* parent;
1688
12.6k
        Deleter(SimpleObjectPool<T>* parent_ = nullptr) : parent {parent_} {} /// NOLINT
1689
12.6k
        void operator()(T* owning_ptr) const {
1690
12.6k
            std::lock_guard lock {parent->mutex};
1691
12.6k
            parent->stack.emplace(owning_ptr);
1692
12.6k
        }
1693
    };
1694
1695
public:
1696
    using Pointer = std::unique_ptr<T, Deleter>;
1697
    /// Extracts and returns a pointer from the stack if it's not empty,
1698
    ///  creates a new one by calling provided f() otherwise.
1699
    template <typename Factory>
1700
12.6k
    Pointer get(Factory&& f) {
1701
12.6k
        std::unique_lock lock(mutex);
1702
12.6k
        if (stack.empty()) {
1703
1
            return {f(), this};
1704
1
        }
1705
12.6k
        auto object = stack.top().release();
1706
12.6k
        stack.pop();
1707
12.6k
        return std::unique_ptr<T, Deleter>(object, Deleter(this));
1708
12.6k
    }
variant_util.cpp:_ZN5doris12variant_util16SimpleObjectPoolINS_14JSONDataParserINS_14SimdJSONParserEEEE3getIZNS0_21parse_json_to_variantERNS_7IColumnERKNS_9StringRefEPS4_RKNS_11ParseConfigEE3$_0EESt10unique_ptrIS4_NS5_7DeleterEEOT_
Line
Count
Source
1700
12.4k
    Pointer get(Factory&& f) {
1701
12.4k
        std::unique_lock lock(mutex);
1702
12.4k
        if (stack.empty()) {
1703
1
            return {f(), this};
1704
1
        }
1705
12.4k
        auto object = stack.top().release();
1706
12.4k
        stack.pop();
1707
12.4k
        return std::unique_ptr<T, Deleter>(object, Deleter(this));
1708
12.4k
    }
variant_util.cpp:_ZN5doris12variant_util16SimpleObjectPoolINS_14JSONDataParserINS_14SimdJSONParserEEEE3getIZNS0_21parse_json_to_variantERNS_7IColumnERKNS_9ColumnStrIjEERKNS_11ParseConfigEE3$_0EESt10unique_ptrIS4_NS5_7DeleterEEOT_
Line
Count
Source
1700
201
    Pointer get(Factory&& f) {
1701
201
        std::unique_lock lock(mutex);
1702
201
        if (stack.empty()) {
1703
0
            return {f(), this};
1704
0
        }
1705
201
        auto object = stack.top().release();
1706
201
        stack.pop();
1707
201
        return std::unique_ptr<T, Deleter>(object, Deleter(this));
1708
201
    }
1709
    /// Like get(), but creates object using default constructor.
1710
    Pointer getDefault() {
1711
        return get([] { return new T; });
1712
    }
1713
};
1714
1715
SimpleObjectPool<JsonParser> parsers_pool;
1716
1717
using Node = typename ColumnVariant::Subcolumns::Node;
1718
1719
11.5k
static inline void append_binary_bytes(ColumnString::Chars& chars, const void* data, size_t size) {
1720
11.5k
    const auto old_size = chars.size();
1721
11.5k
    chars.resize(old_size + size);
1722
11.5k
    memcpy(chars.data() + old_size, reinterpret_cast<const char*>(data), size);
1723
11.5k
}
1724
1725
4.67k
static inline void append_binary_type(ColumnString::Chars& chars, FieldType type) {
1726
4.67k
    const uint8_t t = static_cast<uint8_t>(type);
1727
4.67k
    append_binary_bytes(chars, &t, sizeof(uint8_t));
1728
4.67k
}
1729
1730
2.22k
static inline void append_binary_sizet(ColumnString::Chars& chars, size_t v) {
1731
2.22k
    append_binary_bytes(chars, &v, sizeof(size_t));
1732
2.22k
}
1733
1734
4.67k
static void append_field_to_binary_chars(const Field& field, ColumnString::Chars& chars) {
1735
4.67k
    switch (field.get_type()) {
1736
0
    case PrimitiveType::TYPE_NULL: {
1737
0
        append_binary_type(chars, FieldType::OLAP_FIELD_TYPE_NONE);
1738
0
        return;
1739
0
    }
1740
2
    case PrimitiveType::TYPE_BOOLEAN: {
1741
2
        append_binary_type(chars,
1742
2
                           TabletColumn::get_field_type_by_type(PrimitiveType::TYPE_BOOLEAN));
1743
2
        const auto v = static_cast<UInt8>(field.get<PrimitiveType::TYPE_BOOLEAN>());
1744
2
        append_binary_bytes(chars, &v, sizeof(UInt8));
1745
2
        return;
1746
0
    }
1747
2.44k
    case PrimitiveType::TYPE_BIGINT: {
1748
2.44k
        append_binary_type(chars, TabletColumn::get_field_type_by_type(PrimitiveType::TYPE_BIGINT));
1749
2.44k
        const auto v = field.get<PrimitiveType::TYPE_BIGINT>();
1750
2.44k
        append_binary_bytes(chars, &v, sizeof(Int64));
1751
2.44k
        return;
1752
0
    }
1753
1
    case PrimitiveType::TYPE_LARGEINT: {
1754
1
        append_binary_type(chars,
1755
1
                           TabletColumn::get_field_type_by_type(PrimitiveType::TYPE_LARGEINT));
1756
1
        const auto v = field.get<PrimitiveType::TYPE_LARGEINT>();
1757
1
        append_binary_bytes(chars, &v, sizeof(int128_t));
1758
1
        return;
1759
0
    }
1760
1
    case PrimitiveType::TYPE_DOUBLE: {
1761
1
        append_binary_type(chars, TabletColumn::get_field_type_by_type(PrimitiveType::TYPE_DOUBLE));
1762
1
        const auto v = field.get<PrimitiveType::TYPE_DOUBLE>();
1763
1
        append_binary_bytes(chars, &v, sizeof(Float64));
1764
1
        return;
1765
0
    }
1766
2.22k
    case PrimitiveType::TYPE_STRING: {
1767
2.22k
        append_binary_type(chars, FieldType::OLAP_FIELD_TYPE_STRING);
1768
2.22k
        const auto& v = field.get<PrimitiveType::TYPE_STRING>();
1769
2.22k
        append_binary_sizet(chars, v.size());
1770
2.22k
        append_binary_bytes(chars, v.data(), v.size());
1771
2.22k
        return;
1772
0
    }
1773
0
    case PrimitiveType::TYPE_JSONB: {
1774
0
        append_binary_type(chars, FieldType::OLAP_FIELD_TYPE_JSONB);
1775
0
        const auto& v = field.get<PrimitiveType::TYPE_JSONB>();
1776
0
        append_binary_sizet(chars, v.get_size());
1777
0
        append_binary_bytes(chars, v.get_value(), v.get_size());
1778
0
        return;
1779
0
    }
1780
7
    case PrimitiveType::TYPE_ARRAY: {
1781
7
        append_binary_type(chars, FieldType::OLAP_FIELD_TYPE_ARRAY);
1782
7
        const auto& a = field.get<PrimitiveType::TYPE_ARRAY>();
1783
7
        append_binary_sizet(chars, a.size());
1784
12
        for (const auto& elem : a) {
1785
12
            append_field_to_binary_chars(elem, chars);
1786
12
        }
1787
7
        return;
1788
0
    }
1789
0
    default:
1790
0
        throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Unsupported field type {}",
1791
0
                               field.get_type());
1792
4.67k
    }
1793
4.67k
}
1794
/// Visitor that keeps @num_dimensions_to_keep dimensions in arrays
1795
/// and replaces all scalars or nested arrays to @replacement at that level.
1796
class FieldVisitorReplaceScalars : public StaticVisitor<Field> {
1797
public:
1798
    FieldVisitorReplaceScalars(const Field& replacement_, size_t num_dimensions_to_keep_)
1799
0
            : replacement(replacement_), num_dimensions_to_keep(num_dimensions_to_keep_) {}
1800
    template <PrimitiveType T>
1801
    Field operator()(const typename PrimitiveTypeTraits<T>::CppType& x) const {
1802
        if constexpr (T == TYPE_ARRAY) {
1803
            if (num_dimensions_to_keep == 0) {
1804
                return replacement;
1805
            }
1806
            const size_t size = x.size();
1807
            Array res(size);
1808
            for (size_t i = 0; i < size; ++i) {
1809
                res[i] = apply_visitor(
1810
                        FieldVisitorReplaceScalars(replacement, num_dimensions_to_keep - 1), x[i]);
1811
            }
1812
            return Field::create_field<TYPE_ARRAY>(res);
1813
        } else {
1814
            return replacement;
1815
        }
1816
    }
1817
1818
private:
1819
    const Field& replacement;
1820
    size_t num_dimensions_to_keep;
1821
};
1822
1823
template <typename ParserImpl>
1824
void parse_json_to_variant_impl(IColumn& column, const char* src, size_t length,
1825
80.5k
                                JSONDataParser<ParserImpl>* parser, const ParseConfig& config) {
1826
80.5k
    auto& column_variant = assert_cast<ColumnVariant&>(column);
1827
80.5k
    std::optional<ParseResult> result;
1828
    /// Treat empty string as an empty object
1829
    /// for better CAST from String to Object.
1830
80.5k
    if (length > 0) {
1831
80.5k
        result = parser->parse(src, length, config);
1832
80.5k
    } else {
1833
1
        result = ParseResult {};
1834
1
    }
1835
80.5k
    if (!result) {
1836
11
        VLOG_DEBUG << "failed to parse " << std::string_view(src, length) << ", length= " << length;
1837
11
        if (config::variant_throw_exeception_on_invalid_json) {
1838
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Failed to parse object {}",
1839
0
                                   std::string_view(src, length));
1840
0
        }
1841
        // Treat as string
1842
11
        PathInData root_path;
1843
11
        Field field = Field::create_field<TYPE_STRING>(String(src, length));
1844
11
        result = ParseResult {{root_path}, {field}};
1845
11
    }
1846
80.5k
    auto& [paths, values] = *result;
1847
80.5k
    assert(paths.size() == values.size());
1848
80.0k
    size_t old_num_rows = column_variant.rows();
1849
80.0k
    if (config.enable_flatten_nested) {
1850
        // here we should check the paths in variant and paths in result,
1851
        // if two paths which same prefix have different structure, we should throw an exception
1852
3.00k
        std::vector<PathInData> check_paths;
1853
11.9k
        for (const auto& entry : column_variant.get_subcolumns()) {
1854
11.9k
            check_paths.push_back(entry->path);
1855
11.9k
        }
1856
3.00k
        check_paths.insert(check_paths.end(), paths.begin(), paths.end());
1857
3.00k
        THROW_IF_ERROR(check_variant_has_no_ambiguous_paths(check_paths));
1858
3.00k
    }
1859
80.0k
    auto [doc_value_data_paths, doc_value_data_values] =
1860
80.0k
            column_variant.get_doc_value_data_paths_and_values();
1861
80.0k
    auto& doc_value_data_offsets = column_variant.serialized_doc_value_column_offsets();
1862
1863
1.35M
    auto flush_defaults = [](ColumnVariant::Subcolumn* subcolumn) {
1864
1.35M
        const auto num_defaults = subcolumn->cur_num_of_defaults();
1865
1.35M
        if (num_defaults > 0) {
1866
104k
            subcolumn->insert_many_defaults(num_defaults);
1867
104k
            subcolumn->reset_current_num_of_defaults();
1868
104k
        }
1869
1.35M
    };
1870
1871
80.0k
    auto get_or_create_subcolumn = [&](const PathInData& path, size_t index_hint,
1872
1.35M
                                       const FieldInfo& field_info) -> ColumnVariant::Subcolumn* {
1873
1.35M
        if (column_variant.get_subcolumn(path, index_hint) == nullptr) {
1874
1.57k
            if (path.has_nested_part()) {
1875
8
                column_variant.add_nested_subcolumn(path, field_info, old_num_rows);
1876
1.57k
            } else {
1877
1.57k
                column_variant.add_sub_column(path, old_num_rows);
1878
1.57k
            }
1879
1.57k
        }
1880
1.35M
        auto* subcolumn = column_variant.get_subcolumn(path, index_hint);
1881
1.35M
        if (!subcolumn) {
1882
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Failed to find sub column {}",
1883
0
                                   path.get_path());
1884
0
        }
1885
1.35M
        return subcolumn;
1886
1.35M
    };
1887
1888
80.0k
    auto insert_into_subcolumn = [&](size_t i,
1889
1.35M
                                     bool check_size_mismatch) -> ColumnVariant::Subcolumn* {
1890
1.35M
        FieldInfo field_info;
1891
1.35M
        get_field_info(values[i], &field_info);
1892
1.35M
        if (field_info.scalar_type_id == PrimitiveType::INVALID_TYPE) {
1893
101
            return nullptr;
1894
101
        }
1895
1.35M
        auto* subcolumn = get_or_create_subcolumn(paths[i], i, field_info);
1896
1.35M
        flush_defaults(subcolumn);
1897
1.35M
        if (check_size_mismatch && subcolumn->size() != old_num_rows) {
1898
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
1899
0
                                   "subcolumn {} size missmatched, may contains duplicated entry",
1900
0
                                   paths[i].get_path());
1901
0
        }
1902
1.35M
        subcolumn->insert(std::move(values[i]), std::move(field_info));
1903
1.35M
        return subcolumn;
1904
1.35M
    };
1905
1906
80.0k
    switch (config.parse_to) {
1907
79.0k
    case ParseConfig::ParseTo::OnlySubcolumns:
1908
1.43M
        for (size_t i = 0; i < paths.size(); ++i) {
1909
1.35M
            insert_into_subcolumn(i, true);
1910
1.35M
        }
1911
79.0k
        break;
1912
1.01k
    case ParseConfig::ParseTo::OnlyDocValueColumn: {
1913
1.01k
        std::vector<size_t> doc_item_indexes;
1914
1.01k
        doc_item_indexes.reserve(paths.size());
1915
1.01k
        phmap::flat_hash_set<StringRef, StringRefHash> seen_paths;
1916
1.01k
        seen_paths.reserve(paths.size());
1917
1918
5.67k
        for (size_t i = 0; i < paths.size(); ++i) {
1919
4.66k
            FieldInfo field_info;
1920
4.66k
            get_field_info(values[i], &field_info);
1921
4.66k
            if (paths[i].empty()) {
1922
0
                auto* subcolumn = column_variant.get_subcolumn(paths[i]);
1923
0
                DCHECK(subcolumn != nullptr);
1924
0
                flush_defaults(subcolumn);
1925
0
                subcolumn->insert(std::move(values[i]), std::move(field_info));
1926
0
                continue;
1927
0
            }
1928
4.66k
            if (field_info.scalar_type_id == PrimitiveType::INVALID_TYPE ||
1929
4.66k
                values[i].get_type() == PrimitiveType::TYPE_NULL) {
1930
0
                continue;
1931
0
            }
1932
4.66k
            const auto& path_str = paths[i].get_path();
1933
4.66k
            StringRef path_ref {path_str.data(), path_str.size()};
1934
4.66k
            if (UNLIKELY(!seen_paths.emplace(path_ref).second)) {
1935
0
                throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
1936
0
                                       "may contains duplicated entry : {}",
1937
0
                                       std::string_view(path_str));
1938
0
            }
1939
4.66k
            doc_item_indexes.push_back(i);
1940
4.66k
        }
1941
1942
1.01k
        std::sort(doc_item_indexes.begin(), doc_item_indexes.end(),
1943
7.36k
                  [&](size_t l, size_t r) { return paths[l].get_path() < paths[r].get_path(); });
1944
4.66k
        for (const auto idx : doc_item_indexes) {
1945
4.66k
            const auto& path_str = paths[idx].get_path();
1946
4.66k
            doc_value_data_paths->insert_data(path_str.data(), path_str.size());
1947
4.66k
            auto& chars = doc_value_data_values->get_chars();
1948
4.66k
            append_field_to_binary_chars(values[idx], chars);
1949
4.66k
            doc_value_data_values->get_offsets().push_back(chars.size());
1950
4.66k
        }
1951
1.01k
    } break;
1952
80.0k
    }
1953
80.0k
    doc_value_data_offsets.push_back(doc_value_data_paths->size());
1954
    // /// Insert default values to missed subcolumns.
1955
80.0k
    const auto& subcolumns = column_variant.get_subcolumns();
1956
1.73M
    for (const auto& entry : subcolumns) {
1957
1.73M
        if (entry->data.size() == old_num_rows) {
1958
            // Handle nested paths differently from simple paths
1959
384k
            if (entry->path.has_nested_part()) {
1960
                // Try to insert default from nested, if failed, insert regular default
1961
0
                bool success = UNLIKELY(column_variant.try_insert_default_from_nested(entry));
1962
0
                if (!success) {
1963
0
                    entry->data.insert_default();
1964
0
                }
1965
384k
            } else {
1966
                // For non-nested paths, increment default counter
1967
384k
                entry->data.increment_default_counter();
1968
384k
            }
1969
384k
        }
1970
1.73M
    }
1971
80.0k
    column_variant.incr_num_rows();
1972
80.0k
    auto sparse_column = column_variant.get_sparse_column();
1973
80.0k
    if (sparse_column->size() == old_num_rows) {
1974
80.0k
        sparse_column->assume_mutable()->insert_default();
1975
80.0k
    }
1976
80.0k
#ifndef NDEBUG
1977
80.0k
    column_variant.check_consistency();
1978
80.0k
#endif
1979
80.0k
}
1980
1981
// exposed interfaces
1982
void parse_json_to_variant(IColumn& column, const StringRef& json, JsonParser* parser,
1983
12.4k
                           const ParseConfig& config) {
1984
12.4k
    if (parser) {
1985
0
        return parse_json_to_variant_impl(column, json.data, json.size, parser, config);
1986
12.4k
    } else {
1987
12.4k
        auto pool_parser = parsers_pool.get([] { return new JsonParser(); });
1988
12.4k
        return parse_json_to_variant_impl(column, json.data, json.size, pool_parser.get(), config);
1989
12.4k
    }
1990
12.4k
}
1991
1992
void parse_json_to_variant(IColumn& column, const ColumnString& raw_json_column,
1993
201
                           const ParseConfig& config) {
1994
201
    auto parser = parsers_pool.get([] { return new JsonParser(); });
1995
68.3k
    for (size_t i = 0; i < raw_json_column.size(); ++i) {
1996
68.1k
        StringRef raw_json = raw_json_column.get_data_at(i);
1997
68.1k
        parse_json_to_variant_impl(column, raw_json.data, raw_json.size, parser.get(), config);
1998
68.1k
    }
1999
201
    column.finalize();
2000
201
}
2001
2002
// parse the doc snapshot column to subcolumns
2003
0
void materialize_docs_to_subcolumns(ColumnVariant& column_variant) {
2004
0
    auto subcolumns = materialize_docs_to_subcolumns_map(column_variant);
2005
2006
0
    for (auto& entry : subcolumns) {
2007
0
        entry.second.finalize();
2008
0
        if (!column_variant.add_sub_column(PathInData(entry.first),
2009
0
                                           IColumn::mutate(entry.second.get_finalized_column_ptr()),
2010
0
                                           entry.second.get_least_common_type())) {
2011
0
            throw doris::Exception(ErrorCode::INTERNAL_ERROR,
2012
0
                                   "Failed to add subcolumn {}, which is from doc snapshot column",
2013
0
                                   entry.first);
2014
0
        }
2015
0
    }
2016
2017
0
    column_variant.finalize();
2018
0
}
2019
2020
// ============ Implementation from variant_util.cpp ============
2021
2022
phmap::flat_hash_map<std::string_view, ColumnVariant::Subcolumn> materialize_docs_to_subcolumns_map(
2023
3
        const ColumnVariant& variant) {
2024
3
    phmap::flat_hash_map<std::string_view, ColumnVariant::Subcolumn> subcolumns;
2025
2026
3
    const auto [column_key, column_value] = variant.get_doc_value_data_paths_and_values();
2027
3
    const auto& column_offsets = variant.serialized_doc_value_column_offsets();
2028
3
    const size_t num_rows = column_offsets.size();
2029
2030
3
    DCHECK_EQ(num_rows, variant.size()) << "doc snapshot offsets size mismatch with variant rows";
2031
2032
    // Best-effort reserve: at most number of kv pairs.
2033
3
    subcolumns.reserve(column_key->size());
2034
2035
10
    for (size_t row = 0; row < num_rows; ++row) {
2036
7
        const size_t start = (row == 0) ? 0 : column_offsets[row - 1];
2037
7
        const size_t end = column_offsets[row];
2038
24
        for (size_t i = start; i < end; ++i) {
2039
17
            const auto& key = column_key->get_data_at(i);
2040
17
            const std::string_view path_sv(key.data, key.size);
2041
2042
17
            auto [it, inserted] =
2043
17
                    subcolumns.try_emplace(path_sv, ColumnVariant::Subcolumn {0, true, false});
2044
17
            auto& subcolumn = it->second;
2045
17
            if (inserted) {
2046
10
                subcolumn.insert_many_defaults(row);
2047
10
            } else if (subcolumn.size() != row) {
2048
1
                subcolumn.insert_many_defaults(row - subcolumn.size());
2049
1
            }
2050
17
            subcolumn.deserialize_from_binary_column(column_value, i);
2051
17
        }
2052
7
    }
2053
2054
10
    for (auto& [path, subcolumn] : subcolumns) {
2055
10
        if (subcolumn.size() != num_rows) {
2056
4
            subcolumn.insert_many_defaults(num_rows - subcolumn.size());
2057
4
        }
2058
10
    }
2059
2060
3
    return subcolumns;
2061
3
}
2062
2063
Status _parse_and_materialize_variant_columns(Block& block,
2064
                                              const std::vector<uint32_t>& variant_pos,
2065
144
                                              const std::vector<ParseConfig>& configs) {
2066
423
    for (size_t i = 0; i < variant_pos.size(); ++i) {
2067
279
        auto column_ref = block.get_by_position(variant_pos[i]).column;
2068
279
        bool is_nullable = column_ref->is_nullable();
2069
279
        MutableColumnPtr var_column = column_ref->assume_mutable();
2070
279
        if (is_nullable) {
2071
1
            const auto& nullable = assert_cast<const ColumnNullable&>(*column_ref);
2072
1
            var_column = nullable.get_nested_column_ptr()->assume_mutable();
2073
1
        }
2074
279
        auto& var = assert_cast<ColumnVariant&>(*var_column);
2075
279
        var_column->finalize();
2076
2077
279
        MutableColumnPtr variant_column;
2078
279
        if (!var.is_scalar_variant()) {
2079
            // already parsed
2080
273
            continue;
2081
273
        }
2082
2083
6
        VLOG_DEBUG << "parse scalar variant column: " << var.get_root_type()->get_name();
2084
6
        ColumnPtr scalar_root_column;
2085
6
        if (var.get_root_type()->get_primitive_type() == TYPE_JSONB) {
2086
            // TODO more efficient way to parse jsonb type, currently we just convert jsonb to
2087
            // json str and parse them into variant
2088
1
            RETURN_IF_ERROR(cast_column({var.get_root(), var.get_root_type(), ""},
2089
1
                                        var.get_root()->is_nullable()
2090
1
                                                ? make_nullable(std::make_shared<DataTypeString>())
2091
1
                                                : std::make_shared<DataTypeString>(),
2092
1
                                        &scalar_root_column));
2093
1
            if (scalar_root_column->is_nullable()) {
2094
1
                scalar_root_column = assert_cast<const ColumnNullable*>(scalar_root_column.get())
2095
1
                                             ->get_nested_column_ptr();
2096
1
            }
2097
5
        } else {
2098
5
            const auto& root = *var.get_root();
2099
5
            scalar_root_column =
2100
5
                    root.is_nullable()
2101
5
                            ? assert_cast<const ColumnNullable&>(root).get_nested_column_ptr()
2102
5
                            : var.get_root();
2103
5
        }
2104
2105
6
        if (scalar_root_column->is_column_string()) {
2106
6
            variant_column = ColumnVariant::create(0);
2107
6
            parse_json_to_variant(*variant_column.get(),
2108
6
                                  assert_cast<const ColumnString&>(*scalar_root_column),
2109
6
                                  configs[i]);
2110
6
        } else {
2111
            // Root maybe other types rather than string like ColumnVariant(Int32).
2112
            // In this case, we should finlize the root and cast to JSON type
2113
0
            auto expected_root_type =
2114
0
                    make_nullable(std::make_shared<ColumnVariant::MostCommonType>());
2115
0
            var.ensure_root_node_type(expected_root_type);
2116
0
            variant_column = var.assume_mutable();
2117
0
        }
2118
2119
        // Wrap variant with nullmap if it is nullable
2120
6
        ColumnPtr result = variant_column->get_ptr();
2121
6
        if (is_nullable) {
2122
1
            const auto& null_map =
2123
1
                    assert_cast<const ColumnNullable&>(*column_ref).get_null_map_column_ptr();
2124
1
            result = ColumnNullable::create(result, null_map);
2125
1
        }
2126
6
        block.get_by_position(variant_pos[i]).column = result;
2127
6
    }
2128
144
    return Status::OK();
2129
144
}
2130
2131
Status parse_and_materialize_variant_columns(Block& block, const std::vector<uint32_t>& variant_pos,
2132
144
                                             const std::vector<ParseConfig>& configs) {
2133
144
    RETURN_IF_CATCH_EXCEPTION(
2134
144
            { return _parse_and_materialize_variant_columns(block, variant_pos, configs); });
2135
144
}
2136
2137
Status parse_and_materialize_variant_columns(Block& block, const TabletSchema& tablet_schema,
2138
136
                                             const std::vector<uint32_t>& column_pos) {
2139
136
    std::vector<uint32_t> variant_column_pos;
2140
136
    std::vector<uint32_t> variant_schema_pos;
2141
136
    variant_column_pos.reserve(column_pos.size());
2142
136
    variant_schema_pos.reserve(column_pos.size());
2143
812
    for (size_t block_pos = 0; block_pos < column_pos.size(); ++block_pos) {
2144
676
        const uint32_t schema_pos = column_pos[block_pos];
2145
676
        const auto& column = tablet_schema.column(schema_pos);
2146
676
        if (column.is_variant_type()) {
2147
271
            variant_column_pos.push_back(schema_pos);
2148
271
            variant_schema_pos.push_back(schema_pos);
2149
271
        }
2150
676
    }
2151
2152
136
    if (variant_column_pos.empty()) {
2153
0
        return Status::OK();
2154
0
    }
2155
2156
136
    std::vector<ParseConfig> configs(variant_column_pos.size());
2157
407
    for (size_t i = 0; i < variant_column_pos.size(); ++i) {
2158
271
        configs[i].enable_flatten_nested = tablet_schema.variant_flatten_nested();
2159
271
        const auto& column = tablet_schema.column(variant_schema_pos[i]);
2160
271
        if (!column.is_variant_type()) {
2161
0
            return Status::InternalError("column is not variant type, column name: {}",
2162
0
                                         column.name());
2163
0
        }
2164
        // if doc mode is not enabled, no need to parse to doc value column
2165
271
        if (!column.variant_enable_doc_mode()) {
2166
271
            configs[i].parse_to = ParseConfig::ParseTo::OnlySubcolumns;
2167
271
            continue;
2168
271
        }
2169
2170
0
        configs[i].parse_to = ParseConfig::ParseTo::OnlyDocValueColumn;
2171
0
    }
2172
2173
136
    RETURN_IF_ERROR(parse_and_materialize_variant_columns(block, variant_column_pos, configs));
2174
136
    return Status::OK();
2175
136
}
2176
2177
#include "common/compile_check_end.h"
2178
} // namespace doris::variant_util