Coverage Report

Created: 2026-05-26 10:40

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/cast/cast_to_variant.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include "core/column/column_nullable.h"
21
#include "core/data_type/data_type_variant.h"
22
#include "exprs/function/cast/cast_base.h"
23
#include "exprs/function/cast/cast_to_string.h"
24
25
namespace doris::CastWrapper {
26
27
// shared implementation for casting from variant to arbitrary non-nullable target type
28
/// Shared implementation for casting a variant column to `data_type_to`.
///
/// @param context          function context; may be nullptr. When non-null it is cloned for
///                         the inner root-column conversion.
/// @param block            block holding the source column at `arguments[0]`; the converted
///                         column is written to position `result`. The source column may be
///                         replaced in place by a finalized copy.
/// @param arguments        argument positions; only `arguments[0]` is read here.
/// @param result           position in `block` that receives the converted column.
/// @param input_rows_count expected number of rows in the produced column.
/// @param null_map         when nullptr, a nullable result without any NULL is unwrapped to
///                         its nested column before being stored.
/// @param data_type_to     target type of the cast.
/// @return Status::OK() on success, Status::InternalError if the produced column does not
///         have `input_rows_count` rows, or whatever the string / JSONB serialization
///         helpers return when the cast is delegated to them.
inline Status cast_from_variant_impl(FunctionContext* context, Block& block,
                                     const ColumnNumbers& arguments, uint32_t result,
                                     size_t input_rows_count, const NullMap::value_type* null_map,
                                     const DataTypePtr& data_type_to) {
    // Peels one ColumnNullable layer (if present) off a column pointer.
    const auto strip_nullable = [](const IColumn* column) -> const IColumn* {
        if (const auto* as_nullable = check_and_get_column<ColumnNullable>(*column)) {
            return &as_nullable->get_nested_column();
        }
        return column;
    };

    auto& source = block.get_by_position(arguments[0]);
    // Reference (not a copy): it keeps tracking the slot if the column is swapped below.
    auto& source_column = source.column;

    if (!assert_cast<const ColumnVariant&>(*strip_nullable(source_column.get())).is_finalized()) {
        // ColumnVariant should be finalized before parsing; finalizing may modify the
        // original column structure, so work on a mutable version and store it back.
        auto writable = IColumn::mutate(std::move(source.column));
        if (auto* writable_nullable = check_and_get_column<ColumnNullable>(*writable)) {
            // Go through a const view so the const accessor of the nested pointer is used.
            const auto& nullable_view = *writable_nullable;
            auto writable_nested = IColumn::mutate(nullable_view.get_nested_column_ptr());
            assert_cast<ColumnVariant&>(*writable_nested).finalize();
            ColumnPtr finalized_nested = std::move(writable_nested);
            writable_nullable->change_nested_column(finalized_nested);
        } else {
            assert_cast<ColumnVariant&>(*writable).finalize();
        }
        source.column = std::move(writable);
    }

    // Re-resolve the variant: the column object may have been replaced above.
    const auto& variant = assert_cast<const ColumnVariant&>(*strip_nullable(source.column.get()));
    ColumnPtr col_to = data_type_to->create_column();

    // Replaces `col_to` with `input_rows_count` default rows wrapped as all-NULL.
    const auto fill_all_null = [&]() {
        col_to->assert_mutable()->insert_many_defaults(input_rows_count);
        col_to = make_nullable(col_to, true);
    };

    const auto target_primitive = data_type_to->get_primitive_type();
    const bool target_is_string = is_string_type(target_primitive);
    const bool target_is_jsonb = target_primitive == TYPE_JSONB;

    // It's important to convert as many elements as possible in this context. For instance,
    // if the root of this variant column is a number column, converting it to a number column
    // is acceptable. However, if the destination type is a string and the root is not a
    // scalar root, then the entire tree should be converted to a string.
    const bool use_root = variant.is_scalar_variant() ||
                          (!variant.is_null_root() &&
                           variant.get_root_type()->get_primitive_type() != INVALID_TYPE &&
                           !target_is_string && !target_is_jsonb);

    if (use_root) {
        ColumnPtr root_column = variant.get_root();
        auto root_type = variant.get_root_type();
        DCHECK(!data_type_to->is_nullable());

        auto inner_context = context != nullptr ? context->clone() : nullptr;
        if (inner_context != nullptr) {
            inner_context->set_jsonb_string_as_string(true);
            // Disable strict mode for the inner JSONB→target conversion.
            // The variant root column may contain null/empty JSONB entries for rows
            // where the subcolumn doesn't exist (e.g., mixed-schema variant data).
            // In strict mode (INSERT context), these null entries cause the ENTIRE
            // cast to fail and return all NULLs. Since this is an internal type
            // conversion within variant, not user-provided INSERT data validation,
            // strict mode should not apply here.
            inner_context->set_enable_strict_mode(false);
        }

        // The nullable of the destination type has been removed, so the inner nullable of
        // the root column/type is removed as well before converting.
        const auto plain_root_type = remove_nullable(root_type);
        auto convert = prepare_impl(inner_context.get(), plain_root_type, data_type_to);
        Block scratch {{remove_nullable(root_column), plain_root_type, ""}};
        scratch.insert({nullptr, data_type_to, ""});

        /// Perform the requested conversion.
        const Status convert_status =
                convert(inner_context.get(), scratch, {0}, 1, input_rows_count, nullptr);
        if (convert_status.ok()) {
            col_to = scratch.get_by_position(1).column;
            col_to = wrap_in_nullable(col_to,
                                      Block({{root_column, root_type, ""},
                                             {source_column, source.type, ""},
                                             {col_to, data_type_to, ""}}),
                                      {0, 1}, input_rows_count);
        } else {
            // Fill with default values, which is null
            fill_all_null();
        }
    } else if (variant.only_have_default_values()) {
        fill_all_null();
    } else if (target_is_string) {
        // serialize to string
        return CastToStringFunction::execute_impl(context, block, arguments, result,
                                                  input_rows_count);
    } else if (target_is_jsonb) {
        // serialize to json by parsing
        return cast_from_generic_to_jsonb(context, block, arguments, result, input_rows_count);
    } else if (!data_type_to->is_nullable()) {
        // other types (string targets have already returned above)
        fill_all_null();
    } else {
        assert_cast<ColumnNullable&>(*col_to->assert_mutable())
                .insert_many_defaults(input_rows_count);
    }

    if (null_map == nullptr) {
        // Without an outer null map, a nullable result that has no NULL is unwrapped.
        if (const auto* nullable_out = check_and_get_column<ColumnNullable>(*col_to);
            nullable_out != nullptr && !nullable_out->has_null()) {
            col_to = nullable_out->get_nested_column_ptr();
        }
    }

    if (col_to->size() != input_rows_count) {
        return Status::InternalError("Unmatched row count {}, expected {}", col_to->size(),
                                     input_rows_count);
    }

    block.replace_by_position(result, std::move(col_to));
    return Status::OK();
}
144
145
struct CastFromVariant {
146
    static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
147
                          uint32_t result, size_t input_rows_count,
148
0
                          const NullMap::value_type* null_map = nullptr) {
149
0
        auto& data_type_to = block.get_by_position(result).type;
150
0
        return cast_from_variant_impl(context, block, arguments, result, input_rows_count, null_map,
151
0
                                      data_type_to);
152
0
    }
153
};
154
155
struct CastToVariant {
156
    static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
157
                          uint32_t result, size_t input_rows_count,
158
3
                          const NullMap::value_type* null_map = nullptr) {
159
        // auto& data_type_to = block.get_by_position(result).type;
160
3
        const auto& col_with_type_and_name = block.get_by_position(arguments[0]);
161
3
        const auto& from_type = col_with_type_and_name.type;
162
3
        const auto& col_from = col_with_type_and_name.column;
163
        // set variant root column/type to from column/type
164
3
        const auto& data_type_to = block.get_by_position(result).type;
165
3
        const auto* variant_type =
166
3
                typeid_cast<const DataTypeVariant*>(remove_nullable(data_type_to).get());
167
3
        auto variant = ColumnVariant::create(
168
3
                variant_type ? variant_type->variant_max_subcolumns_count() : 0,
169
3
                variant_type ? variant_type->enable_doc_mode() : false);
170
3
        variant->create_root(from_type, IColumn::mutate(col_from));
171
3
        block.replace_by_position(result, std::move(variant));
172
3
        return Status::OK();
173
3
    }
174
};
175
176
// create corresponding variant value to wrap from_type
177
/// Creates the wrapper that wraps a column of `from_type` into a variant value.
///
/// Declared `inline` because the definition lives in a header: without it, every
/// translation unit including this file would emit its own external definition,
/// which violates the one-definition rule.
///
/// @param from_type source type of the cast.
/// @param to_type   target variant type; only used for the name in the unsupported case.
/// @return `&CastToVariant::execute`, or an "unsupported" wrapper when the source is
///         already a variant.
inline WrapperType create_cast_to_variant_wrapper(const DataTypePtr& from_type,
                                                  const DataTypeVariant& to_type) {
    if (from_type->get_primitive_type() == TYPE_VARIANT) {
        // Variant-to-variant casts are rejected here
        // (original note: variant_max_subcolumns_count is not equal).
        return create_unsupport_wrapper(from_type->get_name(), to_type.get_name());
    }
    return &CastToVariant::execute;
}
185
186
// create corresponding type convert from variant
187
/// Creates the wrapper that converts a variant column to `to_type`.
///
/// Declared `inline` because the definition lives in a header: without it, every
/// translation unit including this file would emit its own external definition,
/// which violates the one-definition rule.
///
/// @param from_type source variant type; only used for the name in the unsupported case.
/// @param to_type   target type of the cast; a copy is stored in the returned wrapper.
/// @return a wrapper forwarding to cast_from_variant_impl with the captured target type,
///         or an "unsupported" wrapper when the target is itself a variant.
inline WrapperType create_cast_from_variant_wrapper(const DataTypeVariant& from_type,
                                                    const DataTypePtr& to_type) {
    if (to_type->get_primitive_type() == TYPE_VARIANT) {
        // Variant-to-variant casts are rejected here
        // (original note: variant_max_subcolumns_count is not equal).
        return create_unsupport_wrapper(from_type.get_name(), to_type->get_name());
    }
    // Capture the explicit target type by value to make the cast independent from
    // Block[result].type (and from the lifetime of the `to_type` reference).
    return [captured_to_type = to_type](FunctionContext* context, Block& block,
                                        const ColumnNumbers& arguments, uint32_t result,
                                        size_t input_rows_count,
                                        const NullMap::value_type* null_map) -> Status {
        return cast_from_variant_impl(context, block, arguments, result, input_rows_count, null_map,
                                      captured_to_type);
    };
}
203
204
} // namespace doris::CastWrapper