be/src/exprs/function/cast/cast_to_variant.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include "core/column/column_nullable.h" |
21 | | #include "core/data_type/data_type_variant.h" |
22 | | #include "exprs/function/cast/cast_base.h" |
23 | | #include "exprs/function/cast/cast_to_string.h" |
24 | | |
25 | | namespace doris::CastWrapper { |
26 | | |
27 | | // shared implementation for casting from variant to arbitrary non-nullable target type |
28 | | inline Status cast_from_variant_impl(FunctionContext* context, Block& block, |
29 | | const ColumnNumbers& arguments, uint32_t result, |
30 | | size_t input_rows_count, const NullMap::value_type* null_map, |
31 | 10 | const DataTypePtr& data_type_to) { |
32 | 10 | const auto& col_with_type_and_name = block.get_by_position(arguments[0]); |
33 | 10 | const auto& col_from = col_with_type_and_name.column; |
34 | 10 | const IColumn* variant_column = col_from.get(); |
35 | 10 | if (const auto* nullable = check_and_get_column<ColumnNullable>(*variant_column)) { |
36 | 0 | variant_column = &nullable->get_nested_column(); |
37 | 0 | } |
38 | 10 | const auto& variant = assert_cast<const ColumnVariant&>(*variant_column); |
39 | 10 | ColumnPtr col_to = data_type_to->create_column(); |
40 | | |
41 | 10 | if (!variant.is_finalized()) { |
42 | | // ColumnVariant should be finalized before parsing, finalize maybe modify original column structure |
43 | 0 | variant.assume_mutable()->finalize(); |
44 | 0 | } |
45 | | |
46 | | // It's important to convert as many elements as possible in this context. For instance, |
47 | | // if the root of this variant column is a number column, converting it to a number column |
48 | | // is acceptable. However, if the destination type is a string and root is none scalar root, then |
49 | | // we should convert the entire tree to a string. |
50 | 10 | bool is_root_valuable = variant.is_scalar_variant() || |
51 | 10 | (!variant.is_null_root() && |
52 | 4 | variant.get_root_type()->get_primitive_type() != INVALID_TYPE && |
53 | 4 | !is_string_type(data_type_to->get_primitive_type()) && |
54 | 4 | data_type_to->get_primitive_type() != TYPE_JSONB); |
55 | | |
56 | 10 | if (is_root_valuable) { |
57 | 6 | ColumnPtr nested = variant.get_root(); |
58 | 6 | auto nested_from_type = variant.get_root_type(); |
59 | | // DCHECK(nested_from_type->is_nullable()); |
60 | 6 | DCHECK(!data_type_to->is_nullable()); |
61 | 6 | auto new_context = context == nullptr ? nullptr : context->clone(); |
62 | 6 | if (new_context != nullptr) { |
63 | 6 | new_context->set_jsonb_string_as_string(true); |
64 | | // Disable strict mode for the inner JSONBātarget conversion. |
65 | | // The variant root column may contain null/empty JSONB entries for rows |
66 | | // where the subcolumn doesn't exist (e.g., mixed-schema variant data). |
67 | | // In strict mode (INSERT context), these null entries cause the ENTIRE |
68 | | // cast to fail and return all NULLs. Since this is an internal type |
69 | | // conversion within variant, not user-provided INSERT data validation, |
70 | | // strict mode should not apply here. |
71 | 6 | new_context->set_enable_strict_mode(false); |
72 | 6 | } |
73 | | // dst type nullable has been removed, so we should remove the inner nullable of root column |
74 | 6 | auto wrapper = |
75 | 6 | prepare_impl(new_context.get(), remove_nullable(nested_from_type), data_type_to); |
76 | 6 | Block tmp_block {{remove_nullable(nested), remove_nullable(nested_from_type), ""}}; |
77 | 6 | tmp_block.insert({nullptr, data_type_to, ""}); |
78 | | /// Perform the requested conversion. |
79 | 6 | Status st = wrapper(new_context.get(), tmp_block, {0}, 1, input_rows_count, nullptr); |
80 | 6 | if (!st.ok()) { |
81 | | // Fill with default values, which is null |
82 | 0 | col_to->assume_mutable()->insert_many_defaults(input_rows_count); |
83 | 0 | col_to = make_nullable(col_to, true); |
84 | 6 | } else { |
85 | 6 | col_to = tmp_block.get_by_position(1).column; |
86 | 6 | col_to = wrap_in_nullable(col_to, |
87 | 6 | Block({{nested, nested_from_type, ""}, |
88 | 6 | {col_from, col_with_type_and_name.type, ""}, |
89 | 6 | {col_to, data_type_to, ""}}), |
90 | 6 | {0, 1}, input_rows_count); |
91 | 6 | } |
92 | 6 | } else { |
93 | 4 | if (variant.only_have_default_values()) { |
94 | 0 | col_to->assume_mutable()->insert_many_defaults(input_rows_count); |
95 | 0 | col_to = make_nullable(col_to, true); |
96 | 4 | } else if (is_string_type(data_type_to->get_primitive_type())) { |
97 | | // serialize to string |
98 | 2 | return CastToStringFunction::execute_impl(context, block, arguments, result, |
99 | 2 | input_rows_count); |
100 | 2 | } else if (data_type_to->get_primitive_type() == TYPE_JSONB) { |
101 | | // serialize to json by parsing |
102 | 0 | return cast_from_generic_to_jsonb(context, block, arguments, result, input_rows_count); |
103 | 2 | } else if (!data_type_to->is_nullable() && |
104 | 2 | !is_string_type(data_type_to->get_primitive_type())) { |
105 | | // other types |
106 | 2 | col_to->assume_mutable()->insert_many_defaults(input_rows_count); |
107 | 2 | col_to = make_nullable(col_to, true); |
108 | 2 | } else { |
109 | 0 | assert_cast<ColumnNullable&>(*col_to->assume_mutable()) |
110 | 0 | .insert_many_defaults(input_rows_count); |
111 | 0 | } |
112 | 4 | } |
113 | | |
114 | 8 | if (null_map == nullptr) { |
115 | 8 | if (const auto* nullable_result = check_and_get_column<ColumnNullable>(*col_to); |
116 | 8 | nullable_result != nullptr && !nullable_result->has_null()) { |
117 | 3 | col_to = nullable_result->get_nested_column_ptr(); |
118 | 3 | } |
119 | 8 | } |
120 | | |
121 | 8 | if (col_to->size() != input_rows_count) { |
122 | 0 | return Status::InternalError("Unmatched row count {}, expected {}", col_to->size(), |
123 | 0 | input_rows_count); |
124 | 0 | } |
125 | | |
126 | 8 | block.replace_by_position(result, std::move(col_to)); |
127 | 8 | return Status::OK(); |
128 | 8 | } |
129 | | |
130 | | struct CastFromVariant { |
131 | | static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
132 | | uint32_t result, size_t input_rows_count, |
133 | 0 | const NullMap::value_type* null_map = nullptr) { |
134 | 0 | auto& data_type_to = block.get_by_position(result).type; |
135 | 0 | return cast_from_variant_impl(context, block, arguments, result, input_rows_count, null_map, |
136 | 0 | data_type_to); |
137 | 0 | } |
138 | | }; |
139 | | |
140 | | struct CastToVariant { |
141 | | static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
142 | | uint32_t result, size_t input_rows_count, |
143 | 3 | const NullMap::value_type* null_map = nullptr) { |
144 | | // auto& data_type_to = block.get_by_position(result).type; |
145 | 3 | const auto& col_with_type_and_name = block.get_by_position(arguments[0]); |
146 | 3 | const auto& from_type = col_with_type_and_name.type; |
147 | 3 | const auto& col_from = col_with_type_and_name.column; |
148 | | // set variant root column/type to from column/type |
149 | 3 | const auto& data_type_to = block.get_by_position(result).type; |
150 | 3 | const auto* variant_type = |
151 | 3 | typeid_cast<const DataTypeVariant*>(remove_nullable(data_type_to).get()); |
152 | 3 | auto variant = ColumnVariant::create( |
153 | 3 | variant_type ? variant_type->variant_max_subcolumns_count() : 0, |
154 | 3 | variant_type ? variant_type->enable_doc_mode() : false); |
155 | 3 | variant->create_root(from_type, col_from->assume_mutable()); |
156 | 3 | block.replace_by_position(result, std::move(variant)); |
157 | 3 | return Status::OK(); |
158 | 3 | } |
159 | | }; |
160 | | |
161 | | // create corresponding variant value to wrap from_type |
162 | | WrapperType create_cast_to_variant_wrapper(const DataTypePtr& from_type, |
163 | 3 | const DataTypeVariant& to_type) { |
164 | 3 | if (from_type->get_primitive_type() == TYPE_VARIANT) { |
165 | | // variant_max_subcolumns_count is not equal |
166 | 0 | return create_unsupport_wrapper(from_type->get_name(), to_type.get_name()); |
167 | 0 | } |
168 | 3 | return &CastToVariant::execute; |
169 | 3 | } |
170 | | |
171 | | // create corresponding type convert from variant |
172 | | WrapperType create_cast_from_variant_wrapper(const DataTypeVariant& from_type, |
173 | 10 | const DataTypePtr& to_type) { |
174 | 10 | if (to_type->get_primitive_type() == TYPE_VARIANT) { |
175 | | // variant_max_subcolumns_count is not equal |
176 | 0 | return create_unsupport_wrapper(from_type.get_name(), to_type->get_name()); |
177 | 0 | } |
178 | | // Capture explicit target type to make the cast independent from Block[result].type. |
179 | 10 | DataTypePtr captured_to_type = to_type; |
180 | 10 | return [captured_to_type](FunctionContext* context, Block& block, |
181 | 10 | const ColumnNumbers& arguments, uint32_t result, |
182 | 10 | size_t input_rows_count, |
183 | 10 | const NullMap::value_type* null_map) -> Status { |
184 | 10 | return cast_from_variant_impl(context, block, arguments, result, input_rows_count, null_map, |
185 | 10 | captured_to_type); |
186 | 10 | }; |
187 | 10 | } |
188 | | |
189 | | } // namespace doris::CastWrapper |