be/src/exprs/function/cast/cast_to_variant.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include "core/column/column_nullable.h" |
21 | | #include "core/data_type/data_type_variant.h" |
22 | | #include "exprs/function/cast/cast_base.h" |
23 | | #include "exprs/function/cast/cast_to_string.h" |
24 | | |
25 | | namespace doris::CastWrapper { |
26 | | |
27 | | // Shared implementation for casting a variant column (optionally wrapped in ColumnNullable) to the non-nullable target type data_type_to. A usable root column is cast directly; otherwise string and JSONB targets serialize the whole variant and the remaining targets are filled with default values. The converted column is stored at block[result]. |
28 | | inline Status cast_from_variant_impl(FunctionContext* context, Block& block, |
29 | | const ColumnNumbers& arguments, uint32_t result, |
30 | | size_t input_rows_count, const NullMap::value_type* null_map, |
31 | 13 | const DataTypePtr& data_type_to) { |
32 | 13 | auto& col_with_type_and_name = block.get_by_position(arguments[0]); |
33 | 13 | auto& col_from = col_with_type_and_name.column; |
34 | 13 | const IColumn* variant_column = col_from.get(); |
35 | 13 | const auto* nullable = check_and_get_column<ColumnNullable>(*variant_column); |
36 | 13 | if (nullable != nullptr) { |
37 | 0 | variant_column = &nullable->get_nested_column(); |
38 | 0 | } |
39 | 13 | const auto* variant = assert_cast<const ColumnVariant*>(variant_column); |
40 | 13 | ColumnPtr col_to = data_type_to->create_column(); |
41 | | |
42 | 13 | ColumnPtr finalized_input_column; |
43 | 13 | if (!variant->is_finalized()) { |
44 | | // Local exchange can share the same input block across multiple downstream tasks. |
45 | | // Finalize a private copy so variant casts never mutate shared input columns; `variant` is re-pointed at that copy, which finalized_input_column keeps alive. |
46 | 2 | auto finalized_variant = variant->clone_finalized(); |
47 | 2 | variant = assert_cast<const ColumnVariant*>(finalized_variant.get()); |
48 | 2 | if (nullable != nullptr) { |
49 | 0 | auto cloned_null_map = |
50 | 0 | nullable->get_null_map_column_ptr()->clone_resized(input_rows_count); |
51 | 0 | finalized_input_column = ColumnNullable::create(std::move(finalized_variant), |
52 | 0 | std::move(cloned_null_map)); |
53 | 2 | } else { |
54 | 2 | finalized_input_column = std::move(finalized_variant); |
55 | 2 | } |
56 | 2 | } |
57 | 13 | auto execute_on_finalized_input = [&](auto&& executor) -> Status { |
58 | 4 | if (!finalized_input_column) { |
59 | 3 | return executor(block); |
60 | 3 | } |
61 | 1 | Block finalized_block = block; |
62 | 1 | finalized_block.replace_by_position(arguments[0], finalized_input_column); |
63 | 1 | RETURN_IF_ERROR(executor(finalized_block)); |
64 | 1 | block.replace_by_position(result, finalized_block.get_by_position(result).column); |
65 | 1 | return Status::OK(); |
66 | 1 | }; _ZZN5doris11CastWrapper22cast_from_variant_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmPKhRKSt10shared_ptrIKNS_9IDataTypeEEENKUlOT_E_clIZNS0_22cast_from_variant_implES2_S4_S9_jmSB_SH_EUlS4_E_EENS_6StatusESJ_ Line | Count | Source | 57 | 4 | auto execute_on_finalized_input = [&](auto&& executor) -> Status { | 58 | 4 | if (!finalized_input_column) { | 59 | 3 | return executor(block); | 60 | 3 | } | 61 | 1 | Block finalized_block = block; | 62 | 1 | finalized_block.replace_by_position(arguments[0], finalized_input_column); | 63 | 1 | RETURN_IF_ERROR(executor(finalized_block)); | 64 | 1 | block.replace_by_position(result, finalized_block.get_by_position(result).column); | 65 | 1 | return Status::OK(); | 66 | 1 | }; |
Unexecuted instantiation: _ZZN5doris11CastWrapper22cast_from_variant_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmPKhRKSt10shared_ptrIKNS_9IDataTypeEEENKUlOT_E_clIZNS0_22cast_from_variant_implES2_S4_S9_jmSB_SH_EUlS4_E0_EENS_6StatusESJ_ |
67 | | |
68 | | // It's important to convert as many elements as possible in this context. For instance, |
69 | | // if the root of this variant column is a number column, converting it to a number column |
70 | | // is acceptable. However, if the destination type is a string and the root is a non-scalar root, then |
71 | | // we should convert the entire tree to a string. |
72 | 13 | bool is_root_valuable = variant->is_scalar_variant() || |
73 | 13 | (!variant->is_null_root() && |
74 | 7 | variant->get_root_type()->get_primitive_type() != INVALID_TYPE && |
75 | 7 | !is_string_type(data_type_to->get_primitive_type()) && |
76 | 7 | data_type_to->get_primitive_type() != TYPE_JSONB); |
77 | | |
78 | 13 | if (is_root_valuable) { |
79 | 6 | ColumnPtr nested = variant->get_root(); |
80 | 6 | auto nested_from_type = variant->get_root_type(); |
81 | | // DCHECK(nested_from_type->is_nullable()); |
82 | 6 | DCHECK(!data_type_to->is_nullable()); |
83 | 6 | auto new_context = context == nullptr ? nullptr : context->clone(); |
84 | 6 | if (new_context != nullptr) { |
85 | 6 | new_context->set_jsonb_string_as_string(true); |
86 | | // Disable strict mode for the inner JSONB -> target conversion. |
87 | | // The variant root column may contain null/empty JSONB entries for rows |
88 | | // where the subcolumn doesn't exist (e.g., mixed-schema variant data). |
89 | | // In strict mode (INSERT context), these null entries cause the ENTIRE |
90 | | // cast to fail and return all NULLs. Since this is an internal type |
91 | | // conversion within variant, not user-provided INSERT data validation, |
92 | | // strict mode should not apply here. |
93 | 6 | new_context->set_enable_strict_mode(false); |
94 | 6 | } |
95 | | // The destination type has already had its nullable wrapper removed, so strip the nullable wrapper from the root column and its type as well |
96 | 6 | auto wrapper = |
97 | 6 | prepare_impl(new_context.get(), remove_nullable(nested_from_type), data_type_to); |
98 | 6 | Block tmp_block {{remove_nullable(nested), remove_nullable(nested_from_type), ""}}; |
99 | 6 | tmp_block.insert({nullptr, data_type_to, ""}); |
100 | | /// Perform the requested conversion. |
101 | 6 | Status st = wrapper(new_context.get(), tmp_block, {0}, 1, input_rows_count, nullptr); |
102 | 6 | if (!st.ok()) { |
103 | | // Conversion failed: the error status is not propagated; fill with default values, which are null |
104 | 0 | col_to->assert_mutable()->insert_many_defaults(input_rows_count); |
105 | 0 | col_to = make_nullable(col_to, true); |
106 | 6 | } else { |
107 | 6 | col_to = tmp_block.get_by_position(1).column; |
108 | 6 | col_to = wrap_in_nullable(col_to, |
109 | 6 | Block({{nested, nested_from_type, ""}, |
110 | 6 | {col_from, col_with_type_and_name.type, ""}, |
111 | 6 | {col_to, data_type_to, ""}}), |
112 | 6 | {0, 1}, input_rows_count); |
113 | 6 | } |
114 | 7 | } else { |
115 | 7 | if (variant->only_have_default_values()) { |
116 | 0 | col_to->assert_mutable()->insert_many_defaults(input_rows_count); |
117 | 0 | col_to = make_nullable(col_to, true); |
118 | 7 | } else if (is_string_type(data_type_to->get_primitive_type())) { |
119 | | // serialize to string; this branch returns directly, the executor writes block[result] |
120 | 4 | return execute_on_finalized_input([&](Block& finalized_block) { |
121 | 4 | return CastToStringFunction::execute_impl(context, finalized_block, arguments, |
122 | 4 | result, input_rows_count); |
123 | 4 | }); |
124 | 4 | } else if (data_type_to->get_primitive_type() == TYPE_JSONB) { |
125 | | // serialize to json by parsing; this branch also returns directly |
126 | 0 | return execute_on_finalized_input([&](Block& finalized_block) { |
127 | 0 | return cast_from_generic_to_jsonb(context, finalized_block, arguments, result, |
128 | 0 | input_rows_count); |
129 | 0 | }); |
130 | 3 | } else if (!data_type_to->is_nullable() && |
131 | 3 | !is_string_type(data_type_to->get_primitive_type())) { |
132 | | // other non-nullable target types: default values wrapped as nullable |
133 | 3 | col_to->assert_mutable()->insert_many_defaults(input_rows_count); |
134 | 3 | col_to = make_nullable(col_to, true); |
135 | 3 | } else { |
136 | 0 | assert_cast<ColumnNullable&>(*col_to->assert_mutable()) |
137 | 0 | .insert_many_defaults(input_rows_count); |
138 | 0 | } |
139 | 7 | } |
140 | | |
141 | 9 | if (null_map == nullptr) { |
142 | 9 | if (const auto* nullable_result = check_and_get_column<ColumnNullable>(*col_to); |
143 | 9 | nullable_result != nullptr && !nullable_result->has_null()) { |
144 | 3 | col_to = nullable_result->get_nested_column_ptr(); |
145 | 3 | } |
146 | 9 | } |
147 | | |
148 | 9 | if (col_to->size() != input_rows_count) { |
149 | 0 | return Status::InternalError("Unmatched row count {}, expected {}", col_to->size(), |
150 | 0 | input_rows_count); |
151 | 0 | } |
152 | | |
153 | 9 | block.replace_by_position(result, std::move(col_to)); |
154 | 9 | return Status::OK(); |
155 | 9 | } |
156 | | // Cast-from-variant entry point: reads the target type from block[result].type and forwards every argument to cast_from_variant_impl, returning its Status. |
157 | | struct CastFromVariant { |
158 | | static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
159 | | uint32_t result, size_t input_rows_count, |
160 | 0 | const NullMap::value_type* null_map = nullptr) { |
161 | 0 | const auto& data_type_to = block.get_by_position(result).type; |
162 | 0 | return cast_from_variant_impl(context, block, arguments, result, input_rows_count, null_map, |
163 | 0 | data_type_to); |
164 | 0 | } |
165 | | }; |
166 | | |
167 | | struct CastToVariant { |
168 | | static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
169 | | uint32_t result, size_t input_rows_count, |
170 | 3 | const NullMap::value_type* null_map = nullptr) { |
171 | | // Wraps the argument column as the root of a newly created ColumnVariant and stores it at block[result]; context, input_rows_count and null_map are not used here. |
172 | 3 | const auto& col_with_type_and_name = block.get_by_position(arguments[0]); |
173 | 3 | const auto& from_type = col_with_type_and_name.type; |
174 | 3 | const auto& col_from = col_with_type_and_name.column; |
175 | | // Take the variant settings (max subcolumns count, doc mode) from the result type, falling back to 0/false when it is not a DataTypeVariant; then set the variant root column/type to the source column/type |
176 | 3 | const auto& data_type_to = block.get_by_position(result).type; |
177 | 3 | const auto* variant_type = |
178 | 3 | typeid_cast<const DataTypeVariant*>(remove_nullable(data_type_to).get()); |
179 | 3 | auto variant = ColumnVariant::create( |
180 | 3 | variant_type ? variant_type->variant_max_subcolumns_count() : 0, |
181 | 3 | variant_type ? variant_type->enable_doc_mode() : false); |
182 | 3 | variant->create_root(from_type, IColumn::mutate(col_from)); |
183 | 3 | block.replace_by_position(result, std::move(variant)); |
184 | 3 | return Status::OK(); |
185 | 3 | } |
186 | | }; |
187 | | |
188 | | // Returns the wrapper that casts from_type to a variant by making the source column the variant root (CastToVariant::execute); a variant source yields the unsupported-cast wrapper. Declared inline because this definition lives in a header. |
189 | | inline WrapperType create_cast_to_variant_wrapper(const DataTypePtr& from_type, |
190 | 3 | const DataTypeVariant& to_type) { |
191 | 3 | if (from_type->get_primitive_type() == TYPE_VARIANT) { |
192 | | // variant -> variant is rejected; NOTE(review): the original note gives "variant_max_subcolumns_count is not equal" as the reason, confirm |
193 | 0 | return create_unsupport_wrapper(from_type->get_name(), to_type.get_name()); |
194 | 0 | } |
195 | 3 | return &CastToVariant::execute; |
196 | 3 | } |
197 | | |
198 | | // Returns the wrapper that casts a variant column to to_type by calling cast_from_variant_impl with the captured target type; a variant target yields the unsupported-cast wrapper. Declared inline because this definition lives in a header. |
199 | | inline WrapperType create_cast_from_variant_wrapper(const DataTypeVariant& from_type, |
200 | 13 | const DataTypePtr& to_type) { |
201 | 13 | if (to_type->get_primitive_type() == TYPE_VARIANT) { |
202 | | // variant -> variant is rejected; NOTE(review): the original note gives "variant_max_subcolumns_count is not equal" as the reason, confirm |
203 | 0 | return create_unsupport_wrapper(from_type.get_name(), to_type->get_name()); |
204 | 0 | } |
205 | | // Capture explicit target type to make the cast independent from Block[result].type. |
206 | 13 | DataTypePtr captured_to_type = to_type; |
207 | 13 | return [captured_to_type](FunctionContext* context, Block& block, |
208 | 13 | const ColumnNumbers& arguments, uint32_t result, |
209 | 13 | size_t input_rows_count, |
210 | 13 | const NullMap::value_type* null_map) -> Status { |
211 | 13 | return cast_from_variant_impl(context, block, arguments, result, input_rows_count, null_map, |
212 | 13 | captured_to_type); |
213 | 13 | }; |
214 | 13 | } |
215 | | |
216 | | } // namespace doris::CastWrapper |