be/src/exprs/function/cast/cast_to_variant.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include "core/data_type/data_type_variant.h" |
21 | | #include "exprs/function/cast/cast_base.h" |
22 | | #include "exprs/function/cast/cast_to_string.h" |
23 | | |
24 | | namespace doris::CastWrapper { |
25 | | |
26 | | // shared implementation for casting from variant to arbitrary non-nullable target type |
27 | | inline Status cast_from_variant_impl(FunctionContext* context, Block& block, |
28 | | const ColumnNumbers& arguments, uint32_t result, |
29 | | size_t input_rows_count, |
30 | | const NullMap::value_type* /*null_map*/, |
31 | 10 | const DataTypePtr& data_type_to) { |
32 | 10 | const auto& col_with_type_and_name = block.get_by_position(arguments[0]); |
33 | 10 | const auto& col_from = col_with_type_and_name.column; |
34 | 10 | const auto& variant = assert_cast<const ColumnVariant&>(*col_from); |
35 | 10 | ColumnPtr col_to = data_type_to->create_column(); |
36 | | |
37 | 10 | if (!variant.is_finalized()) { |
38 | | // ColumnVariant must be finalized before parsing; note that finalize() may modify the original column structure |
39 | 0 | variant.assume_mutable()->finalize(); |
40 | 0 | } |
41 | | |
42 | | // It's important to convert as many elements as possible in this context. For instance, |
43 | | // if the root of this variant column is a number column, converting it to a number column |
44 | | // is acceptable. However, if the destination type is a string and the root is not a scalar root, then |
45 | | // we should convert the entire tree to a string. |
46 | 10 | bool is_root_valuable = variant.is_scalar_variant() || |
47 | 10 | (!variant.is_null_root() && |
48 | 4 | variant.get_root_type()->get_primitive_type() != INVALID_TYPE && |
49 | 4 | !is_string_type(data_type_to->get_primitive_type()) && |
50 | 4 | data_type_to->get_primitive_type() != TYPE_JSONB); |
51 | | |
52 | 10 | if (is_root_valuable) { |
53 | 6 | ColumnPtr nested = variant.get_root(); |
54 | 6 | auto nested_from_type = variant.get_root_type(); |
55 | | // DCHECK(nested_from_type->is_nullable()); |
56 | 6 | DCHECK(!data_type_to->is_nullable()); |
57 | 6 | auto new_context = context == nullptr ? nullptr : context->clone(); |
58 | 6 | if (new_context != nullptr) { |
59 | 6 | new_context->set_jsonb_string_as_string(true); |
60 | | // Disable strict mode for the inner JSONB-to-target conversion. |
61 | | // The variant root column may contain null/empty JSONB entries for rows |
62 | | // where the subcolumn doesn't exist (e.g., mixed-schema variant data). |
63 | | // In strict mode (INSERT context), these null entries cause the ENTIRE |
64 | | // cast to fail and return all NULLs. Since this is an internal type |
65 | | // conversion within variant, not user-provided INSERT data validation, |
66 | | // strict mode should not apply here. |
67 | 6 | new_context->set_enable_strict_mode(false); |
68 | 6 | } |
69 | | // The destination type has already had its nullable wrapper removed, so we also strip the nullable wrapper from the root column and its type |
70 | 6 | auto wrapper = |
71 | 6 | prepare_impl(new_context.get(), remove_nullable(nested_from_type), data_type_to); |
72 | 6 | Block tmp_block {{remove_nullable(nested), remove_nullable(nested_from_type), ""}}; |
73 | 6 | tmp_block.insert({nullptr, data_type_to, ""}); |
74 | | /// Perform the requested conversion. |
75 | 6 | Status st = wrapper(new_context.get(), tmp_block, {0}, 1, input_rows_count, nullptr); |
76 | 6 | if (!st.ok()) { |
77 | | // Fill with default values, which are null |
78 | 0 | col_to->assume_mutable()->insert_many_defaults(input_rows_count); |
79 | 0 | col_to = make_nullable(col_to, true); |
80 | 6 | } else { |
81 | 6 | col_to = tmp_block.get_by_position(1).column; |
82 | | // Note: here we should return the nullable result column |
83 | 6 | col_to = wrap_in_nullable( |
84 | 6 | col_to, Block({{nested, nested_from_type, ""}, {col_to, data_type_to, ""}}), |
85 | 6 | {0}, input_rows_count); |
86 | 6 | } |
87 | 6 | } else { |
88 | 4 | if (variant.only_have_default_values()) { |
89 | 0 | col_to->assume_mutable()->insert_many_defaults(input_rows_count); |
90 | 0 | col_to = make_nullable(col_to, true); |
91 | 4 | } else if (is_string_type(data_type_to->get_primitive_type())) { |
92 | | // serialize to string |
93 | 2 | return CastToStringFunction::execute_impl(context, block, arguments, result, |
94 | 2 | input_rows_count); |
95 | 2 | } else if (data_type_to->get_primitive_type() == TYPE_JSONB) { |
96 | | // serialize to json by parsing |
97 | 0 | return cast_from_generic_to_jsonb(context, block, arguments, result, input_rows_count); |
98 | 2 | } else if (!data_type_to->is_nullable() && |
99 | 2 | !is_string_type(data_type_to->get_primitive_type())) { |
100 | | // other types |
101 | 2 | col_to->assume_mutable()->insert_many_defaults(input_rows_count); |
102 | 2 | col_to = make_nullable(col_to, true); |
103 | 2 | } else { |
104 | 0 | assert_cast<ColumnNullable&>(*col_to->assume_mutable()) |
105 | 0 | .insert_many_defaults(input_rows_count); |
106 | 0 | } |
107 | 4 | } |
108 | | |
109 | 8 | if (col_to->size() != input_rows_count) { |
110 | 0 | return Status::InternalError("Unmatched row count {}, expected {}", col_to->size(), |
111 | 0 | input_rows_count); |
112 | 0 | } |
113 | | |
114 | 8 | block.replace_by_position(result, std::move(col_to)); |
115 | 8 | return Status::OK(); |
116 | 8 | } |
117 | | |
118 | | struct CastFromVariant { |
119 | | static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
120 | | uint32_t result, size_t input_rows_count, |
121 | 0 | const NullMap::value_type* null_map = nullptr) { |
122 | 0 | auto& data_type_to = block.get_by_position(result).type; |
123 | 0 | return cast_from_variant_impl(context, block, arguments, result, input_rows_count, null_map, |
124 | 0 | data_type_to); |
125 | 0 | } |
126 | | }; |
127 | | |
128 | | struct CastToVariant { |
129 | | static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
130 | | uint32_t result, size_t input_rows_count, |
131 | 3 | const NullMap::value_type* null_map = nullptr) { |
132 | | // auto& data_type_to = block.get_by_position(result).type; |
133 | 3 | const auto& col_with_type_and_name = block.get_by_position(arguments[0]); |
134 | 3 | const auto& from_type = col_with_type_and_name.type; |
135 | 3 | const auto& col_from = col_with_type_and_name.column; |
136 | | // set variant root column/type to from column/type |
137 | 3 | auto variant = ColumnVariant::create(true /*always nullable*/); |
138 | 3 | variant->create_root(from_type, col_from->assume_mutable()); |
139 | 3 | block.replace_by_position(result, std::move(variant)); |
140 | 3 | return Status::OK(); |
141 | 3 | } |
142 | | }; |
143 | | |
144 | | // create corresponding variant value to wrap from_type |
145 | | WrapperType create_cast_to_variant_wrapper(const DataTypePtr& from_type, |
146 | 3 | const DataTypeVariant& to_type) { |
147 | 3 | if (from_type->get_primitive_type() == TYPE_VARIANT) { |
148 | | // variant_max_subcolumns_count is not equal |
149 | 0 | return create_unsupport_wrapper(from_type->get_name(), to_type.get_name()); |
150 | 0 | } |
151 | 3 | return &CastToVariant::execute; |
152 | 3 | } |
153 | | |
154 | | // create the wrapper that converts from variant to the given to_type |
155 | | WrapperType create_cast_from_variant_wrapper(const DataTypeVariant& from_type, |
156 | 10 | const DataTypePtr& to_type) { |
157 | 10 | if (to_type->get_primitive_type() == TYPE_VARIANT) { |
158 | | // variant_max_subcolumns_count is not equal |
159 | 0 | return create_unsupport_wrapper(from_type.get_name(), to_type->get_name()); |
160 | 0 | } |
161 | | // Capture explicit target type to make the cast independent from Block[result].type. |
162 | 10 | DataTypePtr captured_to_type = to_type; |
163 | 10 | return [captured_to_type](FunctionContext* context, Block& block, |
164 | 10 | const ColumnNumbers& arguments, uint32_t result, |
165 | 10 | size_t input_rows_count, |
166 | 10 | const NullMap::value_type* null_map) -> Status { |
167 | 10 | return cast_from_variant_impl(context, block, arguments, result, input_rows_count, null_map, |
168 | 10 | captured_to_type); |
169 | 10 | }; |
170 | 10 | } |
171 | | |
172 | | } // namespace doris::CastWrapper |