be/src/exprs/function/cast/cast_base.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "exprs/function/cast/cast_base.h" |
19 | | |
20 | | #include <cstdint> |
21 | | |
22 | | #include "util/jsonb_writer.h" |
23 | | namespace doris::CastWrapper { |
24 | | |
25 | | Status cast_from_generic_to_jsonb(FunctionContext* context, Block& block, |
26 | | const ColumnNumbers& arguments, uint32_t result, |
27 | 0 | size_t input_rows_count, const NullMap::value_type* null_map) { |
28 | 0 | auto data_type_to = block.get_by_position(result).type; |
29 | 0 | const auto& col_with_type_and_name = block.get_by_position(arguments[0]); |
30 | 0 | const IDataType& type = *col_with_type_and_name.type; |
31 | 0 | const IColumn& col_from = *col_with_type_and_name.column; |
32 | |
|
33 | 0 | auto column_string = ColumnString::create(); |
34 | 0 | JsonbWriter writer; |
35 | |
|
36 | 0 | ColumnUInt8::MutablePtr col_null_map_to = ColumnUInt8::create(col_from.size(), 0); |
37 | 0 | ColumnUInt8::Container* vec_null_map_to = &col_null_map_to->get_data(); |
38 | 0 | DataTypeSerDe::FormatOptions format_options; |
39 | 0 | format_options.converted_from_string = true; |
40 | 0 | DataTypeSerDeSPtr from_serde = type.get_serde(); |
41 | 0 | DataTypeSerDeSPtr to_serde = data_type_to->get_serde(); |
42 | 0 | auto col_to = data_type_to->create_column(); |
43 | |
|
44 | 0 | auto tmp_col = ColumnString::create(); |
45 | 0 | DataTypeSerDe::FormatOptions options; |
46 | 0 | auto time_zone = cctz::utc_time_zone(); |
47 | 0 | options.timezone = |
48 | 0 | (context && context->state()) ? &context->state()->timezone_obj() : &time_zone; |
49 | |
|
50 | 0 | options.escape_char = '\\'; |
51 | 0 | for (size_t i = 0; i < input_rows_count; i++) { |
52 | | // convert to string |
53 | 0 | tmp_col->clear(); |
54 | 0 | VectorBufferWriter write_buffer(*tmp_col.get()); |
55 | 0 | Status st = from_serde->serialize_column_to_json(col_from, i, i + 1, write_buffer, options); |
56 | | // if serialized failed, will return null |
57 | 0 | (*vec_null_map_to)[i] = !st.ok(); |
58 | 0 | if (!st.ok()) { |
59 | 0 | col_to->insert_default(); |
60 | 0 | continue; |
61 | 0 | } |
62 | 0 | write_buffer.commit(); |
63 | 0 | writer.reset(); |
64 | 0 | auto str_ref = tmp_col->get_data_at(0); |
65 | 0 | Slice data((char*)(str_ref.data), str_ref.size); |
66 | | // first try to parse string |
67 | 0 | st = to_serde->deserialize_one_cell_from_json(*col_to, data, format_options); |
68 | | // if parsing failed, will return null |
69 | 0 | (*vec_null_map_to)[i] = !st.ok(); |
70 | 0 | if (!st.ok()) { |
71 | 0 | col_to->insert_default(); |
72 | 0 | } |
73 | 0 | } |
74 | |
|
75 | 0 | block.replace_by_position( |
76 | 0 | result, ColumnNullable::create(std::move(col_to), std::move(col_null_map_to))); |
77 | 0 | return Status::OK(); |
78 | 0 | } |
79 | | |
80 | | Status cast_from_string_to_generic(FunctionContext* context, Block& block, |
81 | | const ColumnNumbers& arguments, uint32_t result, |
82 | 0 | size_t input_rows_count, const NullMap::value_type* null_map) { |
83 | 0 | const auto& col_with_type_and_name = block.get_by_position(arguments[0]); |
84 | 0 | const IColumn& col_from = *col_with_type_and_name.column; |
85 | | // result column must set type |
86 | 0 | DCHECK(block.get_by_position(result).type != nullptr); |
87 | 0 | auto data_type_to = block.get_by_position(result).type; |
88 | 0 | if (const auto* col_from_string = check_and_get_column<ColumnString>(&col_from)) { |
89 | 0 | auto col_to = data_type_to->create_column(); |
90 | 0 | auto serde = data_type_to->get_serde(); |
91 | 0 | size_t size = col_from.size(); |
92 | 0 | col_to->reserve(size); |
93 | |
|
94 | 0 | ColumnUInt8::MutablePtr col_null_map_to = ColumnUInt8::create(size, 0); |
95 | 0 | ColumnUInt8::Container* vec_null_map_to = &col_null_map_to->get_data(); |
96 | 0 | const bool is_complex = is_complex_type(data_type_to->get_primitive_type()); |
97 | 0 | DataTypeSerDe::FormatOptions format_options; |
98 | 0 | format_options.converted_from_string = true; |
99 | 0 | format_options.escape_char = '\\'; |
100 | |
|
101 | 0 | for (size_t i = 0; i < size; ++i) { |
102 | 0 | const auto& val = col_from_string->get_data_at(i); |
103 | | // Note: here we should handle the null element |
104 | 0 | if (val.size == 0) { |
105 | 0 | col_to->insert_default(); |
106 | | // empty string('') is an invalid format for complex type, set null_map to 1 |
107 | 0 | if (is_complex) { |
108 | 0 | (*vec_null_map_to)[i] = 1; |
109 | 0 | } |
110 | 0 | continue; |
111 | 0 | } |
112 | 0 | Slice string_slice(val.data, val.size); |
113 | 0 | Status st = |
114 | 0 | serde->deserialize_one_cell_from_json(*col_to, string_slice, format_options); |
115 | | // if parsing failed, will return null |
116 | 0 | (*vec_null_map_to)[i] = !st.ok(); |
117 | 0 | if (!st.ok()) { |
118 | 0 | col_to->insert_default(); |
119 | 0 | } |
120 | 0 | } |
121 | 0 | block.get_by_position(result).column = |
122 | 0 | ColumnNullable::create(std::move(col_to), std::move(col_null_map_to)); |
123 | 0 | } else { |
124 | 0 | return Status::RuntimeError( |
125 | 0 | "Illegal column {} of first argument of conversion function from string", |
126 | 0 | col_from.get_name()); |
127 | 0 | } |
128 | 0 | return Status::OK(); |
129 | 0 | } |
130 | | |
131 | | ElementWrappers get_element_wrappers(FunctionContext* context, const DataTypes& from_element_types, |
132 | 0 | const DataTypes& to_element_types) { |
133 | 0 | DCHECK(from_element_types.size() == to_element_types.size()); |
134 | 0 | ElementWrappers element_wrappers; |
135 | 0 | element_wrappers.reserve(from_element_types.size()); |
136 | 0 | for (size_t i = 0; i < from_element_types.size(); ++i) { |
137 | 0 | const DataTypePtr& from_element_type = from_element_types[i]; |
138 | 0 | const DataTypePtr& to_element_type = to_element_types[i]; |
139 | 0 | element_wrappers.push_back( |
140 | 0 | prepare_unpack_dictionaries(context, from_element_type, to_element_type)); |
141 | 0 | } |
142 | 0 | return element_wrappers; |
143 | 0 | } |
144 | | |
145 | 0 | WrapperType create_unsupport_wrapper(const String error_msg) { |
146 | 0 | return [error_msg](FunctionContext* /*context*/, Block& /*block*/, |
147 | 0 | const ColumnNumbers& /*arguments*/, uint32_t /*result*/, |
148 | 0 | size_t /*input_rows_count*/, const NullMap::value_type* null_map = nullptr) { |
149 | 0 | return Status::InvalidArgument(error_msg); |
150 | 0 | }; |
151 | 0 | } |
152 | | |
153 | 0 | WrapperType create_unsupport_wrapper(const String from_type_name, const String to_type_name) { |
154 | 0 | const String error_msg = |
155 | 0 | fmt::format("Conversion from {} to {} is not supported", from_type_name, to_type_name); |
156 | 0 | return create_unsupport_wrapper(error_msg); |
157 | 0 | } |
158 | | |
159 | 131 | WrapperType create_identity_wrapper(const DataTypePtr&) { |
160 | 131 | return [](FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
161 | 131 | uint32_t result, size_t /*input_rows_count*/, |
162 | 131 | const NullMap::value_type* null_map = nullptr) { |
163 | 131 | block.get_by_position(result).column = block.get_by_position(arguments.front()).column; |
164 | 131 | return Status::OK(); |
165 | 131 | }; |
166 | 131 | } |
167 | | |
168 | | /// the only difference between these two functions is throw error or not when parsing fail. |
169 | | /// the return columns are both nullable columns. |
170 | | Status cast_from_string_to_complex_type(FunctionContext* context, Block& block, |
171 | | const ColumnNumbers& arguments, uint32_t result, |
172 | | size_t input_rows_count, |
173 | 9 | const NullMap::value_type* null_map) { |
174 | 9 | const auto* col_from = check_and_get_column<DataTypeString::ColumnType>( |
175 | 9 | block.get_by_position(arguments[0]).column.get()); |
176 | | |
177 | 9 | auto to_type = block.get_by_position(result).type; |
178 | 9 | auto to_serde = remove_nullable(to_type)->get_serde(); |
179 | | |
180 | | // string to complex type is always nullable |
181 | 9 | MutableColumnPtr to_column = make_nullable(to_type)->create_column(); |
182 | 9 | auto& nullable_col_to = assert_cast<ColumnNullable&>(*to_column); |
183 | 9 | auto& nested_column = nullable_col_to.get_nested_column(); |
184 | | |
185 | 9 | DataTypeSerDe::FormatOptions options; |
186 | 9 | options.converted_from_string = true; |
187 | 9 | options.escape_char = '\\'; |
188 | 9 | options.timezone = &context->state()->timezone_obj(); |
189 | | |
190 | 61 | for (size_t i = 0; i < input_rows_count; ++i) { |
191 | 52 | if (null_map && null_map[i]) { |
192 | 0 | nullable_col_to.insert_default(); |
193 | 52 | } else { |
194 | 52 | auto str = col_from->get_data_at(i); |
195 | 52 | Status st = to_serde->from_string(str, nested_column, options); |
196 | 52 | if (st.ok()) { |
197 | 44 | nullable_col_to.get_null_map_data().push_back(0); |
198 | 44 | } else { |
199 | 8 | nullable_col_to.insert_default(); // fill null if fail |
200 | 8 | } |
201 | 52 | } |
202 | 52 | } |
203 | | |
204 | 9 | block.get_by_position(result).column = std::move(to_column); |
205 | 9 | return Status::OK(); |
206 | 9 | } |
207 | | |
208 | | Status cast_from_string_to_complex_type_strict_mode(FunctionContext* context, Block& block, |
209 | | const ColumnNumbers& arguments, uint32_t result, |
210 | | size_t input_rows_count, |
211 | 0 | const NullMap::value_type* null_map) { |
212 | 0 | const auto* col_from = check_and_get_column<DataTypeString::ColumnType>( |
213 | 0 | block.get_by_position(arguments[0]).column.get()); |
214 | |
|
215 | 0 | auto to_type = block.get_by_position(result).type; |
216 | 0 | auto to_serde = remove_nullable(to_type)->get_serde(); |
217 | | |
218 | | // string to complex type is always nullable |
219 | 0 | MutableColumnPtr to_column = make_nullable(to_type)->create_column(); |
220 | 0 | auto& nullable_col_to = assert_cast<ColumnNullable&>(*to_column); |
221 | 0 | auto& nested_column = nullable_col_to.get_nested_column(); |
222 | |
|
223 | 0 | DataTypeSerDe::FormatOptions options; |
224 | 0 | options.converted_from_string = true; |
225 | 0 | options.escape_char = '\\'; |
226 | 0 | options.timezone = &context->state()->timezone_obj(); |
227 | |
|
228 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { |
229 | 0 | if (null_map && null_map[i]) { |
230 | 0 | to_column->insert_default(); |
231 | 0 | } else { |
232 | 0 | auto str = col_from->get_data_at(i); |
233 | 0 | RETURN_IF_ERROR(to_serde->from_string_strict_mode(str, nested_column, options)); |
234 | | // fill not null if success |
235 | 0 | nullable_col_to.get_null_map_data().push_back(0); |
236 | 0 | } |
237 | 0 | } |
238 | 0 | block.get_by_position(result).column = std::move(to_column); |
239 | 0 | return Status::OK(); |
240 | 0 | } |
241 | | |
242 | | } // namespace doris::CastWrapper |