be/src/exprs/function/cast/cast_to_jsonb.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "core/assert_cast.h" |
19 | | #include "core/data_type/data_type_jsonb.h" |
20 | | #include "core/data_type/data_type_nullable.h" |
21 | | #include "core/data_type/primitive_type.h" |
22 | | #include "core/data_type_serde/data_type_serde.h" |
23 | | #include "core/string_ref.h" |
24 | | #include "core/value/jsonb_value.h" |
25 | | #include "exprs/function/cast/cast_base.h" |
26 | | #include "exprs/function/cast/cast_to_string.h" |
27 | | #include "util/io_helper.h" |
28 | | #include "util/jsonb_utils.h" |
29 | | #include "util/jsonb_writer.h" |
30 | | |
31 | | namespace doris::CastWrapper { |
32 | | #include "common/compile_check_begin.h" |
33 | | |
// Generic JSONB -> target-type cast that goes through text: each input row is
// validated as a JSONB document, turned into a string, and handed to the target
// type serde via from_string(). Rows that cannot be converted are flagged NULL.
34 | | struct ConvertImplGenericFromJsonb {
// context: provides the timezone (through state()) and jsonb_string_as_string().
// block / arguments / result: input is block column arguments[0]; the output
// column is stored at block position result as a ColumnNullable.
// input_rows_count and null_map are not read here; the row count is taken from
// the input column itself.
// Returns OK on success, RuntimeError when the input column is not a ColumnString.
35 | | static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
36 | | uint32_t result, size_t input_rows_count,
37 | 1 | const NullMap::value_type* null_map = nullptr) {
38 | 1 | auto data_type_to = block.get_by_position(result).type;
39 | 1 | auto data_type_serde_to = data_type_to->get_serde();
40 | | 
// Options passed to the target serde from_string() call further below.
41 | 1 | DataTypeSerDe::FormatOptions options;
42 | 1 | options.converted_from_string = true;
43 | 1 | options.escape_char = '\\';
44 | 1 | options.timezone = &context->state()->timezone_obj();
45 | | 
46 | 1 | const auto& col_with_type_and_name = block.get_by_position(arguments[0]);
47 | 1 | const IColumn& col_from = *col_with_type_and_name.column;
48 | 1 | if (const ColumnString* col_from_string = check_and_get_column<ColumnString>(&col_from)) {
49 | 1 | auto col_to = data_type_to->create_column();
50 | | 
51 | 1 | size_t size = col_from.size();
52 | 1 | col_to->reserve(size);
53 | | 
// Result null map starts as all-zero (not null); rows are set to 1 as they fail.
54 | 1 | ColumnUInt8::MutablePtr col_null_map_to = ColumnUInt8::create(size, 0);
55 | 1 | ColumnUInt8::Container* vec_null_map_to = &col_null_map_to->get_data();
56 | 1 | const bool is_complex = is_complex_type(data_type_to->get_primitive_type());
57 | 1 | const bool is_dst_string = is_string_type(data_type_to->get_primitive_type());
58 | 2 | for (size_t i = 0; i < size; ++i) {
59 | 1 | const auto& val = col_from_string->get_data_at(i);
60 | 1 | const JsonbDocument* doc = nullptr;
61 | 1 | auto st = JsonbDocument::checkAndCreateDocument(val.data, val.size, &doc);
// Document validation failed or there is no root value: emit NULL for this row.
62 | 1 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
63 | 1 | (*vec_null_map_to)[i] = 1;
64 | 1 | col_to->insert_default();
65 | 1 | continue;
66 | 1 | }
67 | | 
68 | | // value is NOT necessary to be deleted since JsonbValue will not allocate memory
69 | 0 | const JsonbValue* value = doc->getValue();
// NOTE(review): doc->getValue() was already null-checked in the condition above,
// so this branch looks unreachable - confirm before relying on it.
70 | 0 | if (UNLIKELY(!value)) {
71 | 0 | (*vec_null_map_to)[i] = 1;
72 | 0 | col_to->insert_default();
73 | 0 | continue;
74 | 0 | }
75 | | // Note: here we should handle the null element
// NOTE(review): this point is only reached after checkAndCreateDocument succeeded
// on val; whether that can happen for a zero-length value is not visible here - confirm.
76 | 0 | if (val.size == 0) {
77 | 0 | col_to->insert_default();
78 | | // empty string('') is an invalid format for complex type, set null_map to 1
79 | 0 | if (is_complex) {
80 | 0 | (*vec_null_map_to)[i] = 1;
81 | 0 | }
82 | 0 | continue;
83 | 0 | }
84 | | // add string to string column
// Direct path: when jsonb_string_as_string() is set, the target is a string type and
// the JSONB value is a string, the blob bytes are appended to the output column
// without the text round trip through the serde.
85 | 0 | if (context->jsonb_string_as_string() && is_dst_string && value->isString()) {
86 | 0 | const auto* blob = value->unpack<JsonbBinaryVal>();
// col_to is cast to ColumnString with the release-mode type check disabled;
// presumably safe because is_dst_string holds - confirm.
87 | 0 | assert_cast<ColumnString&, TypeCheckOnRelease::DISABLE>(*col_to).insert_data(
88 | 0 | blob->getBlob(), blob->getBlobLen());
89 | 0 | (*vec_null_map_to)[i] = 0;
90 | 0 | continue;
91 | 0 | }
// Choose the text given to the serde: the string blob itself when
// jsonb_string_as_string() is set and the value is a string, otherwise the
// output of jsonb_to_json_string() for the whole row value.
92 | 0 | std::string input_str;
93 | 0 | if (context->jsonb_string_as_string() && value->isString()) {
94 | 0 | const auto* blob = value->unpack<JsonbBinaryVal>();
95 | 0 | input_str = std::string(blob->getBlob(), blob->getBlobLen());
96 | 0 | } else {
97 | 0 | input_str = JsonbToJson::jsonb_to_json_string(val.data, val.size);
98 | 0 | }
// Empty text is not passed to the serde; the row becomes NULL.
99 | 0 | if (input_str.empty()) {
100 | 0 | col_to->insert_default();
101 | 0 | (*vec_null_map_to)[i] = 1;
102 | 0 | continue;
103 | 0 | }
104 | 0 | StringRef read_buffer((char*)(input_str.data()), input_str.size());
105 | 0 | st = data_type_serde_to->from_string(read_buffer, *col_to, options);
106 | | // if parsing failed, will return null
107 | 0 | (*vec_null_map_to)[i] = !st.ok();
// A default value is appended on failure so col_to keeps one entry per row
// (assumes from_string appended nothing when it failed - confirm).
108 | 0 | if (!st.ok()) {
109 | 0 | col_to->insert_default();
110 | 0 | }
111 | 0 | }
112 | 1 | block.get_by_position(result).column =
113 | 1 | ColumnNullable::create(std::move(col_to), std::move(col_null_map_to));
114 | 1 | } else {
// NOTE(review): the message below says conversion from string although this
// struct handles JSONB input - confirm whether the wording is intended.
115 | 0 | return Status::RuntimeError(
116 | 0 | "Illegal column {} of first argument of conversion function from string",
117 | 0 | col_from.get_name());
118 | 0 | }
119 | 1 | return Status::OK();
120 | 1 | }
121 | | };
122 | | |
// Returns true when any of is_int_or_bool(pt), is_float_or_double(pt),
// is_string_type(pt) or is_decimal(pt) holds, or when pt is TYPE_ARRAY or
// TYPE_STRUCT; false otherwise.
// Presumably this is the set of target types a JSONB value may be cast to - confirm
// against the callers, none of which are visible in this listing.
123 | 0 | inline bool can_cast_json_type(PrimitiveType pt) {
124 | 0 | return is_int_or_bool(pt) || is_float_or_double(pt) || is_string_type(pt) || is_decimal(pt) ||
125 | 0 | pt == TYPE_ARRAY || pt == TYPE_STRUCT;
126 | 0 | }
127 | | |
128 | | // check jsonb value type and get to_type value |
// Builds the wrapper that casts a JSONB column to to_type.
//  - to_type is a string type and jsonb_string_as_string is true:
//    returns ConvertImplGenericFromJsonb::execute.
//  - otherwise: returns a lambda that delegates to the target serde
//    deserialize_column_from_jsonb_vector().
// from_type is not read in the body.
// NOTE(review): this function is defined in a .h file without `inline`;
// including the header from more than one translation unit would presumably
// give duplicate definitions at link time - confirm how the header is included.
129 | | WrapperType create_cast_from_jsonb_wrapper(const DataTypeJsonb& from_type,
130 | | const DataTypePtr& to_type,
131 | 113 | bool jsonb_string_as_string) {
132 | 113 | if (is_string_type(to_type->get_primitive_type()) && jsonb_string_as_string) {
133 | 1 | return ConvertImplGenericFromJsonb::execute;
134 | 1 | }
135 | | 
// The lambda captures nothing; the target type is re-read from the block at call
// time. input_rows_count and the unnamed NullMap parameter are not used.
136 | 112 | return [](FunctionContext* context, Block& block, const ColumnNumbers& arguments,
137 | 112 | uint32_t result, size_t input_rows_count, const NullMap::value_type*) {
// The strict-mode flag is forwarded to the serde through CastParameters.
138 | 112 | CastParameters params;
139 | 112 | params.is_strict = context->enable_strict_mode();
140 | | 
// The serde is taken from the result type with nullability removed ...
141 | 112 | auto data_type_to = remove_nullable(block.get_by_position(result).type);
142 | 112 | auto serde_to = data_type_to->get_serde();
143 | | 
144 | 112 | const auto& col_from_json =
145 | 112 | assert_cast<const ColumnString&>(*block.get_by_position(arguments[0]).column);
146 | | 
// ... while the output column is created from the nullable form of that type and
// passed to the serde as a ColumnNullable.
147 | 112 | auto column_to = make_nullable(data_type_to)->create_column();
148 | 112 | auto& column_to_nullable = assert_cast<ColumnNullable&>(*column_to);
149 | | 
// A non-OK status from the serde is returned to the caller unchanged.
150 | 112 | RETURN_IF_ERROR(serde_to->deserialize_column_from_jsonb_vector(column_to_nullable,
151 | 112 | col_from_json, params));
152 | | 
153 | 112 | block.get_by_position(result).column = std::move(column_to);
154 | 112 | return Status::OK();
155 | 112 | };
156 | 113 | }
157 | | |
// Casts a string column to JSONB by parsing every row with
// JsonBinaryValue::from_json_string(). Strict and non-strict modes differ only in
// how a row that fails to parse is handled.
158 | | struct ParseJsonbFromString {
// Parses str and, on success, appends the resulting binary value to column_string.
// Returns InvalidArgument for an empty str or when from_json_string() fails; in
// both cases this function appends nothing to column_string.
159 | 4 | static Status parse_json(const StringRef& str, ColumnString& column_string) {
160 | 4 | if (str.empty()) {
161 | 0 | return Status::InvalidArgument("Empty string cannot be parsed as jsonb");
162 | 0 | }
163 | 4 | JsonBinaryValue value;
164 | 4 | auto st = (value.from_json_string(str.data, str.size));
165 | 4 | if (!st.ok()) {
166 | 2 | return Status::InvalidArgument("Failed to parse json string: {}, error: {}",
167 | 2 | str.to_string(), st.msg());
168 | 2 | }
169 | 2 | column_string.insert_data(value.value(), value.size());
170 | 2 | return Status::OK();
171 | 4 | }
172 | | 
// Non-strict mode: a row that fails to parse is flagged NULL and gets a default
// placeholder value so data and null map stay the same length. Always returns OK.
// The input null map is not consulted here.
173 | | static Status execute_non_strict(const ColumnString& col_from, size_t size,
174 | 4 | ColumnPtr& column_result) {
175 | 4 | auto col_to = ColumnString::create();
176 | 4 | auto col_null = ColumnBool::create(size, 0);
177 | 4 | auto& vec_null_map_to = col_null->get_data();
178 | | 
179 | 8 | for (size_t i = 0; i < size; ++i) {
180 | 4 | Status st = parse_json(col_from.get_data_at(i), *col_to);
181 | 4 | vec_null_map_to[i] = !st.ok();
182 | 4 | if (!st.ok()) [[unlikely]] {
183 | 2 | col_to->insert_default();
184 | 2 | }
185 | 4 | }
186 | 4 | column_result = ColumnNullable::create(std::move(col_to), std::move(col_null));
187 | 4 | return Status::OK();
188 | 4 | }
189 | | 
190 | | // in both strict or non-strict mode, the return type is nullable column
// Strict mode: the first row that fails to parse aborts the cast and its error is
// returned. Rows flagged in null_map are not parsed; a default value is inserted
// for them instead. The null map of the result is all zero.
// NOTE(review): rows that were null in the input are therefore marked not-null in
// the returned column; presumably the caller re-applies the input null map - confirm.
191 | | static Status execute_strict(const ColumnString& col_from, const NullMap::value_type* null_map,
192 | 0 | size_t size, ColumnPtr& column_result) {
193 | 0 | auto col_to = ColumnString::create();
194 | 0 | for (size_t i = 0; i < size; ++i) {
195 | 0 | if (null_map && null_map[i]) {
196 | 0 | col_to->insert_default();
197 | 0 | continue;
198 | 0 | }
199 | 0 | RETURN_IF_ERROR(parse_json(col_from.get_data_at(i), *col_to));
200 | 0 | }
201 | 0 | column_result = ColumnNullable::create(std::move(col_to), ColumnBool::create(size, 0));
202 | 0 | return Status::OK();
203 | 0 | }
204 | | 
// Entry point used as the cast wrapper. Reads the input from block column
// arguments[0], dispatches on context->enable_strict_mode(), and stores the
// produced column at block position result.
// input_rows_count is not read (the size comes from the input column) and
// null_map is only forwarded to the strict path.
205 | | static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
206 | | uint32_t result, size_t input_rows_count,
207 | 4 | const NullMap::value_type* null_map) {
208 | 4 | const auto& col_from =
209 | 4 | assert_cast<const ColumnString&>(*block.get_by_position(arguments[0]).column);
210 | 4 | const auto size = col_from.size();
211 | | 
212 | 4 | ColumnPtr column_result;
213 | 4 | if (context->enable_strict_mode()) {
214 | 0 | RETURN_IF_ERROR(execute_strict(col_from, null_map, size, column_result));
215 |

216 | 4 | } else {
217 | 4 | RETURN_IF_ERROR(execute_non_strict(col_from, size, column_result));
218 | 4 | }
219 | 4 | block.get_by_position(result).column = std::move(column_result);
220 | | 
221 | 4 | return Status::OK();
222 | 4 | }
223 | | };
224 | | |
225 | | // create corresponding jsonb value with type to_type |
226 | | // use jsonb writer to create jsonb value |
// Builds the wrapper that casts a column of from_type to JSONB.
//  - from_type is a string type and string_as_jsonb_string is false:
//    returns ParseJsonbFromString::execute, which parses the string content.
//  - otherwise: returns a lambda that serializes the source column through its
//    serde serialize_column_to_jsonb_vector().
// to_type is not read in the body.
// NOTE(review): this function is defined in a .h file without `inline`;
// including the header from more than one translation unit would presumably
// give duplicate definitions at link time - confirm how the header is included.
227 | | WrapperType create_cast_to_jsonb_wrapper(const DataTypePtr& from_type, const DataTypeJsonb& to_type,
228 | 48.1k | bool string_as_jsonb_string) {
229 | | // parse string as jsonb
230 | 48.1k | if (is_string_type(from_type->get_primitive_type()) && !string_as_jsonb_string) {
231 | 4 | return ParseJsonbFromString::execute;
232 | 4 | }
233 | | 
// The lambda captures nothing; the source serde is looked up from the block at
// call time. context, input_rows_count and the unnamed NullMap parameter are unused.
234 | 48.1k | return [](FunctionContext* context, Block& block, const ColumnNumbers& arguments,
235 | 48.1k | uint32_t result, size_t input_rows_count, const NullMap::value_type*) {
236 | | // same as to_json function
237 | 48.1k | auto to_column = ColumnString::create();
238 | 48.1k | auto from_type_serde = block.get_by_position(arguments[0]).type->get_serde();
239 | 48.1k | auto from_column = block.get_by_position(arguments[0]).column;
// A non-OK status from the serde is returned to the caller unchanged.
240 | 48.1k | RETURN_IF_ERROR(
241 | 48.1k | from_type_serde->serialize_column_to_jsonb_vector(*from_column, *to_column));
// The result stored here is the plain ColumnString; it is not wrapped in a
// ColumnNullable by this lambda.
242 | 48.1k | block.get_by_position(result).column = std::move(to_column);
243 | 48.1k | return Status::OK();
244 | 48.1k | };
245 | 48.1k | }
246 | | #include "common/compile_check_end.h" |
247 | | } // namespace doris::CastWrapper |