be/src/exprs/function/cast/cast_to_jsonb.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "core/assert_cast.h" |
19 | | #include "core/data_type/data_type_jsonb.h" |
20 | | #include "core/data_type/data_type_nullable.h" |
21 | | #include "core/data_type/primitive_type.h" |
22 | | #include "core/data_type_serde/data_type_serde.h" |
23 | | #include "core/string_ref.h" |
24 | | #include "core/value/jsonb_value.h" |
25 | | #include "exprs/function/cast/cast_base.h" |
26 | | #include "exprs/function/cast/cast_to_string.h" |
27 | | #include "util/io_helper.h" |
28 | | #include "util/jsonb_utils.h" |
29 | | #include "util/jsonb_writer.h" |
30 | | |
31 | | namespace doris::CastWrapper { |
32 | | |
33 | | struct ConvertImplGenericFromJsonb { |
34 | | static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
35 | | uint32_t result, size_t input_rows_count, |
36 | 1 | const NullMap::value_type* null_map = nullptr) { |
37 | 1 | auto data_type_to = block.get_by_position(result).type; |
38 | 1 | auto data_type_serde_to = data_type_to->get_serde(); |
39 | | |
40 | 1 | DataTypeSerDe::FormatOptions options; |
41 | 1 | options.converted_from_string = true; |
42 | 1 | options.escape_char = '\\'; |
43 | 1 | options.timezone = &context->state()->timezone_obj(); |
44 | | |
45 | 1 | const auto& col_with_type_and_name = block.get_by_position(arguments[0]); |
46 | 1 | const IColumn& col_from = *col_with_type_and_name.column; |
47 | 1 | if (const ColumnString* col_from_string = check_and_get_column<ColumnString>(&col_from)) { |
48 | 1 | auto col_to = data_type_to->create_column(); |
49 | | |
50 | 1 | size_t size = col_from.size(); |
51 | 1 | col_to->reserve(size); |
52 | | |
53 | 1 | ColumnUInt8::MutablePtr col_null_map_to = ColumnUInt8::create(size, 0); |
54 | 1 | ColumnUInt8::Container* vec_null_map_to = &col_null_map_to->get_data(); |
55 | 1 | const bool is_complex = is_complex_type(data_type_to->get_primitive_type()); |
56 | 1 | const bool is_dst_string = is_string_type(data_type_to->get_primitive_type()); |
57 | 2 | for (size_t i = 0; i < size; ++i) { |
58 | 1 | const auto& val = col_from_string->get_data_at(i); |
59 | 1 | const JsonbDocument* doc = nullptr; |
60 | 1 | auto st = JsonbDocument::checkAndCreateDocument(val.data, val.size, &doc); |
61 | 1 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { |
62 | 1 | (*vec_null_map_to)[i] = 1; |
63 | 1 | col_to->insert_default(); |
64 | 1 | continue; |
65 | 1 | } |
66 | | |
67 | | // value is NOT necessary to be deleted since JsonbValue will not allocate memory |
68 | 0 | const JsonbValue* value = doc->getValue(); |
69 | 0 | if (UNLIKELY(!value)) { |
70 | 0 | (*vec_null_map_to)[i] = 1; |
71 | 0 | col_to->insert_default(); |
72 | 0 | continue; |
73 | 0 | } |
74 | | // Note: here we should handle the null element |
75 | 0 | if (val.size == 0) { |
76 | 0 | col_to->insert_default(); |
77 | | // empty string('') is an invalid format for complex type, set null_map to 1 |
78 | 0 | if (is_complex) { |
79 | 0 | (*vec_null_map_to)[i] = 1; |
80 | 0 | } |
81 | 0 | continue; |
82 | 0 | } |
83 | | // add string to string column |
84 | 0 | if (context->jsonb_string_as_string() && is_dst_string && value->isString()) { |
85 | 0 | const auto* blob = value->unpack<JsonbBinaryVal>(); |
86 | 0 | assert_cast<ColumnString&, TypeCheckOnRelease::DISABLE>(*col_to).insert_data( |
87 | 0 | blob->getBlob(), blob->getBlobLen()); |
88 | 0 | (*vec_null_map_to)[i] = 0; |
89 | 0 | continue; |
90 | 0 | } |
91 | 0 | std::string input_str; |
92 | 0 | if (context->jsonb_string_as_string() && value->isString()) { |
93 | 0 | const auto* blob = value->unpack<JsonbBinaryVal>(); |
94 | 0 | input_str = std::string(blob->getBlob(), blob->getBlobLen()); |
95 | 0 | } else { |
96 | 0 | input_str = JsonbToJson::jsonb_to_json_string(val.data, val.size); |
97 | 0 | } |
98 | 0 | if (input_str.empty()) { |
99 | 0 | col_to->insert_default(); |
100 | 0 | (*vec_null_map_to)[i] = 1; |
101 | 0 | continue; |
102 | 0 | } |
103 | 0 | StringRef read_buffer((char*)(input_str.data()), input_str.size()); |
104 | 0 | st = data_type_serde_to->from_string(read_buffer, *col_to, options); |
105 | | // if parsing failed, will return null |
106 | 0 | (*vec_null_map_to)[i] = !st.ok(); |
107 | 0 | if (!st.ok()) { |
108 | 0 | col_to->insert_default(); |
109 | 0 | } |
110 | 0 | } |
111 | 1 | block.get_by_position(result).column = |
112 | 1 | ColumnNullable::create(std::move(col_to), std::move(col_null_map_to)); |
113 | 1 | } else { |
114 | 0 | return Status::RuntimeError( |
115 | 0 | "Illegal column {} of first argument of conversion function from string", |
116 | 0 | col_from.get_name()); |
117 | 0 | } |
118 | 1 | return Status::OK(); |
119 | 1 | } |
120 | | }; |
121 | | |
122 | 0 | inline bool can_cast_json_type(PrimitiveType pt) { |
123 | 0 | return is_int_or_bool(pt) || is_float_or_double(pt) || is_string_type(pt) || is_decimal(pt) || |
124 | 0 | pt == TYPE_ARRAY || pt == TYPE_STRUCT; |
125 | 0 | } |
126 | | |
127 | | // check jsonb value type and get to_type value |
128 | | WrapperType create_cast_from_jsonb_wrapper(const DataTypeJsonb& from_type, |
129 | | const DataTypePtr& to_type, |
130 | 113 | bool jsonb_string_as_string) { |
131 | 113 | if (is_string_type(to_type->get_primitive_type()) && jsonb_string_as_string) { |
132 | 1 | return ConvertImplGenericFromJsonb::execute; |
133 | 1 | } |
134 | | |
135 | 112 | return [](FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
136 | 112 | uint32_t result, size_t input_rows_count, const NullMap::value_type*) { |
137 | 112 | CastParameters params; |
138 | 112 | params.is_strict = context->enable_strict_mode(); |
139 | | |
140 | 112 | auto data_type_to = remove_nullable(block.get_by_position(result).type); |
141 | 112 | auto serde_to = data_type_to->get_serde(); |
142 | | |
143 | 112 | const auto& col_from_json = |
144 | 112 | assert_cast<const ColumnString&>(*block.get_by_position(arguments[0]).column); |
145 | | |
146 | 112 | auto column_to = make_nullable(data_type_to)->create_column(); |
147 | 112 | auto& column_to_nullable = assert_cast<ColumnNullable&>(*column_to); |
148 | | |
149 | 112 | RETURN_IF_ERROR(serde_to->deserialize_column_from_jsonb_vector(column_to_nullable, |
150 | 112 | col_from_json, params)); |
151 | | |
152 | 112 | block.get_by_position(result).column = std::move(column_to); |
153 | 112 | return Status::OK(); |
154 | 112 | }; |
155 | 113 | } |
156 | | |
157 | | struct ParseJsonbFromString { |
158 | 4 | static Status parse_json(const StringRef& str, ColumnString& column_string) { |
159 | 4 | if (str.empty()) { |
160 | 0 | return Status::InvalidArgument("Empty string cannot be parsed as jsonb"); |
161 | 0 | } |
162 | 4 | JsonBinaryValue value; |
163 | 4 | auto st = (value.from_json_string(str.data, str.size)); |
164 | 4 | if (!st.ok()) { |
165 | 2 | return Status::InvalidArgument("Failed to parse json string: {}, error: {}", |
166 | 2 | str.to_string(), st.msg()); |
167 | 2 | } |
168 | 2 | column_string.insert_data(value.value(), value.size()); |
169 | 2 | return Status::OK(); |
170 | 4 | } |
171 | | |
172 | | static Status execute_non_strict(const ColumnString& col_from, size_t size, |
173 | 4 | ColumnPtr& column_result) { |
174 | 4 | auto col_to = ColumnString::create(); |
175 | 4 | auto col_null = ColumnBool::create(size, 0); |
176 | 4 | auto& vec_null_map_to = col_null->get_data(); |
177 | | |
178 | 8 | for (size_t i = 0; i < size; ++i) { |
179 | 4 | Status st = parse_json(col_from.get_data_at(i), *col_to); |
180 | 4 | vec_null_map_to[i] = !st.ok(); |
181 | 4 | if (!st.ok()) [[unlikely]] { |
182 | 2 | col_to->insert_default(); |
183 | 2 | } |
184 | 4 | } |
185 | 4 | column_result = ColumnNullable::create(std::move(col_to), std::move(col_null)); |
186 | 4 | return Status::OK(); |
187 | 4 | } |
188 | | |
189 | | // in both strict or non-strict mode, the return type is nullable column |
190 | | static Status execute_strict(const ColumnString& col_from, const NullMap::value_type* null_map, |
191 | 0 | size_t size, ColumnPtr& column_result) { |
192 | 0 | auto col_to = ColumnString::create(); |
193 | 0 | for (size_t i = 0; i < size; ++i) { |
194 | 0 | if (null_map && null_map[i]) { |
195 | 0 | col_to->insert_default(); |
196 | 0 | continue; |
197 | 0 | } |
198 | 0 | RETURN_IF_ERROR(parse_json(col_from.get_data_at(i), *col_to)); |
199 | 0 | } |
200 | 0 | column_result = ColumnNullable::create(std::move(col_to), ColumnBool::create(size, 0)); |
201 | 0 | return Status::OK(); |
202 | 0 | } |
203 | | |
204 | | static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
205 | | uint32_t result, size_t input_rows_count, |
206 | 4 | const NullMap::value_type* null_map) { |
207 | 4 | const auto& col_from = |
208 | 4 | assert_cast<const ColumnString&>(*block.get_by_position(arguments[0]).column); |
209 | 4 | const auto size = col_from.size(); |
210 | | |
211 | 4 | ColumnPtr column_result; |
212 | 4 | if (context->enable_strict_mode()) { |
213 | 0 | RETURN_IF_ERROR(execute_strict(col_from, null_map, size, column_result)); |
214 | |
|
215 | 4 | } else { |
216 | 4 | RETURN_IF_ERROR(execute_non_strict(col_from, size, column_result)); |
217 | 4 | } |
218 | 4 | block.get_by_position(result).column = std::move(column_result); |
219 | | |
220 | 4 | return Status::OK(); |
221 | 4 | } |
222 | | }; |
223 | | |
224 | | // create corresponding jsonb value with type to_type |
225 | | // use jsonb writer to create jsonb value |
226 | | WrapperType create_cast_to_jsonb_wrapper(const DataTypePtr& from_type, const DataTypeJsonb& to_type, |
227 | 48.1k | bool string_as_jsonb_string) { |
228 | | // parse string as jsonb |
229 | 48.1k | if (is_string_type(from_type->get_primitive_type()) && !string_as_jsonb_string) { |
230 | 4 | return ParseJsonbFromString::execute; |
231 | 4 | } |
232 | | |
233 | 48.1k | return [](FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
234 | 48.1k | uint32_t result, size_t input_rows_count, const NullMap::value_type*) { |
235 | | // same as to_json function |
236 | 48.1k | auto to_column = ColumnString::create(); |
237 | 48.1k | auto from_type_serde = block.get_by_position(arguments[0]).type->get_serde(); |
238 | 48.1k | auto from_column = block.get_by_position(arguments[0]).column; |
239 | 48.1k | RETURN_IF_ERROR( |
240 | 48.1k | from_type_serde->serialize_column_to_jsonb_vector(*from_column, *to_column)); |
241 | 48.1k | block.get_by_position(result).column = std::move(to_column); |
242 | 48.1k | return Status::OK(); |
243 | 48.1k | }; |
244 | 48.1k | } |
245 | | } // namespace doris::CastWrapper |