be/src/exprs/function/function_jsonb_transform.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include <vector> |
19 | | |
20 | | #include "core/data_type/data_type_jsonb.h" |
21 | | #include "core/data_type/primitive_type.h" |
22 | | #include "exprs/function/simple_function_factory.h" |
23 | | #include "util/jsonb_document.h" |
24 | | #include "util/jsonb_document_cast.h" |
25 | | #include "util/jsonb_writer.h" |
26 | | |
27 | | namespace doris { |
28 | | |
29 | | // Sort the keys of the JSON object and deduplicate the repeated keys, keeping the first one |
30 | 0 | void sort_json_object_keys(JsonbWriter& jsonb_writer, const JsonbValue* jsonb_value) { |
31 | 0 | if (jsonb_value->isObject()) { |
32 | 0 | std::vector<std::pair<StringRef, const JsonbValue*>> kvs; |
33 | 0 | const auto* obj_val = jsonb_value->unpack<ObjectVal>(); |
34 | 0 | for (auto it = obj_val->begin(); it != obj_val->end(); ++it) { |
35 | 0 | kvs.emplace_back(StringRef(it->getKeyStr(), it->klen()), it->value()); |
36 | 0 | } |
37 | | // sort by key |
38 | 0 | std::sort(kvs.begin(), kvs.end(), |
39 | 0 | [](const auto& left, const auto& right) { return left.first < right.first; }); |
40 | | // unique by key |
41 | 0 | kvs.erase(std::unique(kvs.begin(), kvs.end(), |
42 | 0 | [](const auto& left, const auto& right) { |
43 | 0 | return left.first == right.first; |
44 | 0 | }), |
45 | 0 | kvs.end()); |
46 | 0 | jsonb_writer.writeStartObject(); |
47 | 0 | for (const auto& kv : kvs) { |
48 | 0 | jsonb_writer.writeKey(kv.first.data, static_cast<uint8_t>(kv.first.size)); |
49 | 0 | sort_json_object_keys(jsonb_writer, kv.second); |
50 | 0 | } |
51 | 0 | jsonb_writer.writeEndObject(); |
52 | 0 | } else if (jsonb_value->isArray()) { |
53 | 0 | const auto* array_val = jsonb_value->unpack<ArrayVal>(); |
54 | 0 | jsonb_writer.writeStartArray(); |
55 | 0 | for (auto it = array_val->begin(); it != array_val->end(); ++it) { |
56 | 0 | sort_json_object_keys(jsonb_writer, &*it); |
57 | 0 | } |
58 | 0 | jsonb_writer.writeEndArray(); |
59 | 0 | } else { |
60 | | // scalar value |
61 | 0 | jsonb_writer.writeValue(jsonb_value); |
62 | 0 | } |
63 | 0 | } |
64 | | |
65 | | // Convert all numeric types in JSON to double type |
66 | 0 | void normalize_json_numbers_to_double(JsonbWriter& jsonb_writer, const JsonbValue* jsonb_value) { |
67 | 0 | if (jsonb_value->isObject()) { |
68 | 0 | jsonb_writer.writeStartObject(); |
69 | 0 | const auto* obj_val = jsonb_value->unpack<ObjectVal>(); |
70 | 0 | for (auto it = obj_val->begin(); it != obj_val->end(); ++it) { |
71 | 0 | jsonb_writer.writeKey(it->getKeyStr(), it->klen()); |
72 | 0 | normalize_json_numbers_to_double(jsonb_writer, it->value()); |
73 | 0 | } |
74 | 0 | jsonb_writer.writeEndObject(); |
75 | 0 | } else if (jsonb_value->isArray()) { |
76 | 0 | const auto* array_val = jsonb_value->unpack<ArrayVal>(); |
77 | 0 | jsonb_writer.writeStartArray(); |
78 | 0 | for (auto it = array_val->begin(); it != array_val->end(); ++it) { |
79 | 0 | normalize_json_numbers_to_double(jsonb_writer, &*it); |
80 | 0 | } |
81 | 0 | jsonb_writer.writeEndArray(); |
82 | 0 | } else { |
83 | | // scalar value |
84 | 0 | if (jsonb_value->isInt() || jsonb_value->isFloat() || jsonb_value->isDouble() || |
85 | 0 | jsonb_value->isDecimal()) { |
86 | 0 | double to; |
87 | 0 | CastParameters params; |
88 | 0 | params.is_strict = false; |
89 | 0 | JsonbCast::cast_from_json_to_float(jsonb_value, to, params); |
90 | 0 | NormalizeFloat(to); |
91 | 0 | jsonb_writer.writeDouble(to); |
92 | 0 | } else { |
93 | 0 | jsonb_writer.writeValue(jsonb_value); |
94 | 0 | } |
95 | 0 | } |
96 | 0 | } |
97 | | |
98 | | // Input jsonb, output jsonb |
99 | | template <typename Impl> |
100 | | class FunctionJsonbTransform : public IFunction { |
101 | | public: |
102 | | static constexpr auto name = Impl::name; |
103 | | |
104 | 4 | static FunctionPtr create() { return std::make_shared<FunctionJsonbTransform>(); }_ZN5doris22FunctionJsonbTransformINS_18SortJsonObjectKeysEE6createEv Line | Count | Source | 104 | 2 | static FunctionPtr create() { return std::make_shared<FunctionJsonbTransform>(); } |
_ZN5doris22FunctionJsonbTransformINS_28NormalizeJsonNumbersToDoubleEE6createEv Line | Count | Source | 104 | 2 | static FunctionPtr create() { return std::make_shared<FunctionJsonbTransform>(); } |
|
105 | | |
106 | 2 | String get_name() const override { return name; }_ZNK5doris22FunctionJsonbTransformINS_18SortJsonObjectKeysEE8get_nameB5cxx11Ev Line | Count | Source | 106 | 1 | String get_name() const override { return name; } |
_ZNK5doris22FunctionJsonbTransformINS_28NormalizeJsonNumbersToDoubleEE8get_nameB5cxx11Ev Line | Count | Source | 106 | 1 | String get_name() const override { return name; } |
|
107 | | |
108 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
109 | 0 | return std::make_shared<DataTypeJsonb>(); |
110 | 0 | } Unexecuted instantiation: _ZNK5doris22FunctionJsonbTransformINS_18SortJsonObjectKeysEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Unexecuted instantiation: _ZNK5doris22FunctionJsonbTransformINS_28NormalizeJsonNumbersToDoubleEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE |
111 | | |
112 | 0 | size_t get_number_of_arguments() const override { return 1; }Unexecuted instantiation: _ZNK5doris22FunctionJsonbTransformINS_18SortJsonObjectKeysEE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris22FunctionJsonbTransformINS_28NormalizeJsonNumbersToDoubleEE23get_number_of_argumentsEv |
113 | | |
114 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
115 | 0 | uint32_t result, size_t size) const override { |
116 | 0 | auto input_column = block.get_by_position(arguments[0]).column; |
117 | 0 | auto to_column = ColumnString::create(); |
118 | |
|
119 | 0 | const auto& input_jsonb_column = assert_cast<const ColumnString&>(*input_column); |
120 | |
|
121 | 0 | to_column->get_chars().reserve(input_jsonb_column.get_chars().size()); |
122 | 0 | to_column->get_offsets().reserve(input_jsonb_column.get_offsets().size()); |
123 | |
|
124 | 0 | JsonbWriter writer; |
125 | 0 | for (size_t i = 0; i < size; ++i) { |
126 | 0 | StringRef val = input_jsonb_column.get_data_at(i); |
127 | 0 | const JsonbDocument* doc = nullptr; |
128 | 0 | auto st = JsonbDocument::checkAndCreateDocument(val.data, val.size, &doc); |
129 | 0 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { |
130 | | // mayby be invalid jsonb, just insert default |
131 | | // invalid jsonb value may be caused by the default null processing |
132 | | // insert empty string |
133 | 0 | to_column->insert_default(); |
134 | 0 | continue; |
135 | 0 | } |
136 | 0 | const JsonbValue* value = doc->getValue(); |
137 | 0 | if (UNLIKELY(!value)) { |
138 | | // mayby be invalid jsonb, just insert default |
139 | | // invalid jsonb value may be caused by the default null processing |
140 | | // insert empty string |
141 | 0 | to_column->insert_default(); |
142 | 0 | continue; |
143 | 0 | } |
144 | | |
145 | 0 | writer.reset(); |
146 | |
|
147 | 0 | Impl::transform(writer, value); |
148 | |
|
149 | 0 | to_column->insert_data(writer.getOutput()->getBuffer(), writer.getOutput()->getSize()); |
150 | 0 | } |
151 | 0 | block.get_by_position(result).column = std::move(to_column); |
152 | 0 | return Status::OK(); |
153 | 0 | } Unexecuted instantiation: _ZNK5doris22FunctionJsonbTransformINS_18SortJsonObjectKeysEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Unexecuted instantiation: _ZNK5doris22FunctionJsonbTransformINS_28NormalizeJsonNumbersToDoubleEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm |
154 | | }; |
155 | | |
156 | | struct SortJsonObjectKeys { |
157 | | static constexpr auto name = "sort_json_object_keys"; |
158 | 0 | static void transform(JsonbWriter& writer, const JsonbValue* value) { |
159 | 0 | sort_json_object_keys(writer, value); |
160 | 0 | } |
161 | | }; |
162 | | |
163 | | struct NormalizeJsonNumbersToDouble { |
164 | | static constexpr auto name = "normalize_json_numbers_to_double"; |
165 | 0 | static void transform(JsonbWriter& writer, const JsonbValue* value) { |
166 | 0 | normalize_json_numbers_to_double(writer, value); |
167 | 0 | } |
168 | | }; |
169 | | |
170 | | using FunctionSortJsonObjectKeys = FunctionJsonbTransform<SortJsonObjectKeys>; |
171 | | using FunctionNormalizeJsonNumbersToDouble = FunctionJsonbTransform<NormalizeJsonNumbersToDouble>; |
172 | | |
173 | 1 | void register_function_json_transform(SimpleFunctionFactory& factory) { |
174 | 1 | factory.register_function<FunctionSortJsonObjectKeys>(); |
175 | 1 | factory.register_function<FunctionNormalizeJsonNumbersToDouble>(); |
176 | | |
177 | 1 | factory.register_alias(FunctionSortJsonObjectKeys::name, "sort_jsonb_object_keys"); |
178 | 1 | factory.register_alias(FunctionNormalizeJsonNumbersToDouble::name, |
179 | 1 | "normalize_jsonb_numbers_to_double"); |
180 | 1 | } |
181 | | |
182 | | } // namespace doris |