be/src/util/json/simd_json_parser.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // This file is copied from |
18 | | // https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/JSONParsers/SimdJSONParser.h |
19 | | // and modified by Doris |
20 | | |
21 | | #pragma once |
22 | | |
23 | | #include <rapidjson/document.h> |
24 | | #include <simdjson.h> |
25 | | |
26 | | #include "core/types.h" |
27 | | |
28 | | namespace doris { |
29 | | |
30 | | #include "common/compile_check_begin.h" |
31 | | |
32 | | /// This class can be used as an argument for the template class FunctionJSON. |
33 | | /// It provides ability to parse JSONs using simdjson library. |
34 | | class SimdJSONParser { |
35 | | public: |
36 | | class Array; |
37 | | class Object; |
38 | | /// References an element in a JSON document, representing a JSON null, boolean, string, number, |
39 | | /// array or object. |
40 | | class Element { |
41 | | public: |
42 | 1.35M | ALWAYS_INLINE Element() {} /// NOLINT |
43 | | ALWAYS_INLINE Element(const simdjson::dom::element& element_) |
44 | 24.6M | : element(element_) {} /// NOLINT |
45 | 19.8M | ALWAYS_INLINE bool isInt64() const { |
46 | 19.8M | return element.type() == simdjson::dom::element_type::INT64; |
47 | 19.8M | } |
48 | 14.6M | ALWAYS_INLINE bool isUInt64() const { |
49 | 14.6M | return element.type() == simdjson::dom::element_type::UINT64; |
50 | 14.6M | } |
51 | 14.6M | ALWAYS_INLINE bool isDouble() const { |
52 | 14.6M | return element.type() == simdjson::dom::element_type::DOUBLE; |
53 | 14.6M | } |
54 | 11.8M | ALWAYS_INLINE bool isString() const { |
55 | 11.8M | return element.type() == simdjson::dom::element_type::STRING; |
56 | 11.8M | } |
57 | 24.8M | ALWAYS_INLINE bool isArray() const { |
58 | 24.8M | return element.type() == simdjson::dom::element_type::ARRAY; |
59 | 24.8M | } |
60 | 25.7M | ALWAYS_INLINE bool isObject() const { |
61 | 25.7M | return element.type() == simdjson::dom::element_type::OBJECT; |
62 | 25.7M | } |
63 | 20.2M | ALWAYS_INLINE bool isBool() const { |
64 | 20.2M | return element.type() == simdjson::dom::element_type::BOOLEAN; |
65 | 20.2M | } |
66 | 110k | ALWAYS_INLINE bool isNull() const { |
67 | 110k | return element.type() == simdjson::dom::element_type::NULL_VALUE; |
68 | 110k | } |
69 | 5.33M | ALWAYS_INLINE Int64 getInt64() const { return element.get_int64().value_unsafe(); } |
70 | 2.91M | ALWAYS_INLINE double getDouble() const { return element.get_double().value_unsafe(); } |
71 | 358k | ALWAYS_INLINE bool getBool() const { return element.get_bool().value_unsafe(); } |
72 | 11.7M | ALWAYS_INLINE std::string_view getString() const { |
73 | 11.7M | return element.get_string().value_unsafe(); |
74 | 11.7M | } |
75 | 29 | ALWAYS_INLINE UInt64 getUInt64() const { return element.get_uint64().value_unsafe(); } |
76 | | ALWAYS_INLINE Array getArray() const; |
77 | | ALWAYS_INLINE Object getObject() const; |
78 | | |
79 | | private: |
80 | | simdjson::dom::element element; |
81 | | }; |
82 | | /// References an array in a JSON document. |
83 | | class Array { |
84 | | public: |
85 | | class Iterator { |
86 | | public: |
87 | | ALWAYS_INLINE Iterator(const simdjson::dom::array::iterator& it_) |
88 | 6.61M | : it(it_) {} /// NOLINT |
89 | 3.57M | ALWAYS_INLINE Element operator*() const { return *it; } |
90 | 3.57M | ALWAYS_INLINE Iterator& operator++() { |
91 | 3.57M | ++it; |
92 | 3.57M | return *this; |
93 | 3.57M | } |
94 | 5.09M | ALWAYS_INLINE friend bool operator!=(const Iterator& left, const Iterator& right) { |
95 | 5.09M | return left.it != right.it; |
96 | 5.09M | } |
97 | | |
98 | | private: |
99 | | simdjson::dom::array::iterator it; |
100 | | }; |
101 | 1.52M | ALWAYS_INLINE Array(const simdjson::dom::array& array_) : array(array_) {} /// NOLINT |
102 | 1.52M | ALWAYS_INLINE Iterator begin() const { return array.begin(); } |
103 | 5.09M | ALWAYS_INLINE Iterator end() const { return array.end(); } |
104 | 635k | ALWAYS_INLINE size_t size() const { return array.size(); } |
105 | 0 | ALWAYS_INLINE Element operator[](size_t index) const { |
106 | 0 | assert(index < size()); |
107 | 0 | return array.at(index).value_unsafe(); |
108 | 0 | } |
109 | | |
110 | | private: |
111 | | simdjson::dom::array array; |
112 | | }; |
113 | | using KeyValuePair = std::pair<std::string_view, Element>; |
114 | | /// References an object in a JSON document. |
115 | | class Object { |
116 | | public: |
117 | | class Iterator { |
118 | | public: |
119 | | ALWAYS_INLINE Iterator(const simdjson::dom::object::iterator& it_) |
120 | 24.1M | : it(it_) {} /// NOLINT |
121 | 19.8M | ALWAYS_INLINE KeyValuePair operator*() const { |
122 | 19.8M | const auto& res = *it; |
123 | 19.8M | return {res.key, res.value}; |
124 | 19.8M | } |
125 | 20.0M | ALWAYS_INLINE Iterator& operator++() { |
126 | 20.0M | ++it; |
127 | 20.0M | return *this; |
128 | 20.0M | } |
129 | 0 | ALWAYS_INLINE Iterator operator++(int) { |
130 | 0 | auto res = *this; |
131 | 0 | ++it; |
132 | 0 | return res; |
133 | 0 | } /// NOLINT |
134 | 21.9M | ALWAYS_INLINE friend bool operator!=(const Iterator& left, const Iterator& right) { |
135 | 21.9M | return left.it != right.it; |
136 | 21.9M | } |
137 | 0 | ALWAYS_INLINE friend bool operator==(const Iterator& left, const Iterator& right) { |
138 | 0 | return !(left != right); |
139 | 0 | } |
140 | | |
141 | | private: |
142 | | simdjson::dom::object::iterator it; |
143 | | }; |
144 | 2.19M | ALWAYS_INLINE Object(const simdjson::dom::object& object_) : object(object_) {} /// NOLINT |
145 | 2.19M | ALWAYS_INLINE Iterator begin() const { return object.begin(); } |
146 | 22.0M | ALWAYS_INLINE Iterator end() const { return object.end(); } |
147 | 4.07M | ALWAYS_INLINE size_t size() const { return object.size(); } |
148 | | /// Optional: Provides access to an object's element by index. |
149 | 0 | KeyValuePair operator[](size_t index) const { |
150 | 0 | assert(index < size()); |
151 | 0 | auto it = object.begin(); |
152 | 0 | while (index--) { |
153 | 0 | ++it; |
154 | 0 | } |
155 | 0 | const auto& res = *it; |
156 | 0 | return {res.key, res.value}; |
157 | 0 | } |
158 | | |
159 | | private: |
160 | | simdjson::dom::object object; |
161 | | }; |
162 | | /// Parses a JSON document, returns the reference to its root element if succeeded. |
163 | 1.35M | bool parse(const char* data, size_t size, Element& result) { |
164 | 1.35M | auto document = parser.parse(data, size); |
165 | 1.35M | if (document.error()) { |
166 | 660 | return false; |
167 | 660 | } |
168 | 1.35M | result = document.value_unsafe(); |
169 | 1.35M | return true; |
170 | 1.35M | } |
171 | | |
172 | | private: |
173 | | simdjson::dom::parser parser; |
174 | | }; |
175 | 1.52M | inline ALWAYS_INLINE SimdJSONParser::Array SimdJSONParser::Element::getArray() const { |
176 | 1.52M | return element.get_array().value_unsafe(); |
177 | 1.52M | } |
178 | 2.19M | inline ALWAYS_INLINE SimdJSONParser::Object SimdJSONParser::Element::getObject() const { |
179 | 2.19M | return element.get_object().value_unsafe(); |
180 | 2.19M | } |
181 | | |
182 | | #include "common/compile_check_end.h" |
183 | | |
184 | | } // namespace doris |