Coverage Report

Created: 2026-04-15 19:34

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/util/json/simd_json_parser.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/JSONParsers/SimdJSONParser.h
19
// and modified by Doris
20
21
#pragma once
22
23
#include <rapidjson/document.h>
24
#include <simdjson.h>
25
26
#include "core/types.h"
27
28
namespace doris {
29
30
/// This class can be used as an argument for the template class FunctionJSON.
31
/// It provides ability to parse JSONs using simdjson library.
32
class SimdJSONParser {
33
public:
34
    class Array;
35
    class Object;
36
    /// References an element in a JSON document, representing a JSON null, boolean, string, number,
37
    /// array or object.
38
    class Element {
39
    public:
40
81.0k
        ALWAYS_INLINE Element() {} /// NOLINT
41
        ALWAYS_INLINE Element(const simdjson::dom::element& element_)
42
3.15M
                : element(element_) {} /// NOLINT
43
1.86M
        ALWAYS_INLINE bool isInt64() const {
44
1.86M
            return element.type() == simdjson::dom::element_type::INT64;
45
1.86M
        }
46
1.06M
        ALWAYS_INLINE bool isUInt64() const {
47
1.06M
            return element.type() == simdjson::dom::element_type::UINT64;
48
1.06M
        }
49
1.06M
        ALWAYS_INLINE bool isDouble() const {
50
1.06M
            return element.type() == simdjson::dom::element_type::DOUBLE;
51
1.06M
        }
52
944k
        ALWAYS_INLINE bool isString() const {
53
944k
            return element.type() == simdjson::dom::element_type::STRING;
54
944k
        }
55
3.14M
        ALWAYS_INLINE bool isArray() const {
56
3.14M
            return element.type() == simdjson::dom::element_type::ARRAY;
57
3.14M
        }
58
3.21M
        ALWAYS_INLINE bool isObject() const {
59
3.21M
            return element.type() == simdjson::dom::element_type::OBJECT;
60
3.21M
        }
61
1.88M
        ALWAYS_INLINE bool isBool() const {
62
1.88M
            return element.type() == simdjson::dom::element_type::BOOLEAN;
63
1.88M
        }
64
12.0k
        ALWAYS_INLINE bool isNull() const {
65
12.0k
            return element.type() == simdjson::dom::element_type::NULL_VALUE;
66
12.0k
        }
67
804k
        ALWAYS_INLINE Int64 getInt64() const { return element.get_int64().value_unsafe(); }
68
116k
        ALWAYS_INLINE double getDouble() const { return element.get_double().value_unsafe(); }
69
20.7k
        ALWAYS_INLINE bool getBool() const { return element.get_bool().value_unsafe(); }
70
932k
        ALWAYS_INLINE std::string_view getString() const {
71
932k
            return element.get_string().value_unsafe();
72
932k
        }
73
20
        ALWAYS_INLINE UInt64 getUInt64() const { return element.get_uint64().value_unsafe(); }
74
        ALWAYS_INLINE Array getArray() const;
75
        ALWAYS_INLINE Object getObject() const;
76
77
    private:
78
        simdjson::dom::element element;
79
    };
80
    /// References an array in a JSON document.
81
    class Array {
82
    public:
83
        class Iterator {
84
        public:
85
            ALWAYS_INLINE Iterator(const simdjson::dom::array::iterator& it_)
86
1.96M
                    : it(it_) {} /// NOLINT
87
1.64M
            ALWAYS_INLINE Element operator*() const { return *it; }
88
1.64M
            ALWAYS_INLINE Iterator& operator++() {
89
1.64M
                ++it;
90
1.64M
                return *this;
91
1.64M
            }
92
1.80M
            ALWAYS_INLINE friend bool operator!=(const Iterator& left, const Iterator& right) {
93
1.80M
                return left.it != right.it;
94
1.80M
            }
95
96
        private:
97
            simdjson::dom::array::iterator it;
98
        };
99
160k
        ALWAYS_INLINE Array(const simdjson::dom::array& array_) : array(array_) {} /// NOLINT
100
160k
        ALWAYS_INLINE Iterator begin() const { return array.begin(); }
101
1.80M
        ALWAYS_INLINE Iterator end() const { return array.end(); }
102
55.1k
        ALWAYS_INLINE size_t size() const { return array.size(); }
103
0
        ALWAYS_INLINE Element operator[](size_t index) const {
104
0
            assert(index < size());
105
0
            return array.at(index).value_unsafe();
106
0
        }
107
108
    private:
109
        simdjson::dom::array array;
110
    };
111
    using KeyValuePair = std::pair<std::string_view, Element>;
112
    /// References an object in a JSON document.
113
    class Object {
114
    public:
115
        class Iterator {
116
        public:
117
            ALWAYS_INLINE Iterator(const simdjson::dom::object::iterator& it_)
118
1.72M
                    : it(it_) {} /// NOLINT
119
1.42M
            ALWAYS_INLINE KeyValuePair operator*() const {
120
1.42M
                const auto& res = *it;
121
1.42M
                return {res.key, res.value};
122
1.42M
            }
123
1.42M
            ALWAYS_INLINE Iterator& operator++() {
124
1.42M
                ++it;
125
1.42M
                return *this;
126
1.42M
            }
127
0
            ALWAYS_INLINE Iterator operator++(int) {
128
0
                auto res = *this;
129
0
                ++it;
130
0
                return res;
131
0
            } /// NOLINT
132
1.57M
            ALWAYS_INLINE friend bool operator!=(const Iterator& left, const Iterator& right) {
133
1.57M
                return left.it != right.it;
134
1.57M
            }
135
0
            ALWAYS_INLINE friend bool operator==(const Iterator& left, const Iterator& right) {
136
0
                return !(left != right);
137
0
            }
138
139
        private:
140
            simdjson::dom::object::iterator it;
141
        };
142
146k
        ALWAYS_INLINE Object(const simdjson::dom::object& object_) : object(object_) {} /// NOLINT
143
146k
        ALWAYS_INLINE Iterator begin() const { return object.begin(); }
144
1.57M
        ALWAYS_INLINE Iterator end() const { return object.end(); }
145
290k
        ALWAYS_INLINE size_t size() const { return object.size(); }
146
        /// Optional: Provides access to an object's element by index.
147
0
        KeyValuePair operator[](size_t index) const {
148
0
            assert(index < size());
149
0
            auto it = object.begin();
150
0
            while (index--) {
151
0
                ++it;
152
0
            }
153
0
            const auto& res = *it;
154
0
            return {res.key, res.value};
155
0
        }
156
157
    private:
158
        simdjson::dom::object object;
159
    };
160
    /// Parses a JSON document, returns the reference to its root element if succeeded.
161
81.0k
    bool parse(const char* data, size_t size, Element& result) {
162
81.0k
        auto document = parser.parse(data, size);
163
81.0k
        if (document.error()) {
164
13
            return false;
165
13
        }
166
81.0k
        result = document.value_unsafe();
167
81.0k
        return true;
168
81.0k
    }
169
170
private:
171
    simdjson::dom::parser parser;
172
};
173
160k
inline ALWAYS_INLINE SimdJSONParser::Array SimdJSONParser::Element::getArray() const {
174
160k
    return element.get_array().value_unsafe();
175
160k
}
176
146k
inline ALWAYS_INLINE SimdJSONParser::Object SimdJSONParser::Element::getObject() const {
177
146k
    return element.get_object().value_unsafe();
178
146k
}
179
180
} // namespace doris