Coverage Report

Created: 2026-04-14 12:18

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/util/json/simd_json_parser.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/JSONParsers/SimdJSONParser.h
19
// and modified by Doris
20
21
#pragma once
22
23
#include <rapidjson/document.h>
24
#include <simdjson.h>
25
26
#include "core/types.h"
27
28
namespace doris {
29
30
/// This class can be used as an argument for the template class FunctionJSON.
31
/// It provides ability to parse JSONs using simdjson library.
32
class SimdJSONParser {
33
public:
34
    class Array;
35
    class Object;
36
    /// References an element in a JSON document, representing a JSON null, boolean, string, number,
37
    /// array or object.
38
    class Element {
39
    public:
40
1.35M
        ALWAYS_INLINE Element() {} /// NOLINT
41
        ALWAYS_INLINE Element(const simdjson::dom::element& element_)
42
25.0M
                : element(element_) {} /// NOLINT
43
19.6M
        ALWAYS_INLINE bool isInt64() const {
44
19.6M
            return element.type() == simdjson::dom::element_type::INT64;
45
19.6M
        }
46
14.5M
        ALWAYS_INLINE bool isUInt64() const {
47
14.5M
            return element.type() == simdjson::dom::element_type::UINT64;
48
14.5M
        }
49
14.5M
        ALWAYS_INLINE bool isDouble() const {
50
14.5M
            return element.type() == simdjson::dom::element_type::DOUBLE;
51
14.5M
        }
52
11.7M
        ALWAYS_INLINE bool isString() const {
53
11.7M
            return element.type() == simdjson::dom::element_type::STRING;
54
11.7M
        }
55
24.8M
        ALWAYS_INLINE bool isArray() const {
56
24.8M
            return element.type() == simdjson::dom::element_type::ARRAY;
57
24.8M
        }
58
25.7M
        ALWAYS_INLINE bool isObject() const {
59
25.7M
            return element.type() == simdjson::dom::element_type::OBJECT;
60
25.7M
        }
61
19.9M
        ALWAYS_INLINE bool isBool() const {
62
19.9M
            return element.type() == simdjson::dom::element_type::BOOLEAN;
63
19.9M
        }
64
110k
        ALWAYS_INLINE bool isNull() const {
65
110k
            return element.type() == simdjson::dom::element_type::NULL_VALUE;
66
110k
        }
67
5.31M
        ALWAYS_INLINE Int64 getInt64() const { return element.get_int64().value_unsafe(); }
68
2.89M
        ALWAYS_INLINE double getDouble() const { return element.get_double().value_unsafe(); }
69
358k
        ALWAYS_INLINE bool getBool() const { return element.get_bool().value_unsafe(); }
70
11.6M
        ALWAYS_INLINE std::string_view getString() const {
71
11.6M
            return element.get_string().value_unsafe();
72
11.6M
        }
73
29
        ALWAYS_INLINE UInt64 getUInt64() const { return element.get_uint64().value_unsafe(); }
74
        ALWAYS_INLINE Array getArray() const;
75
        ALWAYS_INLINE Object getObject() const;
76
77
    private:
78
        simdjson::dom::element element;
79
    };
80
    /// References an array in a JSON document.
81
    class Array {
82
    public:
83
        class Iterator {
84
        public:
85
            ALWAYS_INLINE Iterator(const simdjson::dom::array::iterator& it_)
86
6.60M
                    : it(it_) {} /// NOLINT
87
3.56M
            ALWAYS_INLINE Element operator*() const { return *it; }
88
3.57M
            ALWAYS_INLINE Iterator& operator++() {
89
3.57M
                ++it;
90
3.57M
                return *this;
91
3.57M
            }
92
5.08M
            ALWAYS_INLINE friend bool operator!=(const Iterator& left, const Iterator& right) {
93
5.08M
                return left.it != right.it;
94
5.08M
            }
95
96
        private:
97
            simdjson::dom::array::iterator it;
98
        };
99
1.52M
        ALWAYS_INLINE Array(const simdjson::dom::array& array_) : array(array_) {} /// NOLINT
100
1.52M
        ALWAYS_INLINE Iterator begin() const { return array.begin(); }
101
5.08M
        ALWAYS_INLINE Iterator end() const { return array.end(); }
102
635k
        ALWAYS_INLINE size_t size() const { return array.size(); }
103
0
        ALWAYS_INLINE Element operator[](size_t index) const {
104
0
            assert(index < size());
105
0
            return array.at(index).value_unsafe();
106
0
        }
107
108
    private:
109
        simdjson::dom::array array;
110
    };
111
    using KeyValuePair = std::pair<std::string_view, Element>;
112
    /// References an object in a JSON document.
113
    class Object {
114
    public:
115
        class Iterator {
116
        public:
117
            ALWAYS_INLINE Iterator(const simdjson::dom::object::iterator& it_)
118
24.5M
                    : it(it_) {} /// NOLINT
119
20.2M
            ALWAYS_INLINE KeyValuePair operator*() const {
120
20.2M
                const auto& res = *it;
121
20.2M
                return {res.key, res.value};
122
20.2M
            }
123
20.2M
            ALWAYS_INLINE Iterator& operator++() {
124
20.2M
                ++it;
125
20.2M
                return *this;
126
20.2M
            }
127
0
            ALWAYS_INLINE Iterator operator++(int) {
128
0
                auto res = *this;
129
0
                ++it;
130
0
                return res;
131
0
            } /// NOLINT
132
22.3M
            ALWAYS_INLINE friend bool operator!=(const Iterator& left, const Iterator& right) {
133
22.3M
                return left.it != right.it;
134
22.3M
            }
135
0
            ALWAYS_INLINE friend bool operator==(const Iterator& left, const Iterator& right) {
136
0
                return !(left != right);
137
0
            }
138
139
        private:
140
            simdjson::dom::object::iterator it;
141
        };
142
2.19M
        ALWAYS_INLINE Object(const simdjson::dom::object& object_) : object(object_) {} /// NOLINT
143
2.19M
        ALWAYS_INLINE Iterator begin() const { return object.begin(); }
144
22.4M
        ALWAYS_INLINE Iterator end() const { return object.end(); }
145
4.06M
        ALWAYS_INLINE size_t size() const { return object.size(); }
146
        /// Optional: Provides access to an object's element by index.
147
0
        KeyValuePair operator[](size_t index) const {
148
0
            assert(index < size());
149
0
            auto it = object.begin();
150
0
            while (index--) {
151
0
                ++it;
152
0
            }
153
0
            const auto& res = *it;
154
0
            return {res.key, res.value};
155
0
        }
156
157
    private:
158
        simdjson::dom::object object;
159
    };
160
    /// Parses a JSON document, returns the reference to its root element if succeeded.
161
1.35M
    bool parse(const char* data, size_t size, Element& result) {
162
1.35M
        auto document = parser.parse(data, size);
163
1.35M
        if (document.error()) {
164
666
            return false;
165
666
        }
166
1.35M
        result = document.value_unsafe();
167
1.35M
        return true;
168
1.35M
    }
169
170
private:
171
    simdjson::dom::parser parser;
172
};
173
1.52M
inline ALWAYS_INLINE SimdJSONParser::Array SimdJSONParser::Element::getArray() const {
174
1.52M
    return element.get_array().value_unsafe();
175
1.52M
}
176
2.19M
inline ALWAYS_INLINE SimdJSONParser::Object SimdJSONParser::Element::getObject() const {
177
2.19M
    return element.get_object().value_unsafe();
178
2.19M
}
179
180
} // namespace doris