Coverage Report

Created: 2026-03-14 13:34

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/util/json/simd_json_parser.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/JSONParsers/SimdJSONParser.h
19
// and modified by Doris
20
21
#pragma once
22
23
#include <rapidjson/document.h>
24
#include <simdjson.h>
25
26
#include "core/types.h"
27
28
namespace doris {
29
30
#include "common/compile_check_begin.h"
31
32
/// This class can be used as an argument for the template class FunctionJSON.
33
/// It provides ability to parse JSONs using simdjson library.
34
class SimdJSONParser {
35
public:
36
    class Array;
37
    class Object;
38
    /// References an element in a JSON document, representing a JSON null, boolean, string, number,
39
    /// array or object.
40
    class Element {
41
    public:
42
1.35M
        ALWAYS_INLINE Element() {} /// NOLINT
43
        ALWAYS_INLINE Element(const simdjson::dom::element& element_)
44
24.6M
                : element(element_) {} /// NOLINT
45
19.8M
        ALWAYS_INLINE bool isInt64() const {
46
19.8M
            return element.type() == simdjson::dom::element_type::INT64;
47
19.8M
        }
48
14.6M
        ALWAYS_INLINE bool isUInt64() const {
49
14.6M
            return element.type() == simdjson::dom::element_type::UINT64;
50
14.6M
        }
51
14.6M
        ALWAYS_INLINE bool isDouble() const {
52
14.6M
            return element.type() == simdjson::dom::element_type::DOUBLE;
53
14.6M
        }
54
11.8M
        ALWAYS_INLINE bool isString() const {
55
11.8M
            return element.type() == simdjson::dom::element_type::STRING;
56
11.8M
        }
57
24.8M
        ALWAYS_INLINE bool isArray() const {
58
24.8M
            return element.type() == simdjson::dom::element_type::ARRAY;
59
24.8M
        }
60
25.7M
        ALWAYS_INLINE bool isObject() const {
61
25.7M
            return element.type() == simdjson::dom::element_type::OBJECT;
62
25.7M
        }
63
20.2M
        ALWAYS_INLINE bool isBool() const {
64
20.2M
            return element.type() == simdjson::dom::element_type::BOOLEAN;
65
20.2M
        }
66
110k
        ALWAYS_INLINE bool isNull() const {
67
110k
            return element.type() == simdjson::dom::element_type::NULL_VALUE;
68
110k
        }
69
5.33M
        ALWAYS_INLINE Int64 getInt64() const { return element.get_int64().value_unsafe(); }
70
2.91M
        ALWAYS_INLINE double getDouble() const { return element.get_double().value_unsafe(); }
71
358k
        ALWAYS_INLINE bool getBool() const { return element.get_bool().value_unsafe(); }
72
11.7M
        ALWAYS_INLINE std::string_view getString() const {
73
11.7M
            return element.get_string().value_unsafe();
74
11.7M
        }
75
29
        ALWAYS_INLINE UInt64 getUInt64() const { return element.get_uint64().value_unsafe(); }
76
        ALWAYS_INLINE Array getArray() const;
77
        ALWAYS_INLINE Object getObject() const;
78
79
    private:
80
        simdjson::dom::element element;
81
    };
82
    /// References an array in a JSON document.
83
    class Array {
84
    public:
85
        class Iterator {
86
        public:
87
            ALWAYS_INLINE Iterator(const simdjson::dom::array::iterator& it_)
88
6.61M
                    : it(it_) {} /// NOLINT
89
3.57M
            ALWAYS_INLINE Element operator*() const { return *it; }
90
3.57M
            ALWAYS_INLINE Iterator& operator++() {
91
3.57M
                ++it;
92
3.57M
                return *this;
93
3.57M
            }
94
5.09M
            ALWAYS_INLINE friend bool operator!=(const Iterator& left, const Iterator& right) {
95
5.09M
                return left.it != right.it;
96
5.09M
            }
97
98
        private:
99
            simdjson::dom::array::iterator it;
100
        };
101
1.52M
        ALWAYS_INLINE Array(const simdjson::dom::array& array_) : array(array_) {} /// NOLINT
102
1.52M
        ALWAYS_INLINE Iterator begin() const { return array.begin(); }
103
5.09M
        ALWAYS_INLINE Iterator end() const { return array.end(); }
104
635k
        ALWAYS_INLINE size_t size() const { return array.size(); }
105
0
        ALWAYS_INLINE Element operator[](size_t index) const {
106
0
            assert(index < size());
107
0
            return array.at(index).value_unsafe();
108
0
        }
109
110
    private:
111
        simdjson::dom::array array;
112
    };
113
    using KeyValuePair = std::pair<std::string_view, Element>;
114
    /// References an object in a JSON document.
115
    class Object {
116
    public:
117
        class Iterator {
118
        public:
119
            ALWAYS_INLINE Iterator(const simdjson::dom::object::iterator& it_)
120
24.1M
                    : it(it_) {} /// NOLINT
121
19.8M
            ALWAYS_INLINE KeyValuePair operator*() const {
122
19.8M
                const auto& res = *it;
123
19.8M
                return {res.key, res.value};
124
19.8M
            }
125
20.0M
            ALWAYS_INLINE Iterator& operator++() {
126
20.0M
                ++it;
127
20.0M
                return *this;
128
20.0M
            }
129
0
            ALWAYS_INLINE Iterator operator++(int) {
130
0
                auto res = *this;
131
0
                ++it;
132
0
                return res;
133
0
            } /// NOLINT
134
21.9M
            ALWAYS_INLINE friend bool operator!=(const Iterator& left, const Iterator& right) {
135
21.9M
                return left.it != right.it;
136
21.9M
            }
137
0
            ALWAYS_INLINE friend bool operator==(const Iterator& left, const Iterator& right) {
138
0
                return !(left != right);
139
0
            }
140
141
        private:
142
            simdjson::dom::object::iterator it;
143
        };
144
2.19M
        ALWAYS_INLINE Object(const simdjson::dom::object& object_) : object(object_) {} /// NOLINT
145
2.19M
        ALWAYS_INLINE Iterator begin() const { return object.begin(); }
146
22.0M
        ALWAYS_INLINE Iterator end() const { return object.end(); }
147
4.07M
        ALWAYS_INLINE size_t size() const { return object.size(); }
148
        /// Optional: Provides access to an object's element by index.
149
0
        KeyValuePair operator[](size_t index) const {
150
0
            assert(index < size());
151
0
            auto it = object.begin();
152
0
            while (index--) {
153
0
                ++it;
154
0
            }
155
0
            const auto& res = *it;
156
0
            return {res.key, res.value};
157
0
        }
158
159
    private:
160
        simdjson::dom::object object;
161
    };
162
    /// Parses a JSON document, returns the reference to its root element if succeeded.
163
1.35M
    bool parse(const char* data, size_t size, Element& result) {
164
1.35M
        auto document = parser.parse(data, size);
165
1.35M
        if (document.error()) {
166
660
            return false;
167
660
        }
168
1.35M
        result = document.value_unsafe();
169
1.35M
        return true;
170
1.35M
    }
171
172
private:
173
    simdjson::dom::parser parser;
174
};
175
1.52M
inline ALWAYS_INLINE SimdJSONParser::Array SimdJSONParser::Element::getArray() const {
176
1.52M
    return element.get_array().value_unsafe();
177
1.52M
}
178
2.19M
inline ALWAYS_INLINE SimdJSONParser::Object SimdJSONParser::Element::getObject() const {
179
2.19M
    return element.get_object().value_unsafe();
180
2.19M
}
181
182
#include "common/compile_check_end.h"
183
184
} // namespace doris