/root/doris/be/src/util/jsonb_parser.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2014, Facebook, Inc. |
3 | | * All rights reserved. |
4 | | * |
5 | | * This source code is licensed under the BSD-style license found in the |
6 | | * LICENSE file in the root directory of this source tree. An additional grant |
7 | | * of patent rights can be found in the PATENTS file in the same directory. |
8 | | * |
9 | | */ |
10 | | |
11 | | /* |
12 | | * This file defines JsonbParserT (template) and JsonbParser. |
13 | | * |
14 | | * JsonbParserT is a template class which implements a JSON parser. |
15 | | * JsonbParserT parses JSON text, and serializes it to JSONB binary format |
16 | | * by using JsonbWriterT object. By default, JsonbParserT creates a new |
17 | | * JsonbWriterT object with an output stream object. However, you can also |
18 | | * pass in your JsonbWriterT or any stream object that implements some basic |
19 | | * interface of std::ostream (see JsonbStream.h). |
20 | | * |
21 | | * JsonbParser specializes JsonbParserT with JsonbOutStream type (see |
22 | | * JsonbStream.h). So unless you want to provide your own output stream |
23 | | * type, use the JsonbParser object. |
24 | | * |
25 | | * ** Parsing JSON ** |
26 | | * JsonbParserT parses JSON string, and directly serializes into JSONB |
27 | | * packed bytes. There are three ways to parse a JSON string: (1) using |
28 | | * c-string, (2) using string with len, (3) using std::istream object. You can |
29 | | * use custom streambuf to redirect output. JsonbInBuffer is the streambuf used |
30 | | * internally if the input is a raw character buffer. |
31 | | * |
32 | | * You can reuse a JsonbParserT object to parse/serialize multiple JSON |
33 | | * strings, and the previous JSONB will be overwritten. |
34 | | * |
35 | | * If parsing fails (returned false), the error code will be set to one of |
36 | | * JsonbErrType, and can be retrieved by calling getErrorCode(). |
37 | | * |
38 | | * ** External dictionary ** |
39 | | * During parsing a JSON string, you can pass a call-back function to map a key |
40 | | * string to an id, and store the dictionary id in JSONB to save space. The |
41 | | * purpose of using an external dictionary is more towards a collection of |
42 | | * documents (which has common keys) rather than a single document, so that |
43 | | * space saving will be significant. |
44 | | * |
45 | | * ** Endianness ** |
46 | | * Note: JSONB serialization doesn't assume endianness of the server. However |
47 | | * you will need to ensure that the endianness at the reader side is the same |
48 | | * as that at the writer side (if they are on different machines). Otherwise, |
49 | | * proper conversion is needed when a number value is returned to the |
50 | | * caller/writer. |
51 | | * |
52 | | * @author Tian Xia <tianx@fb.com> |
53 | | * |
54 | | * this file is copied from |
55 | | * https://github.com/facebook/mysql-5.6/blob/fb-mysql-5.6.35/fbson/FbsonJsonParser.h |
56 | | * and modified by Doris |
57 | | */ |
58 | | |
59 | | #ifndef JSONB_JSONBJSONPARSER_H |
60 | | #define JSONB_JSONBJSONPARSER_H |
61 | | |
62 | | #include <cmath> |
63 | | #include <limits> |
64 | | |
65 | | #include "jsonb_document.h" |
66 | | #include "jsonb_error.h" |
67 | | #include "jsonb_writer.h" |
68 | | #include "string_parser.hpp" |
69 | | |
70 | | namespace doris { |
71 | | |
// Characters that end a scalar token (used by the hex and octal number scanners).
constexpr const char* kJsonDelim = " ,]}\t\r\n";
// JSON whitespace characters.
constexpr const char* kWhiteSpace = " \t\n\r";
74 | | |
75 | | /* |
76 | | * Template JsonbParserT |
77 | | */ |
78 | | template <class OS_TYPE> |
79 | | class JsonbParserT { |
80 | | public: |
81 | 87.4k | JsonbParserT() : stream_pos_(0), err_(JsonbErrType::E_NONE) {} |
82 | | |
83 | | explicit JsonbParserT(OS_TYPE& os) : writer_(os), stream_pos_(0), err_(JsonbErrType::E_NONE) {} |
84 | | |
85 | | // parse a UTF-8 JSON string |
86 | | bool parse(const std::string& str, hDictInsert handler = nullptr) { |
87 | | return parse(str.c_str(), (unsigned int)str.size(), handler); |
88 | | } |
89 | | |
90 | | // parse a UTF-8 JSON c-style string (NULL terminated) |
91 | | bool parse(const char* c_str, hDictInsert handler = nullptr) { |
92 | | return parse(c_str, (unsigned int)strlen(c_str), handler); |
93 | | } |
94 | | |
95 | | // parse a UTF-8 JSON string with length |
96 | 87.6k | bool parse(const char* pch, unsigned int len, hDictInsert handler = nullptr) { |
97 | 87.6k | if (!pch || len == 0) { Branch (97:13): [True: 0, False: 87.6k]
Branch (97:21): [True: 0, False: 87.6k]
|
98 | 0 | err_ = JsonbErrType::E_EMPTY_DOCUMENT; |
99 | 0 | return false; |
100 | 0 | } |
101 | | |
102 | 87.6k | JsonbInBuffer sb(pch, len); |
103 | 87.6k | std::istream in(&sb); |
104 | 87.6k | return parse(in, handler); |
105 | 87.6k | } |
106 | | |
107 | | // parse UTF-8 JSON text from an input stream |
108 | 87.6k | bool parse(std::istream& in, hDictInsert handler = nullptr) { |
109 | 87.6k | bool res = false; |
110 | 87.6k | err_ = JsonbErrType::E_NONE; |
111 | 87.6k | stream_pos_ = 0; |
112 | | |
113 | | // reset output stream |
114 | 87.6k | writer_.reset(); |
115 | | |
116 | 87.6k | trim(in); |
117 | | |
118 | | // TODO(wzy): parsePrimitive should be implemented |
119 | 87.6k | if (in.peek() == '{') { Branch (119:13): [True: 490, False: 87.1k]
|
120 | 490 | skipChar(in); |
121 | 490 | res = parseObject(in, handler); |
122 | 87.1k | } else if (in.peek() == '[') { Branch (122:20): [True: 86.1k, False: 1.02k]
|
123 | 86.1k | skipChar(in); |
124 | 86.1k | res = parseArray(in, handler); |
125 | 86.1k | } else { |
126 | 1.02k | res = parsePrimitive(in, handler); |
127 | 1.02k | if (!res) err_ = handle_parse_failure(in); Branch (127:17): [True: 59, False: 964]
|
128 | 1.02k | } |
129 | | |
130 | 87.6k | trim(in); |
131 | 87.6k | if (res && !in.eof()) { Branch (131:13): [True: 86.7k, False: 868]
Branch (131:20): [True: 0, False: 86.7k]
|
132 | 0 | err_ = JsonbErrType::E_INVALID_DOCU; |
133 | 0 | return false; |
134 | 0 | } |
135 | | |
136 | 87.6k | return res; |
137 | 87.6k | } |
138 | | |
139 | 173k | JsonbWriterT<OS_TYPE>& getWriter() { return writer_; } |
140 | | |
141 | 70 | JsonbErrType getErrorCode() { return err_; } |
142 | | |
143 | | JsonbErrInfo getErrorInfo() { |
144 | | assert(err_ < JsonbErrType::E_NUM_ERRORS); |
145 | | |
146 | | JsonbErrInfo err_info; |
147 | | |
148 | | // stream_pos_ always points to the next char, so err_pos is 1-based |
149 | | err_info.err_pos = stream_pos_; |
150 | | err_info.err_msg = JsonbErrMsg::getErrMsg(err_); |
151 | | |
152 | | return err_info; |
153 | | } |
154 | | |
155 | | // clear error code |
156 | | void clearErr() { err_ = JsonbErrType::E_NONE; } |
157 | | |
158 | | private: |
159 | 0 | JsonbErrType handle_parse_value_failure(bool parse_res, std::istream& in) { |
160 | 0 | if (parse_res) { Branch (160:13): [True: 0, False: 0]
|
161 | 0 | trim(in); |
162 | 0 | if (!in.good()) { Branch (162:17): [True: 0, False: 0]
|
163 | 0 | return JsonbErrType::E_INVALID_DOCU_COMPAT; |
164 | 0 | } |
165 | 0 | } |
166 | 0 | return JsonbErrType::E_INVALID_DOCU; |
167 | 0 | ; |
168 | 0 | } |
169 | | |
170 | | // In case json is determined to be invalid at top level, |
171 | | // try to parse literal values. |
172 | | // We return a different error code E_INVALID_DOCU_COMPAT |
173 | | // in case the input json contains these values. |
174 | | // Returning a different error code will cause an |
175 | | // auditing on the caller. |
176 | | // This is mainly done because 8.0 JSON_VALID considers |
177 | | // this as a valid input. |
178 | 59 | JsonbErrType handle_parse_failure(std::istream& in) { |
179 | 59 | JsonbErrType error = JsonbErrType::E_INVALID_DOCU; |
180 | 59 | if (!writer_.writeStartArray()) { Branch (180:13): [True: 0, False: 59]
|
181 | 0 | return error; |
182 | 0 | } |
183 | | |
184 | 59 | switch (in.peek()) { |
185 | 0 | case 'n': Branch (185:9): [True: 0, False: 59]
|
186 | 0 | skipChar(in); |
187 | 0 | error = handle_parse_value_failure(parseNull(in), in); |
188 | 0 | break; |
189 | 0 | case 't': Branch (189:9): [True: 0, False: 59]
|
190 | 0 | skipChar(in); |
191 | 0 | error = handle_parse_value_failure(parseTrue(in), in); |
192 | 0 | break; |
193 | 0 | case 'f': Branch (193:9): [True: 0, False: 59]
|
194 | 0 | skipChar(in); |
195 | 0 | error = handle_parse_value_failure(parseFalse(in), in); |
196 | 0 | break; |
197 | 0 | case '"': Branch (197:9): [True: 0, False: 59]
|
198 | 0 | skipChar(in); |
199 | 0 | error = handle_parse_value_failure(parseString(in), in); |
200 | 0 | break; |
201 | 59 | default: Branch (201:9): [True: 59, False: 0]
|
202 | 59 | if (parseNumber(in)) { Branch (202:17): [True: 0, False: 59]
|
203 | 0 | trim(in); |
204 | 0 | if (in.eof()) { Branch (204:21): [True: 0, False: 0]
|
205 | 0 | error = JsonbErrType::E_INVALID_DOCU_COMPAT; |
206 | 0 | } |
207 | 0 | } |
208 | 59 | } |
209 | 59 | if (!writer_.writeEndArray()) { Branch (209:13): [True: 0, False: 59]
|
210 | 0 | return error; |
211 | 0 | } |
212 | | |
213 | 59 | return error; |
214 | 59 | } |
215 | | |
216 | | // parse primitive |
217 | 1.02k | bool parsePrimitive(std::istream& in, hDictInsert handler) { |
218 | 1.02k | bool res = false; |
219 | 1.02k | switch (in.peek()) { |
220 | 63 | case 'n': Branch (220:9): [True: 63, False: 960]
|
221 | 63 | skipChar(in); |
222 | 63 | res = parseNull(in); |
223 | 63 | break; |
224 | 63 | case 't': Branch (224:9): [True: 63, False: 960]
|
225 | 63 | skipChar(in); |
226 | 63 | res = parseTrue(in); |
227 | 63 | break; |
228 | 63 | case 'f': Branch (228:9): [True: 63, False: 960]
|
229 | 63 | skipChar(in); |
230 | 63 | res = parseFalse(in); |
231 | 63 | break; |
232 | 335 | case '"': Branch (232:9): [True: 335, False: 688]
|
233 | 335 | skipChar(in); |
234 | 335 | res = parseString(in); |
235 | 335 | break; |
236 | 499 | default: Branch (236:9): [True: 499, False: 524]
|
237 | 499 | res = parseNumber(in); |
238 | 1.02k | } |
239 | | |
240 | 1.02k | return res; |
241 | 1.02k | } |
242 | | |
243 | | // parse a JSON object (comma-separated list of key-value pairs) |
244 | 15.1k | bool parseObject(std::istream& in, hDictInsert handler) { |
245 | 15.1k | if (!writer_.writeStartObject()) { Branch (245:13): [True: 0, False: 15.1k]
|
246 | 0 | err_ = JsonbErrType::E_OUTPUT_FAIL; |
247 | 0 | return false; |
248 | 0 | } |
249 | | |
250 | 15.1k | trim(in); |
251 | | |
252 | 15.1k | if (in.peek() == '}') { Branch (252:13): [True: 446, False: 14.7k]
|
253 | 446 | skipChar(in); |
254 | | // empty object |
255 | 446 | if (!writer_.writeEndObject()) { Branch (255:17): [True: 0, False: 446]
|
256 | 0 | err_ = JsonbErrType::E_OUTPUT_FAIL; |
257 | 0 | return false; |
258 | 0 | } |
259 | 446 | return true; |
260 | 446 | } |
261 | | |
262 | 16.6k | while (in.good()) { Branch (262:16): [True: 16.6k, False: 0]
|
263 | 16.6k | if (nextChar(in) != '"') { Branch (263:17): [True: 11, False: 16.6k]
|
264 | 11 | err_ = JsonbErrType::E_INVALID_OBJ; |
265 | 11 | return false; |
266 | 11 | } |
267 | | |
268 | 16.6k | if (!parseKVPair(in, handler)) { Branch (268:17): [True: 9.40k, False: 7.23k]
|
269 | 9.40k | return false; |
270 | 9.40k | } |
271 | | |
272 | 7.23k | trim(in); |
273 | | |
274 | 7.23k | char ch = nextChar(in); |
275 | 7.23k | if (ch == '}') { Branch (275:17): [True: 5.30k, False: 1.93k]
|
276 | | // end of the object |
277 | 5.30k | if (!writer_.writeEndObject()) { Branch (277:21): [True: 0, False: 5.30k]
|
278 | 0 | err_ = JsonbErrType::E_OUTPUT_FAIL; |
279 | 0 | return false; |
280 | 0 | } |
281 | 5.30k | return true; |
282 | 5.30k | } else if (ch != ',') { Branch (282:24): [True: 0, False: 1.93k]
|
283 | 0 | err_ = JsonbErrType::E_INVALID_OBJ; |
284 | 0 | return false; |
285 | 0 | } |
286 | | |
287 | 1.93k | trim(in); |
288 | 1.93k | } |
289 | | |
290 | 0 | err_ = JsonbErrType::E_INVALID_OBJ; |
291 | 0 | return false; |
292 | 14.7k | } |
293 | | |
294 | | // parse a JSON array (comma-separated list of values) |
295 | 116k | bool parseArray(std::istream& in, hDictInsert handler) { |
296 | 116k | if (!writer_.writeStartArray()) { Branch (296:13): [True: 0, False: 116k]
|
297 | 0 | err_ = JsonbErrType::E_OUTPUT_FAIL; |
298 | 0 | return false; |
299 | 0 | } |
300 | | |
301 | 116k | trim(in); |
302 | | |
303 | 116k | if (in.peek() == ']') { Branch (303:13): [True: 986, False: 115k]
|
304 | 986 | skipChar(in); |
305 | | // empty array |
306 | 986 | if (!writer_.writeEndArray()) { Branch (306:17): [True: 0, False: 986]
|
307 | 0 | err_ = JsonbErrType::E_OUTPUT_FAIL; |
308 | 0 | return false; |
309 | 0 | } |
310 | 986 | return true; |
311 | 986 | } |
312 | | |
313 | 507k | while (in.good()) { Branch (313:16): [True: 507k, False: 0]
|
314 | 507k | if (!parseValue(in, handler)) { Branch (314:17): [True: 859, False: 506k]
|
315 | 859 | return false; |
316 | 859 | } |
317 | | |
318 | 506k | trim(in); |
319 | | |
320 | 506k | char ch = nextChar(in); |
321 | 506k | if (ch == ']') { Branch (321:17): [True: 114k, False: 391k]
|
322 | | // end of the array |
323 | 114k | if (!writer_.writeEndArray()) { Branch (323:21): [True: 0, False: 114k]
|
324 | 0 | err_ = JsonbErrType::E_OUTPUT_FAIL; |
325 | 0 | return false; |
326 | 0 | } |
327 | 114k | return true; |
328 | 391k | } else if (ch != ',') { Branch (328:24): [True: 0, False: 391k]
|
329 | 0 | err_ = JsonbErrType::E_INVALID_ARR; |
330 | 0 | return false; |
331 | 0 | } |
332 | | |
333 | 391k | trim(in); |
334 | 391k | } |
335 | | |
336 | 0 | err_ = JsonbErrType::E_INVALID_ARR; |
337 | 0 | return false; |
338 | 115k | } |
339 | | |
340 | | // parse a key-value pair, separated by ":" |
341 | 16.6k | bool parseKVPair(std::istream& in, hDictInsert handler) { |
342 | 16.6k | if (parseKey(in, handler) && parseValue(in, handler)) { Branch (342:13): [True: 16.4k, False: 185]
Branch (342:38): [True: 7.23k, False: 9.22k]
|
343 | 7.23k | return true; |
344 | 7.23k | } |
345 | | |
346 | 9.40k | return false; |
347 | 16.6k | } |
348 | | |
349 | | // parse a key (must be string) |
350 | 16.6k | bool parseKey(std::istream& in, hDictInsert handler) { |
351 | 16.6k | char key[JsonbKeyValue::sMaxKeyLen]; |
352 | 16.6k | int key_len = 0; |
353 | 109k | while (in.good() && in.peek() != '"' && key_len < JsonbKeyValue::sMaxKeyLen) { Branch (353:16): [True: 109k, False: 0]
Branch (353:29): [True: 92.7k, False: 16.4k]
Branch (353:49): [True: 92.5k, False: 185]
|
354 | 92.5k | char ch = nextChar(in); |
355 | 92.5k | if (ch == '\\') { Branch (355:17): [True: 3.77k, False: 88.7k]
|
356 | 3.77k | char escape_buffer[5]; // buffer for escape |
357 | 3.77k | int len; |
358 | 3.77k | if (!parseEscape(in, escape_buffer, len)) { Branch (358:21): [True: 0, False: 3.77k]
|
359 | 0 | err_ = JsonbErrType::E_INVALID_KEY_STRING; |
360 | 0 | return false; |
361 | 0 | } |
362 | 3.77k | if (key_len + len >= JsonbKeyValue::sMaxKeyLen) { Branch (362:21): [True: 0, False: 3.77k]
|
363 | 0 | err_ = JsonbErrType::E_INVALID_KEY_LENGTH; |
364 | 0 | return false; |
365 | 0 | } |
366 | 3.77k | memcpy(key + key_len, escape_buffer, len); |
367 | 3.77k | key_len += len; |
368 | 88.7k | } else { |
369 | 88.7k | key[key_len++] = ch; |
370 | 88.7k | } |
371 | 92.5k | } |
372 | | // The JSON key can be an empty string. |
373 | 16.6k | if (!in.good() || in.peek() != '"') { Branch (373:13): [True: 0, False: 16.6k]
Branch (373:27): [True: 185, False: 16.4k]
|
374 | 185 | if (key_len == JsonbKeyValue::sMaxKeyLen) Branch (374:17): [True: 185, False: 0]
|
375 | 185 | err_ = JsonbErrType::E_INVALID_KEY_LENGTH; |
376 | 0 | else |
377 | 0 | err_ = JsonbErrType::E_INVALID_KEY_STRING; |
378 | 185 | return false; |
379 | 185 | } |
380 | | |
381 | 16.4k | skipChar(in); // discard '"' |
382 | | |
383 | 16.4k | int key_id = -1; |
384 | 16.4k | if (handler) { Branch (384:13): [True: 0, False: 16.4k]
|
385 | 0 | key_id = handler(key, key_len); |
386 | 0 | } |
387 | | |
388 | 16.4k | if (key_id < 0) { Branch (388:13): [True: 16.4k, False: 0]
|
389 | 16.4k | writer_.writeKey(key, key_len); |
390 | 16.4k | } else { |
391 | 0 | writer_.writeKey(key_id); |
392 | 0 | } |
393 | | |
394 | 16.4k | trim(in); |
395 | | |
396 | 16.4k | if (nextChar(in) != ':') { Branch (396:13): [True: 0, False: 16.4k]
|
397 | 0 | err_ = JsonbErrType::E_INVALID_OBJ; |
398 | 0 | return false; |
399 | 0 | } |
400 | | |
401 | 16.4k | trim(in); |
402 | 16.4k | if (!in.good()) { Branch (402:13): [True: 0, False: 16.4k]
|
403 | 0 | err_ = JsonbErrType::E_INVALID_OBJ; |
404 | 0 | return false; |
405 | 0 | } |
406 | | |
407 | 16.4k | return true; |
408 | 16.4k | } |
409 | | |
410 | | // parse a value |
411 | 523k | bool parseValue(std::istream& in, hDictInsert handler) { |
412 | 523k | bool res = false; |
413 | | |
414 | 523k | switch (in.peek()) { |
415 | 0 | case 'N': Branch (415:9): [True: 0, False: 523k]
|
416 | 11.3k | case 'n': { Branch (416:9): [True: 11.3k, False: 512k]
|
417 | 11.3k | skipChar(in); |
418 | 11.3k | res = parseNull(in); |
419 | 11.3k | break; |
420 | 0 | } |
421 | 0 | case 'T': Branch (421:9): [True: 0, False: 523k]
|
422 | 119 | case 't': { Branch (422:9): [True: 119, False: 523k]
|
423 | 119 | skipChar(in); |
424 | 119 | res = parseTrue(in); |
425 | 119 | break; |
426 | 0 | } |
427 | 0 | case 'F': Branch (427:9): [True: 0, False: 523k]
|
428 | 120 | case 'f': { Branch (428:9): [True: 120, False: 523k]
|
429 | 120 | skipChar(in); |
430 | 120 | res = parseFalse(in); |
431 | 120 | break; |
432 | 0 | } |
433 | 276k | case '"': { Branch (433:9): [True: 276k, False: 247k]
|
434 | 276k | skipChar(in); |
435 | 276k | res = parseString(in); |
436 | 276k | break; |
437 | 0 | } |
438 | 14.7k | case '{': { Branch (438:9): [True: 14.7k, False: 509k]
|
439 | 14.7k | skipChar(in); |
440 | 14.7k | ++nesting_lvl_; |
441 | 14.7k | if (nesting_lvl_ >= MaxNestingLevel) { Branch (441:17): [True: 89, False: 14.6k]
|
442 | 89 | err_ = JsonbErrType::E_NESTING_LVL_OVERFLOW; |
443 | 89 | return false; |
444 | 89 | } |
445 | 14.6k | res = parseObject(in, handler); |
446 | 14.6k | if (res) { Branch (446:17): [True: 5.46k, False: 9.22k]
|
447 | 5.46k | --nesting_lvl_; |
448 | 5.46k | } |
449 | 14.6k | break; |
450 | 14.7k | } |
451 | 30.4k | case '[': { Branch (451:9): [True: 30.4k, False: 493k]
|
452 | 30.4k | skipChar(in); |
453 | 30.4k | ++nesting_lvl_; |
454 | 30.4k | if (nesting_lvl_ >= MaxNestingLevel) { Branch (454:17): [True: 0, False: 30.4k]
|
455 | 0 | err_ = JsonbErrType::E_NESTING_LVL_OVERFLOW; |
456 | 0 | return false; |
457 | 0 | } |
458 | 30.4k | res = parseArray(in, handler); |
459 | 30.4k | if (res) { Branch (459:17): [True: 30.1k, False: 248]
|
460 | 30.1k | --nesting_lvl_; |
461 | 30.1k | } |
462 | 30.4k | break; |
463 | 30.4k | } |
464 | 190k | default: { Branch (464:9): [True: 190k, False: 333k]
|
465 | 190k | res = parseNumber(in); |
466 | 190k | break; |
467 | 30.4k | } |
468 | 523k | } |
469 | | |
470 | 523k | return res; |
471 | 523k | } |
472 | | |
473 | | // parse NULL value |
474 | 11.4k | bool parseNull(std::istream& in) { |
475 | 11.4k | if (tolower(nextChar(in)) == 'u' && tolower(nextChar(in)) == 'l' && Branch (475:13): [True: 11.4k, False: 0]
Branch (475:45): [True: 11.4k, False: 0]
|
476 | 11.4k | tolower(nextChar(in)) == 'l') { Branch (476:13): [True: 11.4k, False: 0]
|
477 | 11.4k | writer_.writeNull(); |
478 | 11.4k | return true; |
479 | 11.4k | } |
480 | | |
481 | 0 | err_ = JsonbErrType::E_INVALID_SCALAR_VALUE; |
482 | 0 | return false; |
483 | 11.4k | } |
484 | | |
485 | | // parse TRUE value |
486 | 182 | bool parseTrue(std::istream& in) { |
487 | 182 | if (tolower(nextChar(in)) == 'r' && tolower(nextChar(in)) == 'u' && Branch (487:13): [True: 182, False: 0]
Branch (487:45): [True: 182, False: 0]
|
488 | 182 | tolower(nextChar(in)) == 'e') { Branch (488:13): [True: 182, False: 0]
|
489 | 182 | writer_.writeBool(true); |
490 | 182 | return true; |
491 | 182 | } |
492 | | |
493 | 0 | err_ = JsonbErrType::E_INVALID_SCALAR_VALUE; |
494 | 0 | return false; |
495 | 182 | } |
496 | | |
497 | | // parse FALSE value |
498 | 183 | bool parseFalse(std::istream& in) { |
499 | 183 | if (tolower(nextChar(in)) == 'a' && tolower(nextChar(in)) == 'l' && Branch (499:13): [True: 183, False: 0]
Branch (499:45): [True: 183, False: 0]
|
500 | 183 | tolower(nextChar(in)) == 's' && tolower(nextChar(in)) == 'e') { Branch (500:13): [True: 183, False: 0]
Branch (500:45): [True: 183, False: 0]
|
501 | 183 | writer_.writeBool(false); |
502 | 183 | return true; |
503 | 183 | } |
504 | | |
505 | 0 | err_ = JsonbErrType::E_INVALID_SCALAR_VALUE; |
506 | 0 | return false; |
507 | 183 | } |
508 | | |
509 | | /* |
510 | | This is a helper function to parse the hex value. hex_num means the |
511 | | number of digits needed to be parsed. If less than zero, then it will |
512 | | consider all the characters between current and any character in JsonDelim. |
513 | | */ |
514 | 0 | unsigned parseHexHelper(std::istream& in, uint64_t& val, unsigned hex_num = 17) { |
515 | | // We can't read more than 17 digits, so when read 17 digits, it's overflow |
516 | 0 | val = 0; |
517 | 0 | unsigned num_digits = 0; |
518 | 0 | char ch = tolower(in.peek()); |
519 | 0 | while (in.good() && !strchr(kJsonDelim, ch) && num_digits != hex_num) { Branch (519:16): [True: 0, False: 0]
Branch (519:29): [True: 0, False: 0]
Branch (519:56): [True: 0, False: 0]
|
520 | 0 | if (ch >= '0' && ch <= '9') { Branch (520:17): [True: 0, False: 0]
Branch (520:30): [True: 0, False: 0]
|
521 | 0 | val = (val << 4) + (ch - '0'); |
522 | 0 | } else if (ch >= 'a' && ch <= 'f') { Branch (522:24): [True: 0, False: 0]
Branch (522:37): [True: 0, False: 0]
|
523 | 0 | val = (val << 4) + (ch - 'a' + 10); |
524 | 0 | } else { |
525 | | // unrecognized hex digit |
526 | 0 | return 0; |
527 | 0 | } |
528 | 0 | skipChar(in); |
529 | 0 | ch = tolower(in.peek()); |
530 | 0 | ++num_digits; |
531 | 0 | } |
532 | 0 | return num_digits; |
533 | 0 | } |
534 | | |
535 | | // parse HEX value |
536 | 0 | bool parseHex4(std::istream& in, unsigned& h) { |
537 | 0 | uint64_t val; |
538 | 0 | if (4 == parseHexHelper(in, val, 4)) { Branch (538:13): [True: 0, False: 0]
|
539 | 0 | h = (unsigned)val; |
540 | 0 | return true; |
541 | 0 | } |
542 | 0 | return false; |
543 | 0 | } |
544 | | |
545 | | /* |
546 | | parse Escape char. |
547 | | */ |
548 | 26.2k | bool parseEscape(std::istream& in, char* out, int& len) { |
549 | | /* |
550 | | This is extracted from cJSON implementation. |
551 | | This is about the mask of the first byte in UTF-8. |
552 | | The mask is defined in: |
553 | | http://en.wikipedia.org/wiki/UTF-8#Description |
554 | | */ |
555 | 26.2k | const unsigned char firstByteMark[6] = {0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC}; |
556 | 26.2k | if (!in.good()) { Branch (556:13): [True: 0, False: 26.2k]
|
557 | 0 | return false; |
558 | 0 | } |
559 | 26.2k | char c = nextChar(in); |
560 | 26.2k | len = 1; |
561 | 26.2k | switch (c) { |
562 | | // \" \\ \/ \b \f \n \r \t |
563 | 9.63k | case '"': Branch (563:9): [True: 9.63k, False: 16.5k]
|
564 | 9.63k | *out = '"'; |
565 | 9.63k | return true; |
566 | 13.5k | case '\\': Branch (566:9): [True: 13.5k, False: 12.6k]
|
567 | 13.5k | *out = '\\'; |
568 | 13.5k | return true; |
569 | 0 | case '/': Branch (569:9): [True: 0, False: 26.2k]
|
570 | 0 | *out = '/'; |
571 | 0 | return true; |
572 | 611 | case 'b': Branch (572:9): [True: 611, False: 25.5k]
|
573 | 611 | *out = '\b'; |
574 | 611 | return true; |
575 | 608 | case 'f': Branch (575:9): [True: 608, False: 25.5k]
|
576 | 608 | *out = '\f'; |
577 | 608 | return true; |
578 | 607 | case 'n': Branch (578:9): [True: 607, False: 25.5k]
|
579 | 607 | *out = '\n'; |
580 | 607 | return true; |
581 | 608 | case 'r': Branch (581:9): [True: 608, False: 25.5k]
|
582 | 608 | *out = '\r'; |
583 | 608 | return true; |
584 | 616 | case 't': Branch (584:9): [True: 616, False: 25.5k]
|
585 | 616 | *out = '\t'; |
586 | 616 | return true; |
587 | 0 | case 'u': { Branch (587:9): [True: 0, False: 26.2k]
|
588 | 0 | unsigned uc; |
589 | 0 | if (!parseHex4(in, uc)) { Branch (589:17): [True: 0, False: 0]
|
590 | 0 | return false; |
591 | 0 | } |
592 | | /* |
593 | | For DC00 to DFFF, it should be low surrogates for UTF16. |
594 | | So if it display in the high bits, it's invalid. |
595 | | */ |
596 | 0 | if (uc >= 0xDC00 && uc <= 0xDFFF) { Branch (596:17): [True: 0, False: 0]
Branch (596:33): [True: 0, False: 0]
|
597 | 0 | return false; |
598 | 0 | } |
599 | | |
600 | | /* |
601 | | For D800 to DBFF, it's the high surrogates for UTF16. |
602 | | So it's utf-16, there must be another one between 0xDC00 |
603 | | and 0xDFFF. |
604 | | */ |
605 | 0 | if (uc >= 0xD800 && uc <= 0xDBFF) { Branch (605:17): [True: 0, False: 0]
Branch (605:33): [True: 0, False: 0]
|
606 | 0 | unsigned uc2; |
607 | |
|
608 | 0 | if (!in.good()) { Branch (608:21): [True: 0, False: 0]
|
609 | 0 | return false; |
610 | 0 | } |
611 | 0 | c = nextChar(in); |
612 | 0 | if (c != '\\') { Branch (612:21): [True: 0, False: 0]
|
613 | 0 | return false; |
614 | 0 | } |
615 | | |
616 | 0 | if (!in.good()) { Branch (616:21): [True: 0, False: 0]
|
617 | 0 | return false; |
618 | 0 | } |
619 | 0 | c = nextChar(in); |
620 | 0 | if (c != 'u') { Branch (620:21): [True: 0, False: 0]
|
621 | 0 | return false; |
622 | 0 | } |
623 | | |
624 | 0 | if (!parseHex4(in, uc2)) { Branch (624:21): [True: 0, False: 0]
|
625 | 0 | return false; |
626 | 0 | } |
627 | | /* |
628 | | Now we need the low surrogates for UTF16. It should be |
629 | | within 0xDC00 and 0xDFFF. |
630 | | */ |
631 | 0 | if (uc2 < 0xDC00 || uc2 > 0xDFFF) return false; Branch (631:21): [True: 0, False: 0]
Branch (631:37): [True: 0, False: 0]
|
632 | | /* |
633 | | For the character that not in the Basic Multilingual Plan, |
634 | | it's represented as twelve-character, encoding the UTF-16 |
635 | | surrogate pair. |
636 | | UTF16 is between 0x10000 and 0x10FFFF. The high surrogate |
637 | | present the high bits and the low surrogate present the |
638 | | lower 10 bits. |
639 | | For detailed explanation, please refer to: |
640 | | http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-404.pdf |
641 | | Then it will be converted to UTF8. |
642 | | */ |
643 | 0 | uc = 0x10000 + (((uc & 0x3FF) << 10) | (uc2 & 0x3FF)); |
644 | 0 | } |
645 | | |
646 | | /* |
647 | | Get the length of the unicode. |
648 | | Please refer to http://en.wikipedia.org/wiki/UTF-8#Description. |
649 | | */ |
650 | 0 | if (uc < 0x80) Branch (650:17): [True: 0, False: 0]
|
651 | 0 | len = 1; |
652 | 0 | else if (uc < 0x800) Branch (652:22): [True: 0, False: 0]
|
653 | 0 | len = 2; |
654 | 0 | else if (uc < 0x10000) Branch (654:22): [True: 0, False: 0]
|
655 | 0 | len = 3; |
656 | 0 | else |
657 | 0 | len = 4; |
658 | 0 | out += len; |
659 | | /* |
660 | | Encode it. |
661 | | Please refer to http://en.wikipedia.org/wiki/UTF-8#Description. |
662 | | This part of code has a reference to cJSON. |
663 | | */ |
664 | 0 | switch (len) { Branch (664:21): [True: 0, False: 0]
|
665 | 0 | case 4: Branch (665:13): [True: 0, False: 0]
|
666 | 0 | *--out = ((uc | 0x80) & 0xBF); |
667 | 0 | uc >>= 6; |
668 | 0 | [[fallthrough]]; |
669 | 0 | case 3: Branch (669:13): [True: 0, False: 0]
|
670 | 0 | *--out = ((uc | 0x80) & 0xBF); |
671 | 0 | uc >>= 6; |
672 | 0 | [[fallthrough]]; |
673 | 0 | case 2: Branch (673:13): [True: 0, False: 0]
|
674 | 0 | *--out = ((uc | 0x80) & 0xBF); |
675 | 0 | uc >>= 6; |
676 | 0 | [[fallthrough]]; |
677 | 0 | case 1: Branch (677:13): [True: 0, False: 0]
|
678 | | // Mask the first byte according to the standard. |
679 | 0 | *--out = (uc | firstByteMark[len - 1]); |
680 | 0 | } |
681 | 0 | return true; |
682 | 0 | break; |
683 | 0 | } |
684 | 0 | default: Branch (684:9): [True: 0, False: 26.2k]
|
685 | 0 | return false; |
686 | 0 | break; |
687 | 26.2k | } |
688 | 26.2k | } |
689 | | |
690 | | // parse a string |
691 | 276k | bool parseString(std::istream& in) { |
692 | 276k | const int BUFFER_LEN = 4096; |
693 | 276k | if (!writer_.writeStartString()) { Branch (693:13): [True: 0, False: 276k]
|
694 | 0 | err_ = JsonbErrType::E_OUTPUT_FAIL; |
695 | 0 | return false; |
696 | 0 | } |
697 | | |
698 | | // write 4KB at a time |
699 | 276k | char buffer[BUFFER_LEN]; |
700 | 276k | int nread = 0; |
701 | 7.15M | while (in.good()) { Branch (701:16): [True: 7.15M, False: 0]
|
702 | 7.15M | char ch = nextChar(in); |
703 | 7.15M | if (ch == '"') { Branch (703:17): [True: 276k, False: 6.87M]
|
704 | | // write all remaining bytes in the buffer |
705 | 276k | if (nread > 0) { Branch (705:21): [True: 266k, False: 10.6k]
|
706 | 266k | if (!writer_.writeString(buffer, nread)) { Branch (706:25): [True: 0, False: 266k]
|
707 | 0 | err_ = JsonbErrType::E_OUTPUT_FAIL; |
708 | 0 | return false; |
709 | 0 | } |
710 | 266k | } |
711 | | // end writing string |
712 | 276k | if (!writer_.writeEndString()) { Branch (712:21): [True: 0, False: 276k]
|
713 | 0 | err_ = JsonbErrType::E_OUTPUT_FAIL; |
714 | 0 | return false; |
715 | 0 | } |
716 | 276k | return true; |
717 | 6.87M | } else if (ch == '\\') { Branch (717:24): [True: 22.4k, False: 6.85M]
|
718 | | // this is a escape char |
719 | 22.4k | char escape_buffer[5]; // buffer for escape |
720 | 22.4k | int len; |
721 | 22.4k | if (!parseEscape(in, escape_buffer, len)) { Branch (721:21): [True: 0, False: 22.4k]
|
722 | 0 | err_ = JsonbErrType::E_INVALID_STR; |
723 | 0 | return false; |
724 | 0 | } |
725 | | |
726 | | // Write each char to the buffer |
727 | 44.8k | for (int i = 0; i != len; ++i) { Branch (727:33): [True: 22.4k, False: 22.4k]
|
728 | 22.4k | buffer[nread++] = escape_buffer[i]; |
729 | 22.4k | if (nread == BUFFER_LEN) { Branch (729:25): [True: 0, False: 22.4k]
|
730 | 0 | if (!writer_.writeString(buffer, nread)) { Branch (730:29): [True: 0, False: 0]
|
731 | 0 | err_ = JsonbErrType::E_OUTPUT_FAIL; |
732 | 0 | return false; |
733 | 0 | } |
734 | 0 | nread = 0; |
735 | 0 | } |
736 | 22.4k | } |
737 | 6.85M | } else { |
738 | | // just a char |
739 | 6.85M | buffer[nread++] = ch; |
740 | 6.85M | if (nread == BUFFER_LEN) { Branch (740:21): [True: 9, False: 6.85M]
|
741 | | // flush buffer |
742 | 9 | if (!writer_.writeString(buffer, nread)) { Branch (742:25): [True: 0, False: 9]
|
743 | 0 | err_ = JsonbErrType::E_OUTPUT_FAIL; |
744 | 0 | return false; |
745 | 0 | } |
746 | 9 | nread = 0; |
747 | 9 | } |
748 | 6.85M | } |
749 | 7.15M | } |
750 | | |
751 | 0 | err_ = JsonbErrType::E_INVALID_STR; |
752 | 0 | return false; |
753 | 276k | } |
754 | | |
755 | | // parse a number |
756 | | // Number format can be hex, octal, or decimal (including float). |
757 | | // Only decimal can have (+/-) sign prefix. |
758 | 191k | bool parseNumber(std::istream& in) { |
759 | 191k | bool ret = false; |
760 | 191k | switch (in.peek()) { |
761 | 22.8k | case '0': { Branch (761:9): [True: 22.8k, False: 168k]
|
762 | 22.8k | skipChar(in); |
763 | | |
764 | 22.8k | if (in.peek() == 'x' || in.peek() == 'X') { Branch (764:17): [True: 0, False: 22.8k]
Branch (764:37): [True: 0, False: 22.8k]
|
765 | 0 | skipChar(in); |
766 | 0 | ret = parseHex(in); |
767 | 22.8k | } else if (in.peek() == '.') { Branch (767:24): [True: 0, False: 22.8k]
|
768 | 0 | skipChar(in); // remove '.' |
769 | 0 | num_buf_[0] = '.'; |
770 | 0 | ret = parseDouble(in, num_buf_ + 1); |
771 | 22.8k | } else { |
772 | 22.8k | ret = parseOctal(in); |
773 | 22.8k | } |
774 | | |
775 | 22.8k | break; |
776 | 0 | } |
777 | 391 | case '-': { Branch (777:9): [True: 391, False: 190k]
|
778 | 391 | skipChar(in); |
779 | 391 | ret = parseDecimal(in, true); |
780 | 391 | break; |
781 | 0 | } |
782 | 0 | case '+': Branch (782:9): [True: 0, False: 191k]
|
783 | 0 | skipChar(in); |
784 | | // fall through |
785 | 167k | default: Branch (785:9): [True: 167k, False: 23.2k]
|
786 | 167k | ret = parseDecimal(in); |
787 | 167k | break; |
788 | 191k | } |
789 | | |
790 | 191k | return ret; |
791 | 191k | } |
792 | | |
793 | | // parse a number in hex format |
794 | 0 | bool parseHex(std::istream& in) { |
795 | 0 | uint64_t val = 0; |
796 | 0 | int num_digits; |
797 | 0 | if (0 == (num_digits = parseHexHelper(in, val))) { Branch (797:13): [True: 0, False: 0]
|
798 | 0 | err_ = JsonbErrType::E_INVALID_HEX; |
799 | 0 | return false; |
800 | 0 | } |
801 | | |
802 | 0 | int size = 0; |
803 | 0 | if (num_digits <= 2) { Branch (803:13): [True: 0, False: 0]
|
804 | 0 | size = writer_.writeInt8((int8_t)val); |
805 | 0 | } else if (num_digits <= 4) { Branch (805:20): [True: 0, False: 0]
|
806 | 0 | size = writer_.writeInt16((int16_t)val); |
807 | 0 | } else if (num_digits <= 8) { Branch (807:20): [True: 0, False: 0]
|
808 | 0 | size = writer_.writeInt32((int32_t)val); |
809 | 0 | } else if (num_digits <= 16) { Branch (809:20): [True: 0, False: 0]
|
810 | 0 | size = writer_.writeInt64(val); |
811 | 0 | } else { |
812 | 0 | err_ = JsonbErrType::E_HEX_OVERFLOW; |
813 | 0 | return false; |
814 | 0 | } |
815 | | |
816 | 0 | if (size == 0) { Branch (816:13): [True: 0, False: 0]
|
817 | 0 | err_ = JsonbErrType::E_OUTPUT_FAIL; |
818 | 0 | return false; |
819 | 0 | } |
820 | | |
821 | 0 | return true; |
822 | 0 | } |
823 | | |
824 | | // parse a number in octal format |
825 | 22.8k | bool parseOctal(std::istream& in) { |
826 | 22.8k | int64_t val = 0; |
827 | 22.8k | char ch = in.peek(); |
828 | 22.8k | while (in.good() && !strchr(kJsonDelim, ch)) { Branch (828:16): [True: 22.8k, False: 42]
Branch (828:29): [True: 0, False: 22.8k]
|
829 | 0 | if (ch >= '0' && ch <= '7') { Branch (829:17): [True: 0, False: 0]
Branch (829:30): [True: 0, False: 0]
|
830 | 0 | val = val * 8 + (ch - '0'); |
831 | 0 | } else { |
832 | 0 | err_ = JsonbErrType::E_INVALID_OCTAL; |
833 | 0 | return false; |
834 | 0 | } |
835 | | |
836 | | // check if the number overflows |
837 | 0 | if (val < 0) { Branch (837:17): [True: 0, False: 0]
|
838 | 0 | err_ = JsonbErrType::E_OCTAL_OVERFLOW; |
839 | 0 | return false; |
840 | 0 | } |
841 | | |
842 | 0 | skipChar(in); |
843 | 0 | ch = in.peek(); |
844 | 0 | } |
845 | | |
846 | 22.8k | int size = 0; |
847 | 22.8k | if (val <= std::numeric_limits<int8_t>::max()) { Branch (847:13): [True: 22.8k, False: 0]
|
848 | 22.8k | size = writer_.writeInt8((int8_t)val); |
849 | 22.8k | } else if (val <= std::numeric_limits<int16_t>::max()) { Branch (849:20): [True: 0, False: 0]
|
850 | 0 | size = writer_.writeInt16((int16_t)val); |
851 | 0 | } else if (val <= std::numeric_limits<int32_t>::max()) { Branch (851:20): [True: 0, False: 0]
|
852 | 0 | size = writer_.writeInt32((int32_t)val); |
853 | 0 | } else { // val <= INT64_MAX |
854 | 0 | size = writer_.writeInt64(val); |
855 | 0 | } |
856 | | |
857 | 22.8k | if (size == 0) { Branch (857:13): [True: 0, False: 22.8k]
|
858 | 0 | err_ = JsonbErrType::E_OUTPUT_FAIL; |
859 | 0 | return false; |
860 | 0 | } |
861 | | |
862 | 22.8k | return true; |
863 | 22.8k | } |
864 | | |
865 | | // parse a number in decimal (including float) |
866 | 168k | bool parseDecimal(std::istream& in, bool neg = false) { |
867 | 168k | char ch = 0; |
868 | 168k | while (in.good() && (ch = in.peek()) == '0') skipChar(in); Branch (868:16): [True: 168k, False: 11]
Branch (868:29): [True: 158, False: 168k]
|
869 | | |
870 | 168k | char* pbuf = num_buf_; |
871 | 168k | if (neg) *(pbuf++) = '-'; Branch (871:13): [True: 391, False: 167k]
|
872 | | |
873 | 168k | char* save_pos = pbuf; |
874 | 1.18M | while (in.good() && !strchr(kJsonDelim, ch)) { Branch (874:16): [True: 1.18M, False: 322]
Branch (874:29): [True: 1.06M, False: 119k]
|
875 | 1.06M | *(pbuf++) = ch; |
876 | 1.06M | if (pbuf == end_buf_) { Branch (876:17): [True: 0, False: 1.06M]
|
877 | 0 | err_ = JsonbErrType::E_DECIMAL_OVERFLOW; |
878 | 0 | return false; |
879 | 0 | } |
880 | | |
881 | 1.06M | if (ch == '.') { Branch (881:17): [True: 47.8k, False: 1.01M]
|
882 | 47.8k | skipChar(in); // remove '.' |
883 | 47.8k | return parseDouble(in, pbuf); |
884 | 1.01M | } else if (ch == 'E' || ch == 'e') { Branch (884:24): [True: 0, False: 1.01M]
Branch (884:37): [True: 438, False: 1.01M]
|
885 | 438 | skipChar(in); // remove 'E' |
886 | 438 | return parseExponent(in, pbuf); |
887 | 1.01M | } else if (ch < '0' || ch > '9') { Branch (887:24): [True: 22, False: 1.01M]
Branch (887:36): [True: 74, False: 1.01M]
|
888 | 96 | err_ = JsonbErrType::E_INVALID_DECIMAL; |
889 | 96 | return false; |
890 | 96 | } |
891 | | |
892 | 1.01M | skipChar(in); |
893 | 1.01M | ch = in.peek(); |
894 | 1.01M | } |
895 | 119k | if (save_pos == pbuf) { Branch (895:13): [True: 169, False: 119k]
|
896 | 169 | err_ = JsonbErrType::E_INVALID_DECIMAL; // empty input |
897 | 169 | return false; |
898 | 169 | } |
899 | | |
900 | 119k | *pbuf = 0; // set null-terminator |
901 | 119k | StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS; |
902 | 119k | int128_t val = |
903 | 119k | StringParser::string_to_int<int128_t>(num_buf_, pbuf - num_buf_, &parse_result); |
904 | 119k | if (parse_result != StringParser::PARSE_SUCCESS) { Branch (904:13): [True: 0, False: 119k]
|
905 | 0 | VLOG_ROW << "debug string_to_int error for " << num_buf_ << " val=" << val Line | Count | Source | 38 | 0 | #define VLOG_ROW VLOG(10) |
|
906 | 0 | << " parse_result=" << parse_result; |
907 | 0 | err_ = JsonbErrType::E_DECIMAL_OVERFLOW; |
908 | 0 | return false; |
909 | 0 | } |
910 | | |
911 | 119k | int size = 0; |
912 | 119k | if (val >= std::numeric_limits<int8_t>::min() && Branch (912:13): [True: 119k, False: 232]
|
913 | 119k | val <= std::numeric_limits<int8_t>::max()) { Branch (913:13): [True: 45.4k, False: 74.0k]
|
914 | 45.4k | size = writer_.writeInt8((int8_t)val); |
915 | 74.2k | } else if (val >= std::numeric_limits<int16_t>::min() && Branch (915:20): [True: 74.0k, False: 232]
|
916 | 74.2k | val <= std::numeric_limits<int16_t>::max()) { Branch (916:20): [True: 20.2k, False: 53.7k]
|
917 | 20.2k | size = writer_.writeInt16((int16_t)val); |
918 | 53.9k | } else if (val >= std::numeric_limits<int32_t>::min() && Branch (918:20): [True: 53.7k, False: 232]
|
919 | 53.9k | val <= std::numeric_limits<int32_t>::max()) { Branch (919:20): [True: 20.0k, False: 33.6k]
|
920 | 20.0k | size = writer_.writeInt32((int32_t)val); |
921 | 33.8k | } else if (val >= std::numeric_limits<int64_t>::min() && Branch (921:20): [True: 33.8k, False: 0]
|
922 | 33.8k | val <= std::numeric_limits<int64_t>::max()) { Branch (922:20): [True: 33.8k, False: 0]
|
923 | 33.8k | size = writer_.writeInt64((int64_t)val); |
924 | 33.8k | } else { // INT128 |
925 | 0 | size = writer_.writeInt128(val); |
926 | 0 | } |
927 | | |
928 | 119k | if (size == 0) { Branch (928:13): [True: 0, False: 119k]
|
929 | 0 | err_ = JsonbErrType::E_OUTPUT_FAIL; |
930 | 0 | return false; |
931 | 0 | } |
932 | | |
933 | 119k | return true; |
934 | 119k | } |
935 | | |
936 | | // parse IEEE 754 double precision |
937 | 47.8k | bool parseDouble(std::istream& in, char* pbuf) { |
938 | 47.8k | char* save_pos = pbuf; |
939 | 47.8k | char ch = in.peek(); |
940 | 385k | while (in.good() && !strchr(kJsonDelim, ch)) { Branch (940:16): [True: 385k, False: 66]
Branch (940:29): [True: 338k, False: 47.0k]
|
941 | 338k | *(pbuf++) = ch; |
942 | 338k | if (pbuf == end_buf_) { Branch (942:17): [True: 0, False: 338k]
|
943 | 0 | err_ = JsonbErrType::E_DOUBLE_OVERFLOW; |
944 | 0 | return false; |
945 | 0 | } |
946 | | |
947 | 338k | if (ch == 'e' || ch == 'E') { Branch (947:17): [True: 788, False: 337k]
Branch (947:30): [True: 0, False: 337k]
|
948 | 788 | skipChar(in); // remove 'E' |
949 | 788 | return parseExponent(in, pbuf); |
950 | 337k | } else if (ch < '0' || ch > '9') { Branch (950:24): [True: 0, False: 337k]
Branch (950:36): [True: 11, False: 337k]
|
951 | 11 | err_ = JsonbErrType::E_INVALID_DECIMAL; |
952 | 11 | return false; |
953 | 11 | } |
954 | | |
955 | 337k | skipChar(in); |
956 | 337k | ch = in.peek(); |
957 | 337k | } |
958 | 47.0k | if (save_pos == pbuf) { Branch (958:13): [True: 0, False: 47.0k]
|
959 | 0 | err_ = JsonbErrType::E_INVALID_DECIMAL; // empty input |
960 | 0 | return false; |
961 | 0 | } |
962 | | |
963 | 47.0k | *pbuf = 0; // set null-terminator |
964 | 47.0k | return internConvertBufferToDouble(num_buf_, pbuf - num_buf_); |
965 | 47.0k | } |
966 | | |
967 | | // parse the exponent part of a double number |
968 | 1.22k | bool parseExponent(std::istream& in, char* pbuf) { |
969 | 1.22k | char ch = in.peek(); |
970 | 1.22k | if (in.good()) { Branch (970:13): [True: 1.22k, False: 0]
|
971 | 1.22k | if (ch == '+' || ch == '-') { Branch (971:17): [True: 798, False: 428]
Branch (971:30): [True: 428, False: 0]
|
972 | 1.22k | *(pbuf++) = ch; |
973 | 1.22k | if (pbuf == end_buf_) { Branch (973:21): [True: 0, False: 1.22k]
|
974 | 0 | err_ = JsonbErrType::E_DOUBLE_OVERFLOW; |
975 | 0 | return false; |
976 | 0 | } |
977 | 1.22k | skipChar(in); |
978 | 1.22k | ch = in.peek(); |
979 | 1.22k | } |
980 | 1.22k | } |
981 | | |
982 | 1.22k | char* save_pos = pbuf; |
983 | 4.90k | while (in.good() && !strchr(kJsonDelim, ch)) { Branch (983:16): [True: 4.87k, False: 32]
Branch (983:29): [True: 3.67k, False: 1.19k]
|
984 | 3.67k | *(pbuf++) = ch; |
985 | 3.67k | if (pbuf == end_buf_) { Branch (985:17): [True: 0, False: 3.67k]
|
986 | 0 | err_ = JsonbErrType::E_DOUBLE_OVERFLOW; |
987 | 0 | return false; |
988 | 0 | } |
989 | | |
990 | 3.67k | if (ch < '0' || ch > '9') { Branch (990:17): [True: 0, False: 3.67k]
Branch (990:29): [True: 0, False: 3.67k]
|
991 | 0 | err_ = JsonbErrType::E_INVALID_EXPONENT; |
992 | 0 | return false; |
993 | 0 | } |
994 | | |
995 | 3.67k | skipChar(in); |
996 | 3.67k | ch = in.peek(); |
997 | 3.67k | } |
998 | 1.22k | if (save_pos == pbuf) { Branch (998:13): [True: 0, False: 1.22k]
|
999 | 0 | err_ = JsonbErrType::E_INVALID_EXPONENT; // empty input |
1000 | 0 | return false; |
1001 | 0 | } |
1002 | | |
1003 | 1.22k | *pbuf = 0; // set null-terminator |
1004 | 1.22k | return internConvertBufferToDouble(num_buf_, pbuf - num_buf_); |
1005 | 1.22k | } |
1006 | | |
1007 | | // convert the buffered number text to a double and write it to the output |
1008 | 48.3k | bool internConvertBufferToDouble(char* num_buf_, int len) { |
1009 | 48.3k | StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS; |
1010 | 48.3k | double val = StringParser::string_to_float<double>(num_buf_, len, &parse_result); |
1011 | 48.3k | if (parse_result != StringParser::PARSE_SUCCESS) { Branch (1011:13): [True: 366, False: 47.9k]
|
1012 | 366 | VLOG_ROW << "debug string_to_float error for " << num_buf_ << " val=" << val Line | Count | Source | 38 | 0 | #define VLOG_ROW VLOG(10) |
|
1013 | 0 | << " parse_result=" << parse_result; |
1014 | 366 | err_ = JsonbErrType::E_DECIMAL_OVERFLOW; |
1015 | 366 | return false; |
1016 | 366 | } |
1017 | | |
1018 | 47.9k | if (writer_.writeDouble(val) == 0) { Branch (1018:13): [True: 0, False: 47.9k]
|
1019 | 0 | err_ = JsonbErrType::E_OUTPUT_FAIL; |
1020 | 0 | return false; |
1021 | 0 | } |
1022 | | |
1023 | 47.9k | return true; |
1024 | 47.9k | } |
1025 | | |
1026 | 1.24M | void trim(std::istream& in) { |
1027 | 1.34M | while (in.good() && strchr(kWhiteSpace, in.peek())) { Branch (1027:16): [True: 1.34M, False: 451]
Branch (1027:29): [True: 97.4k, False: 1.24M]
|
1028 | 97.4k | skipChar(in); |
1029 | 97.4k | } |
1030 | 1.24M | } |
1031 | | |
1032 | | /* |
1033 | | * Helper functions to keep track of characters read. |
1034 | | * Do not rely on std::istream's tellg() which may not be implemented. |
1035 | | */ |
1036 | | |
1037 | 7.85M | char nextChar(std::istream& in) { |
1038 | 7.85M | ++stream_pos_; |
1039 | 7.85M | return in.get(); |
1040 | 7.85M | } |
1041 | | |
1042 | 1.96M | void skipChar(std::istream& in) { |
1043 | 1.96M | ++stream_pos_; |
1044 | 1.96M | in.ignore(); |
1045 | 1.96M | } |
1046 | | |
1047 | | private: |
1048 | | JsonbWriterT<OS_TYPE> writer_; |
1049 | | uint32_t stream_pos_; |
1050 | | JsonbErrType err_; |
1051 | | char num_buf_[512]; // buffer to hold number string |
1052 | | const char* end_buf_ = num_buf_ + sizeof(num_buf_) - 1; |
1053 | | uint32_t nesting_lvl_ = 0; |
1054 | | }; |
1055 | | |
1056 | | typedef JsonbParserT<JsonbOutStream> JsonbParser; |
1057 | | |
1058 | | } // namespace doris |
1059 | | |
1060 | | #endif // JSONB_JSONBJSONPARSER_H |