/root/doris/be/src/exprs/json_functions.h
| Line | Count | Source | 
| 1 |  | // Licensed to the Apache Software Foundation (ASF) under one | 
| 2 |  | // or more contributor license agreements.  See the NOTICE file | 
| 3 |  | // distributed with this work for additional information | 
| 4 |  | // regarding copyright ownership.  The ASF licenses this file | 
| 5 |  | // to you under the Apache License, Version 2.0 (the | 
| 6 |  | // "License"); you may not use this file except in compliance | 
| 7 |  | // with the License.  You may obtain a copy of the License at | 
| 8 |  | // | 
| 9 |  | //   http://www.apache.org/licenses/LICENSE-2.0 | 
| 10 |  | // | 
| 11 |  | // Unless required by applicable law or agreed to in writing, | 
| 12 |  | // software distributed under the License is distributed on an | 
| 13 |  | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | 
| 14 |  | // KIND, either express or implied.  See the License for the | 
| 15 |  | // specific language governing permissions and limitations | 
| 16 |  | // under the License. | 
| 17 |  |  | 
| 18 |  | #pragma once | 
| 19 |  |  | 
| 20 |  | #include <fmt/format.h> | 
| 21 |  | #include <rapidjson/document.h> | 
| 22 |  | #include <simdjson.h> // IWYU pragma: keep | 
| 23 |  |  | 
| 24 |  | #include <sstream> | 
| 25 |  | #include <string> | 
| 26 |  | #include <utility> | 
| 27 |  | #include <vector> | 
| 28 |  |  | 
| 29 |  | #include "common/status.h" | 
| 30 |  |  | 
// Forward declarations of the simdjson on-demand `object` and `value` classes
// that live in the `fallback` namespace.
namespace simdjson::fallback::ondemand {
class object;
class value;
} // namespace simdjson::fallback::ondemand
| 39 |  |  | 
| 40 |  | namespace doris { | 
| 41 |  |  | 
// Type tags for the JSON functions: int, double and string variants.
// The numeric values are spelled out explicitly; they are contiguous from 0.
enum JsonFunctionType {
    JSON_FUN_INT = 0,
    JSON_FUN_DOUBLE = 1,
    JSON_FUN_STRING = 2,

    // Sentinel entry; it must stay the last enumerator.
    JSON_FUN_UNKNOWN = 3
};
| 49 |  |  | 
| 50 |  | struct JsonPath { | 
| 51 |  |     std::string key; // key of a json object | 
| 52 |  |     int idx;         // array index of a json array, -1 means not set, -2 means * | 
| 53 |  |     bool is_valid;   // true if the path is successfully parsed | 
| 54 |  |  | 
| 55 |  |     JsonPath(const std::string& key_, int idx_, bool is_valid_) | 
| 56 | 0 |             : key(key_), idx(idx_), is_valid(is_valid_) {} | 
| 57 |  |  | 
| 58 |  |     JsonPath(std::string&& key_, int idx_, bool is_valid_) | 
| 59 | 22 |             : key(std::move(key_)), idx(idx_), is_valid(is_valid_) {} | 
| 60 |  |  | 
| 61 | 0 |     std::string to_string() const { | 
| 62 | 0 |         std::stringstream ss; | 
| 63 | 0 |         if (!is_valid) { | 
| 64 | 0 |             return "INVALID"; | 
| 65 | 0 |         } | 
| 66 | 0 |         if (!key.empty()) { | 
| 67 | 0 |             ss << key; | 
| 68 | 0 |         } | 
| 69 | 0 |         if (idx == -2) { | 
| 70 | 0 |             ss << "[*]"; | 
| 71 | 0 |         } else if (idx > -1) { | 
| 72 | 0 |             ss << "[" << idx << "]"; | 
| 73 | 0 |         } | 
| 74 | 0 |         return ss.str(); | 
| 75 | 0 |     } | 
| 76 |  |  | 
| 77 | 0 |     std::string debug_string() const { | 
| 78 | 0 |         return fmt::format("key:{}, idx:{}, valid:{}", key, idx, is_valid); | 
| 79 | 0 |     } | 
| 80 |  | }; | 
| 81 |  |  | 
| 82 |  | class JsonFunctions { | 
| 83 |  | public: | 
| 84 |  |     /** | 
| 85 |  |      * The `document` parameter must be has parsed. | 
| 86 |  |      * return Value Is Array object | 
| 87 |  |      * wrap_explicitly is set to true when the returned Array is wrapped actively. | 
| 88 |  |      */ | 
| 89 |  |     static rapidjson::Value* get_json_array_from_parsed_json( | 
| 90 |  |             const std::vector<JsonPath>& parsed_paths, rapidjson::Value* document, | 
| 91 |  |             rapidjson::Document::AllocatorType& mem_allocator, bool* wrap_explicitly); | 
| 92 |  |  | 
| 93 |  |     // this is only for test, it will parse the json path inside, | 
| 94 |  |     // so that we can easily pass a json path as string. | 
| 95 |  |     static rapidjson::Value* get_json_array_from_parsed_json( | 
| 96 |  |             const std::string& jsonpath, rapidjson::Value* document, | 
| 97 |  |             rapidjson::Document::AllocatorType& mem_allocator, bool* wrap_explicitly); | 
| 98 |  |  | 
| 99 |  |     static rapidjson::Value* get_json_object_from_parsed_json( | 
| 100 |  |             const std::vector<JsonPath>& parsed_paths, rapidjson::Value* document, | 
| 101 |  |             rapidjson::Document::AllocatorType& mem_allocator); | 
| 102 |  |  | 
| 103 |  |     static void parse_json_paths(const std::string& path_strings, | 
| 104 |  |                                  std::vector<JsonPath>* parsed_paths); | 
| 105 |  |     // extract_from_object extracts value from object according to the json path. | 
| 106 |  |     // Now, we do not support complete functions of json path. | 
| 107 |  |     // Eg. city[*].id is not supported in this function | 
| 108 |  |     static Status extract_from_object(simdjson::ondemand::object& obj, | 
| 109 |  |                                       const std::vector<JsonPath>& jsonpath, | 
| 110 |  |                                       simdjson::ondemand::value* value) noexcept; | 
| 111 |  |     // src:    {"a" : "b" {"c" : 1}, "e" : 123} | 
| 112 |  |     // dst:    {"a" : "b" {"d" : 1}} | 
| 113 |  |     // merged: {"a" : "b" : {"c" : 1, "d" : 1}, "e" : 123} | 
| 114 |  |     static void merge_objects(rapidjson::Value& dst_object, rapidjson::Value& src_object, | 
| 115 |  |                               rapidjson::Document::AllocatorType& allocator); | 
| 116 |  |  | 
| 117 |  |     static std::string print_json_value(const rapidjson::Value& value); | 
| 118 |  |  | 
| 119 |  |     static bool is_root_path(const std::vector<JsonPath>& json_path); | 
| 120 |  |  | 
| 121 |  | private: | 
| 122 |  |     static rapidjson::Value* match_value(const std::vector<JsonPath>& parsed_paths, | 
| 123 |  |                                          rapidjson::Value* document, | 
| 124 |  |                                          rapidjson::Document::AllocatorType& mem_allocator, | 
| 125 |  |                                          bool is_insert_null = false); | 
| 126 |  |     static void get_parsed_paths(const std::vector<std::string>& path_exprs, | 
| 127 |  |                                  std::vector<JsonPath>* parsed_paths); | 
| 128 |  | }; | 
| 129 |  | } // namespace doris |