/root/doris/be/src/util/string_util.h
| Line | Count | Source | 
| 1 |  | // Licensed to the Apache Software Foundation (ASF) under one | 
| 2 |  | // or more contributor license agreements.  See the NOTICE file | 
| 3 |  | // distributed with this work for additional information | 
| 4 |  | // regarding copyright ownership.  The ASF licenses this file | 
| 5 |  | // to you under the Apache License, Version 2.0 (the | 
| 6 |  | // "License"); you may not use this file except in compliance | 
| 7 |  | // with the License.  You may obtain a copy of the License at | 
| 8 |  | // | 
| 9 |  | //   http://www.apache.org/licenses/LICENSE-2.0 | 
| 10 |  | // | 
| 11 |  | // Unless required by applicable law or agreed to in writing, | 
| 12 |  | // software distributed under the License is distributed on an | 
| 13 |  | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | 
| 14 |  | // KIND, either express or implied.  See the License for the | 
| 15 |  | // specific language governing permissions and limitations | 
| 16 |  | // under the License. | 
| 17 |  |  | 
| 18 |  | #pragma once | 
| 19 |  |  | 
| 20 |  | #include <strings.h> | 
| 21 |  |  | 
| 22 |  | #include <algorithm> | 
| 23 |  | #include <boost/token_functions.hpp> | 
| 24 |  | #include <boost/tokenizer.hpp> | 
| 25 |  | #include <cctype> | 
| 26 |  | #include <cstddef> | 
| 27 |  | #include <map> | 
| 28 |  | #include <set> | 
| 29 |  | #include <sstream> | 
| 30 |  | #include <string> | 
| 31 |  | #include <unordered_map> | 
| 32 |  | #include <unordered_set> | 
| 33 |  | #include <vector> | 
| 34 |  |  | 
| 35 |  | #include "common/exception.h" | 
| 36 |  | #include "common/status.h" | 
| 37 |  |  | 
| 38 |  | namespace doris { | 
| 39 |  |  | 
// Returns a copy of `input` in which every byte has been passed through
// std::tolower. Each byte is converted to unsigned char before the call so
// that bytes with the high bit set are never handed to std::tolower as
// negative values.
inline std::string to_lower(const std::string& input) {
    std::string lowered(input);
    for (char& ch : lowered) {
        ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
    }
    return lowered;
}
| 47 |  |  | 
// Returns a copy of `input` in which every byte has been passed through
// std::toupper. The conversion goes through unsigned char so that bytes with
// the high bit set are never handed to std::toupper as negative values.
inline std::string to_upper(const std::string& input) {
    std::string uppered = input;
    const size_t length = uppered.size();
    for (size_t i = 0; i < length; ++i) {
        const auto byte = static_cast<unsigned char>(uppered[i]);
        uppered[i] = static_cast<char>(std::toupper(byte));
    }
    return uppered;
}
| 55 |  |  | 
// Case-insensitive equality of two strings.
//
// Returns true when both strings have the same length and every pair of
// corresponding bytes is equal after std::tolower. The comparison is done
// byte by byte in place, so no temporary lowercase copies are allocated.
inline bool iequal(const std::string& lhs, const std::string& rhs) {
    // Different lengths can never be equal; this also makes the single-range
    // std::equal below safe.
    if (lhs.size() != rhs.size()) {
        return false;
    }
    return std::equal(lhs.begin(), lhs.end(), rhs.begin(),
                      [](unsigned char a, unsigned char b) {
                          return std::tolower(a) == std::tolower(b);
                      });
}
| 62 |  |  | 
// Returns true when `value` begins with `beginning`.
//
// An empty `beginning` is a prefix of every string. The comparison is
// anchored at offset 0, so a mismatch is detected after inspecting at most
// `beginning.size()` bytes instead of searching the whole of `value`.
inline bool starts_with(const std::string& value, const std::string& beginning) {
    // compare() clamps the inspected range to value.size(), so a `value`
    // shorter than `beginning` simply compares unequal.
    return value.compare(0, beginning.size(), beginning) == 0;
}
| 66 |  |  | 
// Returns true when `value` finishes with `ending`.
//
// A suffix longer than `value` can never match; an empty `ending` matches
// every string.
inline bool ends_with(std::string const& value, std::string const& ending) {
    const size_t suffix_len = ending.size();
    if (value.size() < suffix_len) {
        return false;
    }
    // Compare the last `suffix_len` bytes of `value` against `ending`.
    const size_t tail_offset = value.size() - suffix_len;
    return value.compare(tail_offset, suffix_len, ending) == 0;
}
| 73 |  |  | 
// Strips leading and trailing whitespace (as classified by std::isspace)
// from `sv` and returns the remaining sub-view.
//
// The result refers to the same underlying characters as the argument; no
// data is copied.
inline std::string_view trim(std::string_view sv) {
    // std::isspace requires a value representable as unsigned char.
    const auto is_blank = [](char ch) {
        return std::isspace(static_cast<unsigned char>(ch)) != 0;
    };

    while (!sv.empty() && is_blank(sv.front())) {
        sv.remove_prefix(1);
    }
    while (!sv.empty() && is_blank(sv.back())) {
        sv.remove_suffix(1);
    }
    return sv;
}
| 88 |  |  | 
// Splits `s` on every occurrence of `delim` and returns the pieces in order.
//
// Empty pieces are preserved: adjacent delimiters, or a delimiter at either
// end of `s`, yield empty strings, and an empty `s` yields a single empty
// string. An empty `delim` cannot split anything, so the whole of `s` is
// returned as the only element.
inline std::vector<std::string> split(const std::string& s, const std::string& delim) {
    std::vector<std::string> out;

    // s.find("", pos) always succeeds at `pos` without advancing, so an empty
    // delimiter would otherwise loop forever while appending empty strings.
    if (delim.empty()) {
        out.push_back(s);
        return out;
    }

    size_t pos = 0;
    for (size_t found = s.find(delim, pos); found != std::string::npos;
         found = s.find(delim, pos)) {
        out.emplace_back(s, pos, found - pos);
        pos = found + delim.size();
    }

    // Whatever follows the last delimiter (possibly nothing) is the final piece.
    out.emplace_back(s, pos, std::string::npos);
    return out;
}
| 101 |  |  | 
// Concatenates the elements of `elems`, inserting `delim` between each
// consecutive pair, and returns the result.
//
// Elements are formatted with operator<< on a std::stringstream. An empty
// vector yields an empty string. The delimiter is streamed as a std::string,
// so it is emitted in full even when it contains embedded NUL bytes.
template <typename T>
std::string join(const std::vector<T>& elems, const std::string& delim) {
    std::stringstream ss;
    bool is_first = true;
    for (const auto& elem : elems) {
        if (!is_first) {
            ss << delim;
        }
        is_first = false;
        ss << elem;
    }
    return ss.str();
}
| 113 |  |  | 
| 114 |  | struct StringCaseHasher { | 
| 115 |  | public: | 
| 116 | 1.61k |     std::size_t operator()(const std::string& value) const { | 
| 117 | 1.61k |         std::string lower_value = to_lower(value); | 
| 118 | 1.61k |         return std::hash<std::string>()(lower_value); | 
| 119 | 1.61k |     } | 
| 120 |  | }; | 
| 121 |  |  | 
// Equality functor for case-insensitive string keys.
//
// Two strings are equal when they have the same length and every pair of
// corresponding bytes matches after std::tolower. All bytes are compared,
// including any that follow an embedded NUL, so the result stays consistent
// with a hash computed over the full lowercased string (as StringCaseHasher
// does); a C-string comparison would stop at the first NUL instead.
struct StringCaseEqual {
public:
    bool operator()(const std::string& lhs, const std::string& rhs) const {
        if (lhs.size() != rhs.size()) {
            return false;
        }
        return std::equal(lhs.begin(), lhs.end(), rhs.begin(),
                          [](unsigned char a, unsigned char b) {
                              return std::tolower(a) == std::tolower(b);
                          });
    }
};
| 131 |  |  | 
// Strict-weak-ordering functor for case-insensitive string keys.
//
// Strings are ordered lexicographically by their bytes after std::tolower
// (compared as unsigned values); when one string is a case-insensitive prefix
// of the other, the shorter one sorts first. Every byte takes part in the
// comparison, including bytes after an embedded NUL, which a C-string
// comparison would ignore.
struct StringCaseLess {
public:
    bool operator()(const std::string& lhs, const std::string& rhs) const {
        return std::lexicographical_compare(lhs.begin(), lhs.end(), rhs.begin(), rhs.end(),
                                            [](unsigned char a, unsigned char b) {
                                                return std::tolower(a) < std::tolower(b);
                                            });
    }
};
| 143 |  |  | 
// Declared here, defined outside this header.
// NOTE(review): presumably combines `identifier` and `path` into a single
// hash value — confirm against the definition.
size_t hash_of_path(const std::string& identifier, const std::string& path);
// Declared here, defined outside this header.
// NOTE(review): presumably a non-throwing integer parse of `input`, with
// `name` used to label the error carried by the Result — confirm against the
// definition.
Result<int> safe_stoi(const std::string& input, const std::string& name);
// Ordered set of strings whose ordering is given by StringCaseLess.
using StringCaseSet = std::set<std::string, StringCaseLess>;
// Hash set of strings hashed by StringCaseHasher and compared by StringCaseEqual.
using StringCaseUnorderedSet = std::unordered_set<std::string, StringCaseHasher, StringCaseEqual>;
// Ordered map from string keys to T, with keys ordered by StringCaseLess.
template <class T>
using StringCaseMap = std::map<std::string, T, StringCaseLess>;
// Hash map from string keys to T, with keys hashed by StringCaseHasher and
// compared by StringCaseEqual.
template <class T>
using StringCaseUnorderedMap =
        std::unordered_map<std::string, T, StringCaseHasher, StringCaseEqual>;
| 153 |  |  | 
| 154 |  | template <typename T> | 
| 155 | 0 | auto get_json_token(T& path_string) { | 
| 156 | 0 |     try { | 
| 157 | 0 |         return boost::tokenizer<boost::escaped_list_separator<char>>( | 
| 158 | 0 |                 path_string, boost::escaped_list_separator<char>("\\", ".", "\"")); | 
| 159 | 0 |     } catch (const boost::escaped_list_error& err) { | 
| 160 | 0 |         throw doris::Exception(ErrorCode::INVALID_JSON_PATH, "meet error {}", err.what()); | 
| 161 | 0 |     } | 
| 162 | 0 | } | 
| 163 |  |  | 
// When USE_LIBCPP is defined, the std::string_view specialization of
// get_json_token is explicitly deleted, so calling it with a
// std::string_view lvalue is rejected at compile time.
// NOTE(review): presumably the tokenizer cannot be built from a libc++
// std::string_view — confirm the original motivation.
#ifdef USE_LIBCPP
template <>
auto get_json_token(std::string_view& path_string) = delete;
#endif
| 168 |  |  | 
| 169 |  | } // namespace doris |