/root/doris/be/src/util/string_util.h

Source
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <strings.h>

#include <algorithm>
#include <boost/token_functions.hpp>
#include <boost/tokenizer.hpp>
#include <cctype>
#include <cstddef>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include "common/exception.h"
#include "common/status.h"

namespace doris {

// Returns a copy of `input` in which every byte has been mapped through
// std::tolower. Each byte is widened via unsigned char before the call so
// that bytes above 0x7F are never passed to std::tolower as negative values.
inline std::string to_lower(const std::string& input) {
    std::string lowered(input);
    for (char& ch : lowered) {
        ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
    }
    return lowered;
}

// Returns a copy of `input` in which every byte has been mapped through
// std::toupper. Each byte is widened via unsigned char before the call so
// that bytes above 0x7F are never passed to std::toupper as negative values.
inline std::string to_upper(const std::string& input) {
    std::string raised;
    raised.reserve(input.size());
    for (const char ch : input) {
        raised.push_back(static_cast<char>(std::toupper(static_cast<unsigned char>(ch))));
    }
    return raised;
}

// Case-insensitive equality of two strings.
//
// Returns false immediately when the lengths differ; otherwise compares the
// strings byte by byte after folding each byte with std::tolower. The
// comparison is done in place, so no temporary lowered copies are allocated.
inline bool iequal(const std::string& lhs, const std::string& rhs) {
    if (lhs.size() != rhs.size()) {
        return false;
    }
    // Parameters are unsigned char so std::tolower never receives a negative value.
    return std::equal(lhs.begin(), lhs.end(), rhs.begin(),
                      [](unsigned char a, unsigned char b) {
                          return std::tolower(a) == std::tolower(b);
                      });
}

// Returns true when `value` begins with `beginning`.
//
// An empty `beginning` matches every string. The comparison is anchored at
// offset 0 and inspects at most beginning.size() characters, so a
// non-matching prefix is rejected without scanning the rest of `value`.
inline bool starts_with(const std::string& value, const std::string& beginning) {
    // compare() clamps the inspected range to value.size(), so a `value`
    // shorter than `beginning` simply compares unequal.
    return value.compare(0, beginning.size(), beginning) == 0;
}

// Returns true when `value` finishes with `ending`.
//
// An `ending` longer than `value` can never match; an empty `ending`
// matches every string.
inline bool ends_with(std::string const& value, std::string const& ending) {
    const std::size_t suffix_len = ending.size();
    if (value.size() < suffix_len) {
        return false;
    }
    // Compare the last suffix_len characters of `value` against `ending`.
    const std::size_t tail_start = value.size() - suffix_len;
    return value.compare(tail_start, suffix_len, ending) == 0;
}

// Strips leading and trailing whitespace (as classified by std::isspace)
// from `sv` and returns the remaining middle part.
//
// The result is a view into the same character data as the argument; no
// characters are copied. An input that is empty or consists only of
// whitespace yields an empty view.
inline std::string_view trim(std::string_view sv) {
    // Drop whitespace from the front.
    while (!sv.empty() && std::isspace(static_cast<unsigned char>(sv.front()))) {
        sv.remove_prefix(1);
    }
    // Drop whitespace from the back.
    while (!sv.empty() && std::isspace(static_cast<unsigned char>(sv.back()))) {
        sv.remove_suffix(1);
    }
    return sv;
}

// Splits `s` on every occurrence of `delim` and returns the pieces in order.
//
// - Adjacent, leading and trailing delimiters produce empty pieces, so the
//   result always has (number of delimiter occurrences + 1) elements.
// - An empty `s` yields a single empty string.
// - An empty `delim` cannot advance the scan position (searching for "" always
//   succeeds at the current offset), so it is treated as "no delimiter" and
//   the whole input is returned as the only piece instead of looping forever.
inline std::vector<std::string> split(const std::string& s, const std::string& delim) {
    std::vector<std::string> out;
    if (delim.empty()) {
        out.push_back(s);
        return out;
    }

    size_t pos = 0;
    for (size_t find = s.find(delim, pos); find != std::string::npos;
         find = s.find(delim, pos)) {
        out.emplace_back(s.data() + pos, s.data() + find);
        // Resume the search right after the delimiter that was just consumed.
        pos = find + delim.size();
    }

    // Whatever follows the last delimiter (possibly nothing) is the final piece.
    out.emplace_back(s.data() + pos, s.data() + s.size());
    return out;
}

// Concatenates the elements of `elems`, formatted with operator<<, placing
// `delim` between consecutive elements. An empty vector yields an empty
// string; a single element is returned without any delimiter.
//
// The delimiter is streamed as a std::string, so its full length is written
// even when it contains embedded NUL characters.
template <typename T>
std::string join(const std::vector<T>& elems, const std::string& delim) {
    std::stringstream ss;
    for (size_t i = 0; i < elems.size(); ++i) {
        if (i != 0) {
            ss << delim;
        }
        ss << elems[i];
    }
    return ss.str();
}

// Hash functor that hashes the to_lower() form of a string, so strings that
// differ only in letter case produce the same hash value.
struct StringCaseHasher {
public:
    std::size_t operator()(const std::string& value) const {
        const std::hash<std::string> hasher;
        return hasher(to_lower(value));
    }
};

// Case-insensitive equality functor for strings.
//
// Two strings are equal when they have the same length and every pair of
// corresponding bytes matches after folding with std::tolower. All bytes are
// compared, including any that follow an embedded NUL, so that strings which
// compare equal here also lower-case to the same byte sequence.
struct StringCaseEqual {
public:
    bool operator()(const std::string& lhs, const std::string& rhs) const {
        if (lhs.size() != rhs.size()) {
            return false;
        }
        // Parameters are unsigned char so std::tolower never receives a negative value.
        return std::equal(lhs.begin(), lhs.end(), rhs.begin(),
                          [](unsigned char a, unsigned char b) {
                              return std::tolower(a) == std::tolower(b);
                          });
    }
};

// Case-insensitive "less than" functor for strings.
//
// The common-length prefixes are compared with strncasecmp; when they tie,
// the shorter string orders first.
struct StringCaseLess {
public:
    bool operator()(const std::string& lhs, const std::string& rhs) const {
        const size_t shared_len = std::min(lhs.size(), rhs.size());
        const int order = strncasecmp(lhs.c_str(), rhs.c_str(), shared_len);
        if (order != 0) {
            return order < 0;
        }
        // Prefixes tie: fall back to comparing lengths.
        return lhs.size() < rhs.size();
    }
};

// Declared here only; the definition is not part of this header.
// NOTE(review): presumably combines `identifier` and `path` into one hash value — confirm against the definition.
size_t hash_of_path(const std::string& identifier, const std::string& path);
// Declared here only; the definition is not part of this header.
// NOTE(review): presumably parses `input` as an int and uses `name` in the error it reports — confirm against the definition.
Result<int> safe_stoi(const std::string& input, const std::string& name);
// Ordered set of strings whose ordering is given by StringCaseLess.
using StringCaseSet = std::set<std::string, StringCaseLess>;
// Hash set of strings that hashes with StringCaseHasher and compares with StringCaseEqual.
using StringCaseUnorderedSet = std::unordered_set<std::string, StringCaseHasher, StringCaseEqual>;
// Ordered map keyed by string whose key ordering is given by StringCaseLess.
template <class T>
using StringCaseMap = std::map<std::string, T, StringCaseLess>;
// Hash map keyed by string that hashes keys with StringCaseHasher and compares them with StringCaseEqual.
template <class T>
using StringCaseUnorderedMap =
        std::unordered_map<std::string, T, StringCaseHasher, StringCaseEqual>;

template <typename T>
auto get_json_token(T& path_string) {
    try {
        return boost::tokenizer<boost::escaped_list_separator<char>>(
                path_string, boost::escaped_list_separator<char>("\\", ".", "\""));
    } catch (const boost::escaped_list_error& err) {
        throw doris::Exception(ErrorCode::INVALID_JSON_PATH, "meet error {}", err.what());
    }
}

#ifdef USE_LIBCPP
// When USE_LIBCPP is defined, calling get_json_token with a std::string_view
// lvalue is rejected at compile time through this deleted specialization.
// NOTE(review): presumably boost::tokenizer does not work with
// std::string_view under libc++ — confirm the underlying reason.
template <>
auto get_json_token(std::string_view& path_string) = delete;
#endif

} // namespace doris

Line	Count	Source
1		// Licensed to the Apache Software Foundation (ASF) under one
2		// or more contributor license agreements. See the NOTICE file
3		// distributed with this work for additional information
4		// regarding copyright ownership. The ASF licenses this file
5		// to you under the Apache License, Version 2.0 (the
6		// "License"); you may not use this file except in compliance
7		// with the License. You may obtain a copy of the License at
8		//
9		// http://www.apache.org/licenses/LICENSE-2.0
10		//
11		// Unless required by applicable law or agreed to in writing,
12		// software distributed under the License is distributed on an
13		// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14		// KIND, either express or implied. See the License for the
15		// specific language governing permissions and limitations
16		// under the License.
17
18		#pragma once
19
20		#include <strings.h>
21
22		#include <algorithm>
23		#include <boost/token_functions.hpp>
24		#include <boost/tokenizer.hpp>
25		#include <cctype>
26		#include <cstddef>
27		#include <map>
28		#include <set>
29		#include <sstream>
30		#include <string>
31		#include <unordered_map>
32		#include <unordered_set>
33		#include <vector>
34
35		#include "common/exception.h"
36		#include "common/status.h"
37
38		namespace doris {
39
40	792k	inline std::string to_lower(const std::string& input) {
41	792k	std::string output;
42	792k	output.resize(input.size());
43	792k	std::transform(input.begin(), input.end(), output.begin(),
44	792k	[](unsigned char c) { return std::tolower(c); });
45	792k	return output;
46	792k	}
47
48	29	inline std::string to_upper(const std::string& input) {
49	29	std::string output;
50	29	output.resize(input.size());
51	29	std::transform(input.begin(), input.end(), output.begin(),
52	137	[](unsigned char c) { return std::toupper(c); });
53	29	return output;
54	29	}
55
56	195	inline bool iequal(const std::string& lhs, const std::string& rhs) {
57	195	if (lhs.size() != rhs.size()) {
58	139	return false;
59	139	}
60	56	return to_lower(lhs) == to_lower(rhs);
61	195	}
62
63	42	inline bool starts_with(const std::string& value, const std::string& beginning) {
64	42	return value.find(beginning) == 0;
65	42	}
66
67	8	inline bool ends_with(std::string const& value, std::string const& ending) {
68	8	if (ending.size() > value.size()) {
69	1	return false;
70	1	}
71	7	return std::equal(ending.rbegin(), ending.rend(), value.rbegin());
72	8	}
73
74	0	inline std::string_view trim(std::string_view sv) {
75	0	size_t start = 0;
76	0	size_t end = sv.size();
77
78	0	while (start < end && std::isspace(static_cast<unsigned char>(sv[start]))) {
79	0	++start;
80	0	}
81
82	0	while (end > start && std::isspace(static_cast<unsigned char>(sv[end - 1]))) {
83	0	--end;
84	0	}
85
86	0	return sv.substr(start, end - start);
87	0	}
88
89	45	inline std::vector<std::string> split(const std::string& s, const std::string& delim) {
90	45	std::vector<std::string> out;
91	45	size_t pos {};
92
93	6.54k	for (size_t find = 0; (find = s.find(delim, pos)) != std::string::npos;
94	6.50k	pos = find + delim.size()) {
95	6.50k	out.emplace_back(s.data() + pos, s.data() + find);
96	6.50k	}
97
98	45	out.emplace_back(s.data() + pos, s.data() + s.size());
99	45	return out;
100	45	}
101
102		template <typename T>
103	108	std::string join(const std::vector<T>& elems, const std::string& delim) {
104	108	std::stringstream ss;
105	191	for (size_t i = 0; i < elems.size(); ++i) {
106	83	if (i != 0) {
107	29	ss << delim.c_str();
108	29	}
109	83	ss << elems[i];
110	83	}
111	108	return ss.str();
112	108	}
113
114		struct StringCaseHasher {
115		public:
116	1.61k	std::size_t operator()(const std::string& value) const {
117	1.61k	std::string lower_value = to_lower(value);
118	1.61k	return std::hash<std::string>()(lower_value);
119	1.61k	}
120		};
121
122		struct StringCaseEqual {
123		public:
124	1.28k	bool operator()(const std::string& lhs, const std::string& rhs) const {
125	1.28k	if (lhs.size() != rhs.size()) {
126	0	return false;
127	0	}
128	1.28k	return strncasecmp(lhs.c_str(), rhs.c_str(), lhs.size()) == 0;
129	1.28k	}
130		};
131
132		struct StringCaseLess {
133		public:
134	38	bool operator()(const std::string& lhs, const std::string& rhs) const {
135	38	size_t common_size = std::min(lhs.size(), rhs.size());
136	38	auto cmp = strncasecmp(lhs.c_str(), rhs.c_str(), common_size);
137	38	if (cmp == 0) {
138	30	return lhs.size() < rhs.size();
139	30	}
140	8	return cmp < 0;
141	38	}
142		};
143
144		size_t hash_of_path(const std::string& identifier, const std::string& path);
145		Result<int> safe_stoi(const std::string& input, const std::string& name);
146		using StringCaseSet = std::set<std::string, StringCaseLess>;
147		using StringCaseUnorderedSet = std::unordered_set<std::string, StringCaseHasher, StringCaseEqual>;
148		template <class T>
149		using StringCaseMap = std::map<std::string, T, StringCaseLess>;
150		template <class T>
151		using StringCaseUnorderedMap =
152		std::unordered_map<std::string, T, StringCaseHasher, StringCaseEqual>;
153
154		template <typename T>
155	0	auto get_json_token(T& path_string) {
156	0	try {
157	0	return boost::tokenizer<boost::escaped_list_separator<char>>(
158	0	path_string, boost::escaped_list_separator<char>("\\", ".", "\""));
159	0	} catch (const boost::escaped_list_error& err) {
160	0	throw doris::Exception(ErrorCode::INVALID_JSON_PATH, "meet error {}", err.what());
161	0	}
162	0	}
163
164		#ifdef USE_LIBCPP
165		template <>
166		auto get_json_token(std::string_view& path_string) = delete;
167		#endif
168
169		} // namespace doris

Coverage Report

Created: 2025-10-28 13:31