/root/doris/be/src/util/string_util.h

Source (jump to first uncovered line)
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <strings.h>

#include <algorithm>
#include <boost/token_functions.hpp>
#include <boost/tokenizer.hpp>
#include <cctype>
#include <cstddef>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include "common/exception.h"
#include "common/status.h"

namespace doris {

// Returns a copy of `input` in which every byte has been passed through
// std::tolower. Bytes are widened via unsigned char first so that values
// above 0x7F are never handed to std::tolower as negative ints.
inline std::string to_lower(const std::string& input) {
    std::string lowered(input);
    for (char& ch : lowered) {
        ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
    }
    return lowered;
}

// Returns a copy of `input` in which every byte has been passed through
// std::toupper. Each byte is converted to unsigned char before the call so
// that values above 0x7F are never handed to std::toupper as negative ints.
inline std::string to_upper(const std::string& input) {
    std::string uppered;
    uppered.reserve(input.size());
    for (const char ch : input) {
        const auto byte = static_cast<unsigned char>(ch);
        uppered.push_back(static_cast<char>(std::toupper(byte)));
    }
    return uppered;
}

// Case-insensitive equality of two strings.
//
// Returns true when both strings have the same length and every pair of
// corresponding bytes is equal after std::tolower. The comparison is done in
// a single pass without building lowered copies of the inputs, so it performs
// no heap allocation and stops at the first mismatch.
inline bool iequal(const std::string& lhs, const std::string& rhs) {
    if (lhs.size() != rhs.size()) {
        return false;
    }
    // Parameters are unsigned char so std::tolower never sees a negative value.
    return std::equal(lhs.begin(), lhs.end(), rhs.begin(), [](unsigned char a, unsigned char b) {
        return std::tolower(a) == std::tolower(b);
    });
}

// Returns true when `value` begins with `beginning`.
//
// An empty `beginning` is a prefix of every string (including the empty one).
// The comparison is anchored at offset 0, so only the first beginning.size()
// bytes of `value` are inspected; the rest of the string is never scanned for
// a later occurrence.
inline bool starts_with(const std::string& value, const std::string& beginning) {
    // compare() clamps the substring length to value.size(), so a `beginning`
    // longer than `value` simply compares unequal.
    return value.compare(0, beginning.size(), beginning) == 0;
}

// Returns true when `value` finishes with `ending`.
//
// An `ending` longer than `value` can never match; an empty `ending` matches
// every string.
inline bool ends_with(std::string const& value, std::string const& ending) {
    const size_t suffix_len = ending.size();
    if (value.size() < suffix_len) {
        return false;
    }
    // Compare exactly the last suffix_len bytes of `value` with `ending`.
    return value.compare(value.size() - suffix_len, suffix_len, ending) == 0;
}

// Splits `s` on every occurrence of the substring `delim`.
//
// The result always contains at least one element: the fields between
// delimiters are returned in order, and empty fields are kept (so "a,,b"
// split on "," yields {"a", "", "b"} and "" yields {""}).
//
// An empty `delim` matches at every position without advancing, which would
// otherwise loop forever while appending empty strings; in that case the whole
// input is returned as a single field.
inline std::vector<std::string> split(const std::string& s, const std::string& delim) {
    std::vector<std::string> out;
    if (delim.empty()) {
        out.push_back(s);
        return out;
    }

    size_t pos = 0;
    size_t hit = s.find(delim, pos);
    while (hit != std::string::npos) {
        out.push_back(s.substr(pos, hit - pos));
        // Resume the search just past the delimiter that was consumed.
        pos = hit + delim.size();
        hit = s.find(delim, pos);
    }

    // Trailing field after the last delimiter (possibly empty).
    out.push_back(s.substr(pos));
    return out;
}

// Concatenates the elements of `elems`, streaming each one with operator<<
// and inserting `delim` between consecutive elements. An empty vector yields
// an empty string; no delimiter is emitted before the first or after the
// last element.
//
// The delimiter is streamed as a std::string so that its full length is
// written, including any embedded '\0' bytes.
template <typename T>
std::string join(const std::vector<T>& elems, const std::string& delim) {
    std::stringstream ss;
    bool first = true;
    for (const auto& elem : elems) {
        if (!first) {
            ss << delim;
        }
        first = false;
        ss << elem;
    }
    return ss.str();
}

// Hash functor for case-insensitive string keys: the hash is computed over
// the lower-cased form of the key, so keys that differ only in letter case
// produce the same hash value.
struct StringCaseHasher {
public:
    std::size_t operator()(const std::string& value) const {
        return std::hash<std::string> {}(to_lower(value));
    }
};

// Equality functor for case-insensitive string keys.
//
// Two strings are equal when they have the same length and every pair of
// corresponding bytes matches after std::tolower. All bytes take part in the
// comparison, including those after an embedded '\0', so two keys are only
// reported equal when their lower-cased forms are byte-for-byte identical.
// A hash computed over the lower-cased key therefore agrees with this
// predicate.
struct StringCaseEqual {
public:
    bool operator()(const std::string& lhs, const std::string& rhs) const {
        if (lhs.size() != rhs.size()) {
            return false;
        }
        // Parameters are unsigned char so std::tolower never sees a negative value.
        return std::equal(lhs.begin(), lhs.end(), rhs.begin(),
                          [](unsigned char a, unsigned char b) {
                              return std::tolower(a) == std::tolower(b);
                          });
    }
};

// Strict-weak-ordering functor for case-insensitive string keys.
//
// Strings are compared byte by byte after std::tolower; the first differing
// pair decides the order, and when one string is a case-insensitive prefix of
// the other the shorter one sorts first. All bytes take part in the
// comparison, including those after an embedded '\0', so keys that differ
// only past a NUL byte are still ordered as distinct keys.
struct StringCaseLess {
public:
    bool operator()(const std::string& lhs, const std::string& rhs) const {
        // Parameters are unsigned char so std::tolower never sees a negative value.
        return std::lexicographical_compare(lhs.begin(), lhs.end(), rhs.begin(), rhs.end(),
                                            [](unsigned char a, unsigned char b) {
                                                return std::tolower(a) < std::tolower(b);
                                            });
    }
};

// Declaration only; the definition is not part of this header.
// NOTE(review): presumably derives a hash value from `identifier` and `path`
// together — confirm the exact combination against the definition.
size_t hash_of_path(const std::string& identifier, const std::string& path);

// Container aliases keyed by std::string that plug in the case-insensitive
// functors defined in this header.

// Ordered set of strings, ordered by StringCaseLess.
using StringCaseSet = std::set<std::string, StringCaseLess>;
// Unordered set of strings, hashed by StringCaseHasher and compared by StringCaseEqual.
using StringCaseUnorderedSet = std::unordered_set<std::string, StringCaseHasher, StringCaseEqual>;
// Ordered map from string keys to T, ordered by StringCaseLess.
template <class T>
using StringCaseMap = std::map<std::string, T, StringCaseLess>;
// Unordered map from string keys to T, hashed by StringCaseHasher and compared by StringCaseEqual.
template <class T>
using StringCaseUnorderedMap =
        std::unordered_map<std::string, T, StringCaseHasher, StringCaseEqual>;

template <typename T>
auto get_json_token(T& path_string) {
    try {
        return boost::tokenizer<boost::escaped_list_separator<char>>(
                path_string, boost::escaped_list_separator<char>("\\", ".", "\""));
    } catch (const boost::escaped_list_error& err) {
        throw doris::Exception(ErrorCode::INVALID_JSON_PATH, "meet error {}", err.what());
    }
}

// When USE_LIBCPP is defined, the std::string_view specialization of
// get_json_token is explicitly deleted, so calling it with a std::string_view
// lvalue is a compile-time error in that configuration.
// NOTE(review): presumably a workaround for an incompatibility between
// boost::tokenizer and string_view under libc++ — confirm the motivation.
#ifdef USE_LIBCPP
template <>
auto get_json_token(std::string_view& path_string) = delete;
#endif

} // namespace doris

Line	Count	Source (jump to first uncovered line)
1		// Licensed to the Apache Software Foundation (ASF) under one
2		// or more contributor license agreements. See the NOTICE file
3		// distributed with this work for additional information
4		// regarding copyright ownership. The ASF licenses this file
5		// to you under the Apache License, Version 2.0 (the
6		// "License"); you may not use this file except in compliance
7		// with the License. You may obtain a copy of the License at
8		//
9		// http://www.apache.org/licenses/LICENSE-2.0
10		//
11		// Unless required by applicable law or agreed to in writing,
12		// software distributed under the License is distributed on an
13		// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14		// KIND, either express or implied. See the License for the
15		// specific language governing permissions and limitations
16		// under the License.
17
18		#pragma once
19
20		#include <strings.h>
21
22		#include <algorithm>
23		#include <boost/token_functions.hpp>
24		#include <boost/tokenizer.hpp>
25		#include <cctype>
26		#include <cstddef>
27		#include <map>
28		#include <set>
29		#include <sstream>
30		#include <string>
31		#include <unordered_map>
32		#include <unordered_set>
33		#include <vector>
34
35		#include "common/exception.h"
36		#include "common/status.h"
37
38		namespace doris {
39
40	5.79k	inline std::string to_lower(const std::string& input) {
41	5.79k	std::string output;
42	5.79k	output.resize(input.size());
43	5.79k	std::transform(input.begin(), input.end(), output.begin(),
44	35.1k	[](unsigned char c) { return std::tolower(c); });
45	5.79k	return output;
46	5.79k	}
47
48	29	inline std::string to_upper(const std::string& input) {
49	29	std::string output;
50	29	output.resize(input.size());
51	29	std::transform(input.begin(), input.end(), output.begin(),
52	137	[](unsigned char c) { return std::toupper(c); });
53	29	return output;
54	29	}
55
56	49	inline bool iequal(const std::string& lhs, const std::string& rhs) {
57	49	if (lhs.size() != rhs.size()) {
58	22	return false;
59	22	}
60	27	return to_lower(lhs) == to_lower(rhs);
61	49	}
62
63	9	inline bool starts_with(const std::string& value, const std::string& beginning) {
64	9	return value.find(beginning) == 0;
65	9	}
66
67	8	inline bool ends_with(std::string const& value, std::string const& ending) {
68	8	if (ending.size() > value.size()) {
69	1	return false;
70	1	}
71	7	return std::equal(ending.rbegin(), ending.rend(), value.rbegin());
72	8	}
73
74	7	inline std::vector<std::string> split(const std::string& s, const std::string& delim) {
75	7	std::vector<std::string> out;
76	7	size_t pos {};
77
78	20	for (size_t find = 0; (find = s.find(delim, pos)) != std::string::npos;
79	13	pos = find + delim.size()) {
80	13	out.emplace_back(s.data() + pos, s.data() + find);
81	13	}
82
83	7	out.emplace_back(s.data() + pos, s.data() + s.size());
84	7	return out;
85	7	}
86
87		template <typename T>
88	3	std::string join(const std::vector<T>& elems, const std::string& delim) {
89	3	std::stringstream ss;
90	13	for (size_t i = 0; i < elems.size(); ++i) {
91	10	if (i != 0) {
92	8	ss << delim.c_str();
93	8	}
94	10	ss << elems[i];
95	10	}
96	3	return ss.str();
97	3	}
98
99		struct StringCaseHasher {
100		public:
101	1.59k	std::size_t operator()(const std::string& value) const {
102	1.59k	std::string lower_value = to_lower(value);
103	1.59k	return std::hash<std::string>()(lower_value);
104	1.59k	}
105		};
106
107		struct StringCaseEqual {
108		public:
109	1.28k	bool operator()(const std::string& lhs, const std::string& rhs) const {
110	1.28k	if (lhs.size() != rhs.size()) {
111	0	return false;
112	0	}
113	1.28k	return strncasecmp(lhs.c_str(), rhs.c_str(), lhs.size()) == 0;
114	1.28k	}
115		};
116
117		struct StringCaseLess {
118		public:
119	38	bool operator()(const std::string& lhs, const std::string& rhs) const {
120	38	size_t common_size = std::min(lhs.size(), rhs.size());
121	38	auto cmp = strncasecmp(lhs.c_str(), rhs.c_str(), common_size);
122	38	if (cmp == 0) {
123	30	return lhs.size() < rhs.size();
124	30	}
125	8	return cmp < 0;
126	38	}
127		};
128
129		size_t hash_of_path(const std::string& identifier, const std::string& path);
130
131		using StringCaseSet = std::set<std::string, StringCaseLess>;
132		using StringCaseUnorderedSet = std::unordered_set<std::string, StringCaseHasher, StringCaseEqual>;
133		template <class T>
134		using StringCaseMap = std::map<std::string, T, StringCaseLess>;
135		template <class T>
136		using StringCaseUnorderedMap =
137		std::unordered_map<std::string, T, StringCaseHasher, StringCaseEqual>;
138
139		template <typename T>
140	14	auto get_json_token(T& path_string) {
141	14	try {
142	14	return boost::tokenizer<boost::escaped_list_separator<char>>(
143	14	path_string, boost::escaped_list_separator<char>("\\", ".", "\""));
144	14	} catch (const boost::escaped_list_error& err) {
145	0	throw doris::Exception(ErrorCode::INVALID_JSON_PATH, "meet error {}", err.what());
146	0	}
147	14	}
148
149		#ifdef USE_LIBCPP
150		template <>
151		auto get_json_token(std::string_view& path_string) = delete;
152		#endif
153
154		} // namespace doris

Coverage Report

Created: 2024-11-18 11:49