/root/doris/be/src/gutil/strings/strip.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2011 Google Inc. All Rights Reserved. |
2 | | // Refactored from contributions of various authors in strings/strutil.h |
3 | | // |
4 | | // This file contains functions that remove a defined part from the string, |
5 | | // i.e., strip the string. |
6 | | |
7 | | #pragma once |
8 | | |
9 | | #include <stddef.h> |
10 | | |
11 | | #include <string> |
12 | | using std::string; |
13 | | |
14 | | #include "gutil/strings/ascii_ctype.h" |
15 | | #include "gutil/strings/stringpiece.h" |
16 | | |
17 | | // Given a string and a putative prefix, returns the string minus the |
18 | | // prefix string if the prefix matches, otherwise the original |
19 | | // string. |
20 | | string StripPrefixString(StringPiece str, const StringPiece& prefix); |
21 | | |
22 | | // Like StripPrefixString, but return true if the prefix was |
23 | | // successfully matched. Write the output to *result. |
24 | | // It is safe for result to point back to the input string. |
25 | | bool TryStripPrefixString(StringPiece str, const StringPiece& prefix, string* result); |
26 | | |
27 | | // Given a string and a putative suffix, returns the string minus the |
28 | | // suffix string if the suffix matches, otherwise the original |
29 | | // string. |
30 | | string StripSuffixString(StringPiece str, const StringPiece& suffix); |
31 | | |
32 | | // Like StripSuffixString, but return true if the suffix was |
33 | | // successfully matched. Write the output to *result. |
34 | | // It is safe for result to point back to the input string. |
35 | | bool TryStripSuffixString(StringPiece str, const StringPiece& suffix, string* result); |
36 | | |
37 | | // ---------------------------------------------------------------------- |
38 | | // StripString |
39 | | // Replaces any occurrence of the character 'remove' (or the characters |
40 | | // in 'remove') with the character 'replacewith'. |
41 | | // Good for keeping html characters or protocol characters (\t) out |
42 | | // of places where they might cause a problem. |
43 | | // ---------------------------------------------------------------------- |
// Single-character overload: walks the NUL-terminated buffer `str` and
// overwrites, in place, every character equal to `remove` with
// `replacewith`. The buffer is never shortened or reallocated.
inline void StripString(char* str, char remove, char replacewith) {
    char* cursor = str;
    while (*cursor != '\0') {
        if (*cursor == remove) {
            *cursor = replacewith;
        }
        ++cursor;
    }
}
49 | | |
50 | | void StripString(char* str, StringPiece remove, char replacewith); |
51 | | void StripString(char* str, int len, StringPiece remove, char replacewith); |
52 | | void StripString(string* s, StringPiece remove, char replacewith); |
53 | | |
54 | | // ---------------------------------------------------------------------- |
55 | | // StripDupCharacters |
56 | | // Replaces any repeated occurrence of the character 'dup_char' |
57 | | // with single occurrence. e.g., |
58 | | // StripDupCharacters("a//b/c//d", '/', 0) => "a/b/c/d" |
59 | | // Return the number of characters removed |
60 | | // ---------------------------------------------------------------------- |
61 | | int StripDupCharacters(string* s, char dup_char, int start_pos); |
62 | | |
63 | | // ---------------------------------------------------------------------- |
64 | | // StripWhiteSpace |
65 | | // "Removes" whitespace from both sides of string. Pass in a pointer to an |
66 | | // array of characters, and its length. The function changes the pointer |
67 | | // and length to refer to a substring that does not contain leading or |
68 | | // trailing spaces; it does not modify the string itself. If the caller is |
69 | | // using NUL-terminated strings, it is the caller's responsibility to insert |
70 | | // the NUL character at the end of the substring. |
71 | | // |
72 | | // Note: to be completely type safe, this function should be |
73 | | // parameterized as a template: template<typename anyChar> void |
74 | | // StripWhiteSpace(anyChar** str, int* len), where the expectation |
75 | | // is that anyChar could be char, const char, w_char, const w_char, |
76 | | // unicode_char, or any other character type we want. However, we |
77 | | // just provided a version for char and const char. C++ is |
78 | | // inconvenient, but correct, here. Ask Amit if you want to know |
79 | | // the type safety details. |
80 | | // ---------------------------------------------------------------------- |
81 | | void StripWhiteSpace(const char** str, int* len); |
82 | | |
83 | | //------------------------------------------------------------------------ |
84 | | // StripTrailingWhitespace() |
85 | | // Removes whitespace at the end of the string *s. |
86 | | //------------------------------------------------------------------------ |
87 | | void StripTrailingWhitespace(string* s); |
88 | | |
89 | | //------------------------------------------------------------------------ |
90 | | // StripTrailingNewline(string*) |
91 | | // Strips the very last trailing newline or CR+newline from its |
92 | | // input, if one exists. Useful for dealing with MapReduce's text |
93 | | // input mode, which appends '\n' to each map input. Returns true |
94 | | // if a newline was stripped. |
95 | | //------------------------------------------------------------------------ |
96 | | bool StripTrailingNewline(string* s); |
97 | | |
98 | 0 | inline void StripWhiteSpace(char** str, int* len) { |
99 | 0 | // The "real" type for StripWhiteSpace is ForAll char types C, take |
100 | 0 | // (C, int) as input and return (C, int) as output. We're using the |
101 | 0 | // cast here to assert that we can take a char*, even though the |
102 | 0 | // function thinks it's assigning to const char*. |
103 | 0 | StripWhiteSpace(const_cast<const char**>(str), len); |
104 | 0 | } |
105 | | |
106 | 2.51k | inline void StripWhiteSpace(StringPiece* str) { |
107 | 2.51k | const char* data = str->data(); |
108 | 2.51k | int len = str->size(); |
109 | 2.51k | StripWhiteSpace(&data, &len); |
110 | 2.51k | str->set(data, len); |
111 | 2.51k | } |
112 | | |
113 | | void StripWhiteSpace(string* str); |
114 | | |
namespace strings {

// Applies StripWhiteSpace to every element of *collection, in iteration
// order. Collection must expose an `iterator` type plus begin()/end(), and
// a StripWhiteSpace overload must accept a pointer to its element type.
template <typename Collection>
void StripWhiteSpaceInCollection(Collection* collection) {
    typename Collection::iterator current = collection->begin();
    while (current != collection->end()) {
        StripWhiteSpace(&(*current));
        ++current;
    }
}

} // namespace strings
124 | | |
125 | | // ---------------------------------------------------------------------- |
126 | | // StripLeadingWhiteSpace |
127 | | // "Removes" whitespace from beginning of string. Returns ptr to first |
128 | | // non-whitespace character if one is present, NULL otherwise. Assumes |
129 | | // "line" is null-terminated. |
130 | | // ---------------------------------------------------------------------- |
131 | | |
132 | 0 | inline const char* StripLeadingWhiteSpace(const char* line) { |
133 | | // skip leading whitespace |
134 | 0 | while (ascii_isspace(*line)) ++line; |
135 | |
|
136 | 0 | if ('\0' == *line) // end of line, no non-whitespace |
137 | 0 | return NULL; |
138 | | |
139 | 0 | return line; |
140 | 0 | } |
141 | | |
142 | | // StripLeadingWhiteSpace for non-const strings. |
143 | 0 | inline char* StripLeadingWhiteSpace(char* line) { |
144 | 0 | return const_cast<char*>(StripLeadingWhiteSpace(const_cast<const char*>(line))); |
145 | 0 | } |
146 | | |
147 | | void StripLeadingWhiteSpace(string* str); |
148 | | |
149 | | // Remove leading, trailing, and duplicate internal whitespace. |
150 | | void RemoveExtraWhitespace(string* s); |
151 | | |
152 | | // ---------------------------------------------------------------------- |
153 | | // SkipLeadingWhiteSpace |
154 | | // Returns str advanced past white space characters, if any. |
155 | | // Never returns NULL. "str" must be terminated by a null character. |
156 | | // ---------------------------------------------------------------------- |
157 | 0 | inline const char* SkipLeadingWhiteSpace(const char* str) { |
158 | 0 | while (ascii_isspace(*str)) ++str; |
159 | 0 | return str; |
160 | 0 | } |
161 | | |
162 | 0 | inline char* SkipLeadingWhiteSpace(char* str) { |
163 | 0 | while (ascii_isspace(*str)) ++str; |
164 | 0 | return str; |
165 | 0 | } |
166 | | |
167 | | // ---------------------------------------------------------------------- |
168 | | // StripCurlyBraces |
169 | | // Strips everything enclosed in pairs of curly braces and the curly |
170 | | // braces. Doesn't touch open braces. It doesn't handle nested curly |
171 | | // braces. This is used for removing things like {:stopword} from |
172 | | // queries. |
173 | | // StripBrackets does the same, but allows the caller to specify different |
174 | | // left and right bracket characters, such as '(' and ')'. |
175 | | // ---------------------------------------------------------------------- |
176 | | |
177 | | void StripCurlyBraces(string* s); |
178 | | void StripBrackets(char left, char right, string* s); |
179 | | |
180 | | // ---------------------------------------------------------------------- |
181 | | // StripMarkupTags |
182 | | // Strips everything enclosed in pairs of angle brackets and the angle |
183 | | // brackets. |
184 | | // This is used for stripping strings of markup; e.g. going from |
185 | | // "the quick <b>brown</b> fox" to "the quick brown fox." |
186 | | // If you want to skip entire sections of markup (e.g. the word "brown" |
187 | | // too in that example), see webutil/pageutil/pageutil.h . |
188 | | // This function was designed for stripping the bold tags (inserted by the |
189 | | // docservers) from the titles of news stories being returned by RSS. |
190 | | // This implementation DOES NOT cover all cases in html documents |
191 | | // like tags that contain quoted angle-brackets, or HTML comment. |
192 | | // For example <IMG SRC = "foo.gif" ALT = "A > B"> |
193 | | // or <!-- <A comment> --> |
194 | | // See "perldoc -q html" |
195 | | // ---------------------------------------------------------------------- |
196 | | |
197 | | void StripMarkupTags(string* s); |
198 | | string OutputWithMarkupTagsStripped(const string& s); |
199 | | |
200 | | // ---------------------------------------------------------------------- |
201 | | // TrimStringLeft |
202 | | // Removes any occurrences of the characters in 'remove' from the start |
203 | | // of the string. Returns the number of chars trimmed. |
204 | | // ---------------------------------------------------------------------- |
205 | | int TrimStringLeft(string* s, const StringPiece& remove); |
206 | | |
207 | | // ---------------------------------------------------------------------- |
208 | | // TrimStringRight |
209 | | // Removes any occurrences of the characters in 'remove' from the end |
210 | | // of the string. Returns the number of chars trimmed. |
211 | | // ---------------------------------------------------------------------- |
212 | | int TrimStringRight(string* s, const StringPiece& remove); |
213 | | |
214 | | // ---------------------------------------------------------------------- |
215 | | // TrimString |
216 | | // Removes any occurrences of the characters in 'remove' from either |
217 | | // end of the string. |
218 | | // ---------------------------------------------------------------------- |
219 | 0 | inline int TrimString(string* s, const StringPiece& remove) { |
220 | 0 | return TrimStringRight(s, remove) + TrimStringLeft(s, remove); |
221 | 0 | } |
222 | | |
223 | | // ---------------------------------------------------------------------- |
224 | | // TrimRunsInString |
225 | | // Removes leading and trailing runs, and collapses middle |
226 | | // runs of a set of characters into a single character (the |
227 | | // first one specified in 'remove'). Useful for collapsing |
228 | | // runs of repeated delimiters, whitespace, etc. E.g., |
229 | | // TrimRunsInString(&s, " :,()") removes leading and trailing |
230 | | // delimiter chars and collapses and converts internal runs |
231 | | // of delimiters to single ' ' characters, so, for example, |
232 | | // " a:(b):c " -> "a b c" |
233 | | // "first,last::(area)phone, ::zip" -> "first last area phone zip" |
234 | | // ---------------------------------------------------------------------- |
235 | | void TrimRunsInString(string* s, StringPiece remove); |
236 | | |
237 | | // ---------------------------------------------------------------------- |
238 | | // RemoveNullsInString |
239 | | // Removes any internal \0 characters from the string. |
240 | | // ---------------------------------------------------------------------- |
241 | | void RemoveNullsInString(string* s); |
242 | | |
243 | | // ---------------------------------------------------------------------- |
244 | | // strrm() |
245 | | // memrm() |
246 | | // Remove all occurrences of a given character from a string. |
247 | | // Returns the new length. |
248 | | // ---------------------------------------------------------------------- |
249 | | |
250 | | int strrm(char* str, char c); |
251 | | int memrm(char* str, int strlen, char c); |
252 | | |
253 | | // ---------------------------------------------------------------------- |
254 | | // strrmm() |
255 | | // Remove all occurrences of a given set of characters from a string. |
256 | | // Returns the new length. |
257 | | // ---------------------------------------------------------------------- |
258 | | int strrmm(char* str, const char* chars); |
259 | | int strrmm(string* str, const string& chars); |