/root/doris/be/src/gutil/strings/escaping.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2006 Google Inc. All Rights Reserved. |
2 | | // Authors: Numerous. Principal maintainers are csilvers and zunger. |
3 | | // |
4 | | // This is a grab-bag file for string utilities involved in escaping and |
5 | | // unescaping strings in various ways. Who knew there were so many? |
6 | | // |
7 | | // NOTE: Although the functions declared here have been imported into |
8 | | // the global namespace, the using statements are slated for removal. |
9 | | // Do not refer to these symbols without properly namespace-qualifying |
10 | | // them with "strings::". Of course you may also use "using" statements |
11 | | // within a .cc file. |
12 | | // |
13 | | // There are more escaping functions in: |
14 | | // webutil/html/tagutils.h (Escaping strings for HTML, PRE, JavaScript, etc.) |
15 | | // webutil/url/url.h (Escaping for URL's, both RFC-2396 and other methods) |
16 | | // template/template_modifiers.h (All sorts of stuff) |
17 | | // util/regex/re2/re2.h (Escaping for literals within regular expressions |
18 | | // - see RE2::QuoteMeta). |
19 | | // And probably many more places, as well. |
20 | | |
21 | | #pragma once |
22 | | |
23 | | #include <stddef.h> |
24 | | |
25 | | #include <string> |
26 | | using std::string; |
27 | | #include <vector> |
28 | | using std::vector; |
29 | | |
30 | | #include "common/logging.h" |
31 | | |
32 | | #include "gutil/strings/ascii_ctype.h" |
33 | | #include "gutil/strings/charset.h" |
34 | | #include "gutil/strings/stringpiece.h" |
35 | | |
36 | | namespace strings { |
37 | | |
38 | | // ---------------------------------------------------------------------- |
39 | | // EscapeStrForCSV() |
40 | | // Escapes the quotes in 'src' by doubling them. This is necessary |
41 | | // for generating CSV files (see SplitCSVLine). |
42 | | // Returns the number of characters written into dest (not counting |
43 | | // the \0) or -1 if there was insufficient space. |
44 | | // |
45 | | // Example: [some "string" to test] --> [some ""string"" to test] |
46 | | // ---------------------------------------------------------------------- |
47 | | int EscapeStrForCSV(const char* src, char* dest, int dest_len); |
48 | | |
49 | | // ---------------------------------------------------------------------- |
50 | | // UnescapeCEscapeSequences() |
51 | | // Copies "source" to "dest", rewriting C-style escape sequences |
52 | | // -- '\n', '\r', '\\', '\ooo', etc -- to their ASCII |
53 | | // equivalents. "dest" must be sufficiently large to hold all |
54 | | // the characters in the rewritten string (i.e. at least as large |
55 | | // as strlen(source) + 1 should be safe, since the replacements |
56 | | // are always shorter than the original escaped sequences). It's |
57 | | // safe for source and dest to be the same. RETURNS the length |
58 | | // of dest. |
59 | | // |
60 | | // It allows hex sequences \xhh, or generally \xhhhhh with an |
61 | | // arbitrary number of hex digits, but all of them together must |
62 | | // specify a value of a single byte (e.g. \x0045 is equivalent |
63 | | // to \x45, and \x1234 is erroneous). If the value is too large, |
64 | | // it is truncated to 8 bits and an error is set. This is also |
65 | | // true of octal values that exceed 0xff. |
66 | | // |
67 | | // It also allows escape sequences of the form \uhhhh (exactly four |
68 | | // hex digits, upper or lower case) or \Uhhhhhhhh (exactly eight |
69 | | // hex digits, upper or lower case) to specify a Unicode code |
70 | | // point. The dest array will contain the UTF8-encoded version of |
71 | | // that code-point (e.g., if source contains \u2019, then dest will |
72 | | // contain the three bytes 0xE2, 0x80, and 0x99). For the inverse |
73 | | // transformation, use UniLib::UTF8EscapeString |
74 | | // (util/utf8/public/unilib.h), not CEscapeString. |
75 | | // |
76 | | // Errors: In the first form of the call, errors are reported with |
77 | | // LOG(ERROR). The same is true for the second form of the call if |
78 | | // the pointer to the string vector is NULL; otherwise, error |
79 | | // messages are stored in the vector. In either case, the effect on |
80 | | // the dest array is not defined, but the rest of the source will be |
81 | | // processed. |
82 | | // |
83 | | // *** DEPRECATED: Use CUnescape() in new code *** |
84 | | // ---------------------------------------------------------------------- |
85 | | int UnescapeCEscapeSequences(const char* source, char* dest); |
86 | | int UnescapeCEscapeSequences(const char* source, char* dest, vector<string>* errors); |
87 | | |
88 | | // ---------------------------------------------------------------------- |
89 | | // UnescapeCEscapeString() |
90 | | // This does the same thing as UnescapeCEscapeSequences, but creates |
91 | | // a new string. The caller does not need to worry about allocating |
92 | | // a dest buffer. This should be used for non performance critical |
93 | | // tasks such as printing debug messages. It is safe for src and dest |
94 | | // to be the same. |
95 | | // |
96 | | // The second call stores its errors in a supplied string vector. |
97 | | // If the string vector pointer is NULL, it reports the errors with LOG(). |
98 | | // |
99 | | // In the first and second calls, the length of dest is returned. In the |
100 | | // third call, the new string is returned. |
101 | | // |
102 | | // *** DEPRECATED: Use CUnescape() in new code *** |
103 | | // ---------------------------------------------------------------------- |
104 | | int UnescapeCEscapeString(const string& src, string* dest); |
105 | | int UnescapeCEscapeString(const string& src, string* dest, vector<string>* errors); |
106 | | string UnescapeCEscapeString(const string& src); |
107 | | |
108 | | // ---------------------------------------------------------------------- |
109 | | // CUnescape() |
110 | | // Copies "source" to "dest", rewriting C-style escape sequences |
111 | | // -- '\n', '\r', '\\', '\ooo', etc -- to their ASCII |
112 | | // equivalents. "dest" must be sufficiently large to hold all |
113 | | // the characters in the rewritten string (i.e. at least as large |
114 | | // as source.size() should be safe, since the replacements |
115 | | // are never longer than the original escaped sequences). It's |
116 | | // safe for source and dest to be the same. RETURNS true if |
117 | | // conversion was successful, false otherwise. Stores the size of |
118 | | // the result in 'dest_len'. |
119 | | // |
120 | | // It allows hex sequences \xhh, or generally \xhhhhh with an |
121 | | // arbitrary number of hex digits, but all of them together must |
122 | | // specify a value of a single byte (e.g. \x0045 is equivalent |
123 | | // to \x45, and \x1234 is erroneous). If the value is too large, |
124 | | // an error is set. This is also true of octal values that exceed 0xff. |
125 | | // |
126 | | // It also allows escape sequences of the form \uhhhh (exactly four |
127 | | // hex digits, upper or lower case) or \Uhhhhhhhh (exactly eight |
128 | | // hex digits, upper or lower case) to specify a Unicode code |
129 | | // point. The dest array will contain the UTF8-encoded version of |
130 | | // that code-point (e.g., if source contains \u2019, then dest will |
131 | | // contain the three bytes 0xE2, 0x80, and 0x99). For the inverse |
132 | | // transformation, use UniLib::UTF8EscapeString |
133 | | // (util/utf8/public/unilib.h), not CEscapeString. |
134 | | // |
135 | | // Errors: Sets the description of the first encountered error in |
136 | | // 'error'. To disable error reporting, set 'error' to NULL. |
137 | | // ---------------------------------------------------------------------- |
138 | | bool CUnescape(const StringPiece& source, char* dest, int* dest_len, string* error); |
139 | | |
140 | | bool CUnescape(const StringPiece& source, string* dest, string* error); |
141 | | |
142 | | // Convenience overload with no error reporting: forwards to the three-argument CUnescape() above, passing NULL for 'error'. |
143 | 0 | inline bool CUnescape(const StringPiece& source, string* dest) { |
144 | 0 | return CUnescape(source, dest, NULL); |
145 | 0 | } |
146 | | |
147 | | // ---------------------------------------------------------------------- |
148 | | // CUnescapeForNullTerminatedString() |
149 | | // |
150 | | // This has the same behavior as CUnescape, except that each octal, hex, |
151 | | // or Unicode escape sequence that resolves to a null character ('\0') |
152 | | // is left in its original escaped form. The result is a |
153 | | // display-formatted string that can be interpreted as a null-terminated |
154 | | // const char* and will not be cut short if it contains embedded null |
155 | | // characters. |
156 | | // |
157 | | // ---------------------------------------------------------------------- |
158 | | |
159 | | bool CUnescapeForNullTerminatedString(const StringPiece& source, char* dest, int* dest_len, |
160 | | string* error); |
161 | | |
162 | | bool CUnescapeForNullTerminatedString(const StringPiece& source, string* dest, string* error); |
163 | | |
164 | | // Convenience overload with no error reporting: forwards to the three-argument CUnescapeForNullTerminatedString() above, passing NULL for 'error'. |
165 | 0 | inline bool CUnescapeForNullTerminatedString(const StringPiece& source, string* dest) { |
166 | 0 | return CUnescapeForNullTerminatedString(source, dest, NULL); |
167 | 0 | } |
168 | | |
169 | | // ---------------------------------------------------------------------- |
170 | | // CEscapeString() |
171 | | // CHexEscapeString() |
172 | | // Utf8SafeCEscapeString() |
173 | | // Utf8SafeCHexEscapeString() |
174 | | // Copies 'src' to 'dest', escaping dangerous characters using |
175 | | // C-style escape sequences. This is very useful for preparing query |
176 | | // flags. 'src' and 'dest' should not overlap. The 'Hex' version uses |
177 | | // hexadecimal rather than octal sequences. The 'Utf8Safe' version |
178 | | // doesn't touch UTF-8 bytes. |
179 | | // Returns the number of bytes written to 'dest' (not including the \0) |
180 | | // or -1 if there was insufficient space. |
181 | | // |
182 | | // Currently only \n, \r, \t, ", ', \ and !ascii_isprint() chars are escaped. |
183 | | // ---------------------------------------------------------------------- |
184 | | int CEscapeString(const char* src, int src_len, char* dest, int dest_len); |
185 | | int CHexEscapeString(const char* src, int src_len, char* dest, int dest_len); |
186 | | int Utf8SafeCEscapeString(const char* src, int src_len, char* dest, int dest_len); |
187 | | int Utf8SafeCHexEscapeString(const char* src, int src_len, char* dest, int dest_len); |
188 | | |
189 | | // ---------------------------------------------------------------------- |
190 | | // CEscape() |
191 | | // CHexEscape() |
192 | | // Utf8SafeCEscape() |
193 | | // Utf8SafeCHexEscape() |
194 | | // More convenient form of CEscapeString: returns result as a "string". |
195 | | // This version is slower than CEscapeString() because it does more |
196 | | // allocation. However, it is much more convenient to use in |
197 | | // non-speed-critical code like logging messages etc. |
198 | | // ---------------------------------------------------------------------- |
199 | | string CEscape(const StringPiece& src); |
200 | | string CHexEscape(const StringPiece& src); |
201 | | string Utf8SafeCEscape(const StringPiece& src); |
202 | | string Utf8SafeCHexEscape(const StringPiece& src); |
203 | | |
204 | | // ---------------------------------------------------------------------- |
205 | | // BackslashEscape() |
206 | | // Given a string and a list of characters to escape, replace any |
207 | | // instance of one of those characters with \ + that character. For |
208 | | // example, when exporting maps to /varz, label values need to have |
209 | | // all dots escaped. Appends the result to dest. |
210 | | // BackslashUnescape() |
211 | | // Replace \ + any of the indicated "unescape me" characters with just |
212 | | // that character. Appends the result to dest. |
213 | | // |
214 | | // IMPORTANT: |
215 | | // This function does not escape \ by default, so if you do not include |
216 | | // it in the chars to escape you will most certainly get an undesirable |
217 | | // result. That is, it won't be a reversible operation: |
218 | | // string src = "foo\\:bar"; |
219 | | // BackslashUnescape(BackslashEscape(src, ":"), ":") == "foo\\\\:bar" |
220 | | // On the other hand, for all strings "src", the following is true: |
221 | | // BackslashUnescape(BackslashEscape(src, ":\\"), ":\\") == src |
222 | | // ---------------------------------------------------------------------- |
223 | | void BackslashEscape(const StringPiece& src, const strings::CharSet& to_escape, string* dest); |
224 | | void BackslashUnescape(const StringPiece& src, const strings::CharSet& to_unescape, string* dest); |
225 | | |
226 | 0 | inline string BackslashEscape(const StringPiece& src, const strings::CharSet& to_escape) { /* Convenience overload: returns the result by value instead of appending to a caller-supplied string. */ |
227 | 0 | string s; /* starts empty, so whatever the three-argument overload appends is the whole result */ |
228 | 0 | BackslashEscape(src, to_escape, &s); |
229 | 0 | return s; |
230 | 0 | } |
231 | | |
232 | 0 | inline string BackslashUnescape(const StringPiece& src, const strings::CharSet& to_unescape) { /* Convenience overload: returns the result by value instead of appending to a caller-supplied string. */ |
233 | 0 | string s; /* starts empty, so whatever the three-argument overload appends is the whole result */ |
234 | 0 | BackslashUnescape(src, to_unescape, &s); |
235 | 0 | return s; |
236 | 0 | } |
237 | | |
238 | | // ---------------------------------------------------------------------- |
239 | | // QuotedPrintableUnescape() |
240 | | // Check out http://www.cis.ohio-state.edu/htbin/rfc/rfc2045.html for |
241 | | // more details, only briefly implemented. But from the web... |
242 | | // Quoted-printable is an encoding method defined in the MIME |
243 | | // standard. It is used primarily to encode 8-bit text (such as text |
244 | | // that includes foreign characters) into 7-bit US ASCII, creating a |
245 | | // document that is mostly readable by humans, even in its encoded |
246 | | // form. All MIME compliant applications can decode quoted-printable |
247 | | // text, though they may not necessarily be able to properly display the |
248 | | // document as it was originally intended. As quoted-printable encoding |
249 | | // is implemented most commonly, printable ASCII characters (values 33 |
250 | | // through 126, excluding 61), tabs and spaces that do not appear at the |
251 | | // end of lines, and end-of-line characters are not encoded. Other |
252 | | // characters are represented by an equal sign (=) immediately followed |
253 | | // by that character's hexadecimal value. Lines that are longer than 76 |
254 | | // characters are shortened by line breaks, with the equal sign marking |
255 | | // where the breaks occurred. |
256 | | // |
257 | | // Note that QuotedPrintableUnescape is different from 'Q'-encoding as |
258 | | // defined in rfc2047. In particular, this does not treat '_'s as spaces. |
259 | | // |
260 | | // See QEncodingUnescape(). |
261 | | // |
262 | | // Copies "src" to "dest", rewriting quoted printable escape sequences |
263 | | // =XX to their ASCII equivalents. src is not null terminated, instead |
264 | | // specify len. I recommend that slen<szdest, but we honor szdest |
265 | | // anyway. |
266 | | // RETURNS the length of dest. |
267 | | // ---------------------------------------------------------------------- |
268 | | int QuotedPrintableUnescape(const char* src, int slen, char* dest, int szdest); |
269 | | |
270 | | // ---------------------------------------------------------------------- |
271 | | // QEncodingUnescape() |
272 | | // This is very similar to QuotedPrintableUnescape except that we convert |
273 | | // '_'s into spaces. (See RFC 2047) |
274 | | // http://www.faqs.org/rfcs/rfc2047.html. |
275 | | // |
276 | | // Copies "src" to "dest", rewriting q-encoding escape sequences |
277 | | // =XX to their ASCII equivalents. src is not null terminated, instead |
278 | | // specify len. I recommend that slen<szdest, but we honour szdest |
279 | | // anyway. |
280 | | // RETURNS the length of dest. |
281 | | // ---------------------------------------------------------------------- |
282 | | int QEncodingUnescape(const char* src, int slen, char* dest, int szdest); |
283 | | |
284 | | // ---------------------------------------------------------------------- |
285 | | // Base64Unescape() |
286 | | // WebSafeBase64Unescape() |
287 | | // Copies "src" to "dest", where src is in base64 and is written to its |
288 | | // ASCII equivalents. src is not null terminated, instead specify len. |
289 | | // I recommend that slen<szdest, but we honor szdest anyway. |
290 | | // RETURNS the length of dest, or -1 if src contains invalid chars. |
291 | | // The WebSafe variation uses '-' instead of '+' and '_' instead of '/'. |
292 | | // The variations that store into a string clear the string first, and |
293 | | // return false (with dest empty) if src contains invalid chars; for |
294 | | // these versions src and dest must be different strings. |
295 | | // ---------------------------------------------------------------------- |
296 | | int Base64Unescape(const char* src, int slen, char* dest, int szdest); |
297 | | bool Base64Unescape(const char* src, int slen, string* dest); |
298 | 0 | inline bool Base64Unescape(const string& src, string* dest) { /* Convenience overload for a std::string source: forwards its bytes and length to the (const char*, int, string*) overload and returns that overload's result. */ |
299 | 0 | return Base64Unescape(src.data(), src.size(), dest); /* src.size() is implicitly converted to the int 'slen' parameter */ |
300 | 0 | } |
301 | | |
302 | | int WebSafeBase64Unescape(const char* src, int slen, char* dest, int szdest); |
303 | | bool WebSafeBase64Unescape(const char* src, int slen, string* dest); |
304 | 0 | inline bool WebSafeBase64Unescape(const string& src, string* dest) { /* Convenience overload for a std::string source: forwards its bytes and length to the (const char*, int, string*) overload and returns that overload's result. */ |
305 | 0 | return WebSafeBase64Unescape(src.data(), src.size(), dest); /* src.size() is implicitly converted to the int 'slen' parameter */ |
306 | 0 | } |
307 | | |
308 | | // Return the length to use for the output buffer given to the base64 escape |
309 | | // routines. Make sure to use the same value for do_padding in both. |
310 | | // This function may return incorrect results if given input_len values that |
311 | | // are extremely high, which should happen rarely. |
312 | | int CalculateBase64EscapedLen(int input_len, bool do_padding); |
313 | | // Use this version when calling Base64Escape without a do_padding arg. |
314 | | int CalculateBase64EscapedLen(int input_len); |
315 | | |
316 | | // ---------------------------------------------------------------------- |
317 | | // Base64Escape() |
318 | | // WebSafeBase64Escape() |
319 | | // Encode "src" to "dest" using base64 encoding. |
320 | | // src is not null terminated, instead specify len. |
321 | | // 'dest' should have at least CalculateBase64EscapedLen() length. |
322 | | // RETURNS the length of dest. |
323 | | // The WebSafe variation uses '-' instead of '+' and '_' instead of '/' |
324 | | // so that we can place the output in the URL or cookies without having |
325 | | // to escape them. It also has an extra parameter "do_padding", |
326 | | // which when set to false will prevent padding with "=". |
327 | | // ---------------------------------------------------------------------- |
328 | | int Base64Escape(const unsigned char* src, int slen, char* dest, int szdest); |
329 | | int WebSafeBase64Escape(const unsigned char* src, int slen, char* dest, int szdest, |
330 | | bool do_padding); |
331 | | // Encode src into dest with padding. |
332 | | void Base64Escape(const string& src, string* dest); |
333 | | // Encode src into dest web-safely without padding. |
334 | | void WebSafeBase64Escape(const string& src, string* dest); |
335 | | // Encode src into dest web-safely with padding. |
336 | | void WebSafeBase64EscapeWithPadding(const string& src, string* dest); |
337 | | |
338 | | void Base64Escape(const unsigned char* src, int szsrc, string* dest, bool do_padding); |
339 | | void WebSafeBase64Escape(const unsigned char* src, int szsrc, string* dest, bool do_padding); |
340 | | |
341 | | // ---------------------------------------------------------------------- |
342 | | // Base32Unescape() |
343 | | // Copies "src" to "dest", where src is in base32 and is written to its |
344 | | // ASCII equivalents. src is not null terminated, instead specify len. |
345 | | // RETURNS the length of dest, or -1 if src contains invalid chars. |
346 | | // ---------------------------------------------------------------------- |
347 | | int Base32Unescape(const char* src, int slen, char* dest, int szdest); |
348 | | bool Base32Unescape(const char* src, int slen, string* dest); |
349 | 0 | inline bool Base32Unescape(const string& src, string* dest) { /* Convenience overload for a std::string source: forwards its bytes and length to the (const char*, int, string*) overload and returns that overload's result. */ |
350 | 0 | return Base32Unescape(src.data(), src.size(), dest); /* src.size() is implicitly converted to the int 'slen' parameter */ |
351 | 0 | } |
352 | | |
353 | | // ---------------------------------------------------------------------- |
354 | | // Base32Escape() |
355 | | // Encode "src" to "dest" using base32 encoding. |
356 | | // src is not null terminated, instead specify len. |
357 | | // 'dest' should have at least CalculateBase32EscapedLen() length. |
358 | | // RETURNS the length of dest. RETURNS 0 if szsrc is zero, or szdest is |
359 | | // too small to fit the fully encoded result. 'dest' is padded with '='. |
360 | | // |
361 | | // Note that this is "Base 32 Encoding" from RFC 4648 section 6. |
362 | | // ---------------------------------------------------------------------- |
363 | | int Base32Escape(const unsigned char* src, size_t szsrc, char* dest, size_t szdest); |
364 | | bool Base32Escape(const string& src, string* dest); |
365 | | |
366 | | // ---------------------------------------------------------------------- |
367 | | // Base32HexEscape() |
368 | | // Encode "src" to "dest" using base32hex encoding. |
369 | | // src is not null terminated, instead specify len. |
370 | | // 'dest' should have at least CalculateBase32EscapedLen() length. |
371 | | // RETURNS the length of dest. RETURNS 0 if szsrc is zero, or szdest is |
372 | | // too small to fit the fully encoded result. 'dest' is padded with '='. |
373 | | // |
374 | | // Note that this is "Base 32 Encoding with Extended Hex Alphabet" |
375 | | // from RFC 4648 section 7. |
376 | | // ---------------------------------------------------------------------- |
377 | | int Base32HexEscape(const unsigned char* src, size_t szsrc, char* dest, size_t szdest); |
378 | | bool Base32HexEscape(const string& src, string* dest); |
379 | | |
380 | | // Return the length to use for the output buffer given to the base32 escape |
381 | | // routines. This function may return incorrect results if given input_len |
382 | | // values that are extremely high, which should happen rarely. |
383 | | int CalculateBase32EscapedLen(size_t input_len); |
384 | | |
385 | | // ---------------------------------------------------------------------- |
386 | | // EightBase32DigitsToTenHexDigits() |
387 | | // TenHexDigitsToEightBase32Digits() |
388 | | // Convert base32 to and from hex. |
389 | | // |
390 | | // for EightBase32DigitsToTenHexDigits(): |
391 | | // *in must point to 8 base32 digits. |
392 | | // *out must point to 10 bytes. |
393 | | // |
394 | | // for TenHexDigitsToEightBase32Digits(): |
395 | | // *in must point to 10 hex digits. |
396 | | // *out must point to 8 bytes. |
397 | | // |
398 | | // Note that the Base64 functions above are different. They convert base64 |
399 | | // to and from binary data. We convert to and from string representations |
400 | | // of hex. They deal with arbitrary lengths and we deal with single, |
401 | | // whole base32 quanta. |
402 | | // |
403 | | // See RFC3548 at http://www.ietf.org/rfc/rfc3548.txt |
404 | | // for details on base32. |
405 | | // ---------------------------------------------------------------------- |
406 | | void EightBase32DigitsToTenHexDigits(const char* in, char* out); |
407 | | void TenHexDigitsToEightBase32Digits(const char* in, char* out); |
408 | | |
409 | | // ---------------------------------------------------------------------- |
410 | | // EightBase32DigitsToFiveBytes() |
411 | | // FiveBytesToEightBase32Digits() |
412 | | // Convert base32 to and from binary |
413 | | // |
414 | | // for EightBase32DigitsToFiveBytes(): |
415 | | // *in must point to 8 base32 digits. |
416 | | // *out must point to 5 bytes. |
417 | | // |
418 | | // for FiveBytesToEightBase32Digits(): |
419 | | // *in must point to 5 bytes. |
420 | | // *out must point to 8 bytes. |
421 | | // |
422 | | // Note that the Base64 functions above are different. They deal with |
423 | | // arbitrary lengths and we deal with single, whole base32 quanta. |
424 | | // ---------------------------------------------------------------------- |
425 | | void EightBase32DigitsToFiveBytes(const char* in, unsigned char* bytes_out); |
426 | | void FiveBytesToEightBase32Digits(const unsigned char* in_bytes, char* out); |
427 | | |
428 | | // ---------------------------------------------------------------------- |
429 | | // EscapeFileName() |
430 | | // UnescapeFileName() |
431 | | // Utility functions to (un)escape strings to make them suitable for use in |
432 | | // filenames. Characters not in [a-zA-Z0-9-_.] will be escaped into %XX. |
433 | | // E.g: "Hello, world!" will be escaped as "Hello%2c%20world%21" |
434 | | // |
435 | | // NB that this function escapes slashes, so the output will be a flat |
436 | | // filename and will not keep the directory structure. Slashes are replaced |
437 | | // with '~', instead of a %XX sequence to make it easier for people to |
438 | | // understand the escaped form when the original string is a file path. |
439 | | // |
440 | | // WARNING: filenames produced by these functions may not be compatible with |
441 | | // Colossus FS. In particular, the '%' character has a special meaning in |
442 | | // CFS. |
443 | | // |
444 | | // The versions that receive a string for the output will append to it. |
445 | | // ---------------------------------------------------------------------- |
446 | | void EscapeFileName(const StringPiece& src, string* dst); |
447 | | void UnescapeFileName(const StringPiece& src, string* dst); |
448 | 0 | inline string EscapeFileName(const StringPiece& src) { /* Convenience overload: returns the escaped name by value instead of appending to a caller-supplied string. */ |
449 | 0 | string r; /* starts empty, so the appended output is the whole result */ |
450 | 0 | EscapeFileName(src, &r); |
451 | 0 | return r; |
452 | 0 | } |
453 | 0 | inline string UnescapeFileName(const StringPiece& src) { /* Convenience overload: returns the unescaped name by value instead of appending to a caller-supplied string. */ |
454 | 0 | string r; /* starts empty, so the appended output is the whole result */ |
455 | 0 | UnescapeFileName(src, &r); |
456 | 0 | return r; |
457 | 0 | } |
458 | | |
459 | | // ---------------------------------------------------------------------- |
460 | | // Here are a couple utility methods to change ints to hex chars & back |
461 | | // ---------------------------------------------------------------------- |
462 | | |
463 | 0 | inline int int_to_hex_digit(int i) { /* Returns the character code of the upper-case hex digit ('0'-'9', 'A'-'F') for the value i. */ |
464 | 0 | DCHECK((i >= 0) && (i <= 15)); /* precondition: i is a single hex digit value; checked only through DCHECK */ |
465 | 0 | return ((i < 10) ? (i + '0') : ((i - 10) + 'A')); |
466 | 0 | } |
467 | | |
468 | 0 | inline int int_to_lower_hex_digit(int i) { /* Returns the character code of the lower-case hex digit ('0'-'9', 'a'-'f') for the value i. */ |
469 | 0 | DCHECK((i >= 0) && (i <= 15)); /* precondition: i is a single hex digit value; checked only through DCHECK */ |
470 | 0 | return (i < 10) ? (i + '0') : ((i - 10) + 'a'); |
471 | 0 | } |
472 | | |
473 | 0 | inline int hex_digit_to_int(char c) { /* Returns the numeric value (0-15) of the hex digit character 'c'; upper- and lower-case letters are both handled. */ |
474 | | /* Assume ASCII: the nibble arithmetic below depends on these exact code points. */ |
475 | 0 | DCHECK('0' == 0x30 && 'A' == 0x41 && 'a' == 0x61); |
476 | 0 | DCHECK(ascii_isxdigit(c)); /* precondition: c is a hex digit; checked only through DCHECK */ |
477 | 0 | int x = static_cast<unsigned char>(c); |
478 | 0 | if (x > '9') { /* under the precondition above, anything past '9' is a letter */ |
479 | 0 | x += 9; /* 'A' (0x41) and 'a' (0x61) have low nibble 1, so adding 9 moves the low nibble to 0xA..0xF */ |
480 | 0 | } |
481 | 0 | return x & 0xf; /* the low nibble is the digit value */ |
482 | 0 | } |
483 | | |
484 | | // ---------------------------------------------------------------------- |
485 | | // a2b_hex() |
486 | | // Description: Ascii-to-Binary hex conversion. This converts |
487 | | // 2*'num' hexadecimal characters to 'num' binary data. |
488 | | // Return value: 'num' bytes of binary data (via the 'to' argument) |
489 | | // ---------------------------------------------------------------------- |
490 | | void a2b_hex(const char* from, unsigned char* to, int num); |
491 | | void a2b_hex(const char* from, char* to, int num); |
492 | | void a2b_hex(const char* from, string* to, int num); |
493 | | string a2b_hex(const string& a); |
494 | | |
495 | | // ---------------------------------------------------------------------- |
496 | | // a2b_bin() |
497 | | // Description: Ascii-to-Binary binary conversion. This converts |
498 | | // a.size() binary characters (ascii '0' or '1') to |
499 | | // ceil(a.size()/8) bytes of binary data. The first character is |
500 | | // considered the most significant if byte_order_msb is set. a is |
501 | | // considered to be padded with trailing 0s if its size is not a |
502 | | // multiple of 8. |
503 | | // Return value: ceil(a.size()/8) bytes of binary data |
504 | | // ---------------------------------------------------------------------- |
505 | | string a2b_bin(const string& a, bool byte_order_msb); |
506 | | |
507 | | // ---------------------------------------------------------------------- |
508 | | // b2a_hex() |
509 | | // Description: Binary-to-Ascii hex conversion. This converts |
510 | | // 'num' bytes of binary to a 2*'num'-character hexadecimal representation |
511 | | // Return value: 2*'num' characters of ascii text (via the 'to' argument) |
512 | | // ---------------------------------------------------------------------- |
513 | | void b2a_hex(const unsigned char* from, char* to, int num); |
514 | | void b2a_hex(const unsigned char* from, string* to, int num); |
515 | | |
516 | | // ---------------------------------------------------------------------- |
517 | | // b2a_hex() |
518 | | // Description: Binary-to-Ascii hex conversion. This converts |
519 | | // 'num' bytes of binary to a 2*'num'-character hexadecimal representation |
520 | | // Return value: 2*'num' characters of ascii string |
521 | | // ---------------------------------------------------------------------- |
522 | | string b2a_hex(const char* from, int num); |
523 | | string b2a_hex(const StringPiece& b); |
524 | | |
525 | | // ---------------------------------------------------------------------- |
526 | | // b2a_bin() |
527 | | // Description: Binary-to-Ascii binary conversion. This converts |
528 | | // b.size() bytes of binary to a 8*b.size() character representation |
529 | | // (ascii '0' or '1'). The highest order bit in each byte is returned |
530 | | // first in the string if byte_order_msb is set. |
531 | | // Return value: 8*b.size() characters of ascii text |
532 | | // ---------------------------------------------------------------------- |
533 | | string b2a_bin(const string& b, bool byte_order_msb); |
534 | | |
535 | | // ---------------------------------------------------------------------- |
536 | | // ShellEscape |
537 | | // Make a shell command argument from a string. |
538 | | // Returns a Bourne shell string literal such that, once the shell finishes |
539 | | // expanding the argument, the argument passed on to the program being |
540 | | // run will be the same as whatever you passed in. |
541 | | // NOTE: This is "ported" from python2.2's commands.mkarg(); it should be |
542 | | // safe for Bourne shell syntax (i.e. sh, bash), but mileage may vary |
543 | | // with other shells. |
544 | | // ---------------------------------------------------------------------- |
545 | | string ShellEscape(StringPiece src); |
546 | | |
547 | | // Applies ShellEscape() to each element of [begin, end), joins the escaped |
548 | | // pieces with a single space between them, and returns the joined string. |
549 | | template <class InputIterator> |
550 | | string ShellEscapeCommandLine(InputIterator begin, const InputIterator& end) { |
551 | | string result; |
552 | | for (; begin != end; ++begin) { |
553 | | if (!result.empty()) result.append(" "); /* separator is added only once something has already been written */ |
554 | | result.append(ShellEscape(*begin)); |
555 | | } |
556 | | return result; |
557 | | } |
558 | | |
559 | | // Reads at most bytes_to_read from binary_string and writes it to |
560 | | // ascii_string in lower case hex. |
561 | | void ByteStringToAscii(const string& binary_string, int bytes_to_read, string* ascii_string); |
562 | | |
563 | 0 | inline string ByteStringToAscii(const string& binary_string, int bytes_to_read) { /* Convenience overload: returns the text produced by the three-argument ByteStringToAscii() by value instead of through an out-parameter. */ |
564 | 0 | string result; |
565 | 0 | ByteStringToAscii(binary_string, bytes_to_read, &result); |
566 | 0 | return result; |
567 | 0 | } |
568 | | |
569 | | // Converts the hex from ascii_string into binary data and |
570 | | // writes the binary data into binary_string. |
571 | | // Empty input successfully converts to empty output. |
572 | | // Returns false and may modify output if it is |
573 | | // unable to parse the hex string. |
574 | | bool ByteStringFromAscii(const string& ascii_string, string* binary_string); |
575 | | |
576 | | // Clean up a multi-line string to conform to Unix line endings. |
577 | | // Reads from src and appends to dst, so usually dst should be empty. |
578 | | // If there is no line ending at the end of a non-empty string, it can |
579 | | // be added automatically. |
580 | | // |
581 | | // Four different types of input are correctly handled: |
582 | | // |
583 | | // - Unix/Linux files: line ending is LF, pass through unchanged |
584 | | // |
585 | | // - DOS/Windows files: line ending is CRLF: convert to LF |
586 | | // |
587 | | // - Legacy Mac files: line ending is CR: convert to LF |
588 | | // |
589 | | // - Garbled files: random line endings, convert gracefully |
590 | | // lonely CR, lonely LF, CRLF: convert to LF |
591 | | // |
592 | | // @param src The multi-line string to convert |
593 | | // @param dst The converted string is appended to this string |
594 | | // @param auto_end_last_line Automatically terminate the last line |
595 | | // |
596 | | // Limitations: |
597 | | // |
598 | | // This does not do the right thing for CRCRLF files created by |
599 | | // broken programs that do another Unix->DOS conversion on files |
600 | | // that are already in CRLF format. |
601 | | void CleanStringLineEndings(const string& src, string* dst, bool auto_end_last_line); |
602 | | |
603 | | // Same as above, but transforms the argument in place. |
604 | | void CleanStringLineEndings(string* str, bool auto_end_last_line); |
605 | | |
606 | | } // namespace strings |
607 | | |
608 | | // The following functions used to be defined in strutil.h in the top-level |
609 | | // namespace, so we alias them here. Do not add new functions here. |
610 | | // |
611 | | // Talk to him if you want to help. |
612 | | // |
613 | | // DEPRECATED(mec): Using these names in the global namespace is deprecated. |
614 | | // Use the strings:: names. |
615 | | |
616 | | using strings::EscapeStrForCSV; |
617 | | using strings::UnescapeCEscapeSequences; |
618 | | using strings::UnescapeCEscapeString; |
619 | | using strings::CEscapeString; |
620 | | using strings::CHexEscapeString; |
621 | | using strings::CEscape; |
622 | | using strings::CHexEscape; |
623 | | using strings::BackslashEscape; |
624 | | using strings::BackslashUnescape; |
625 | | using strings::QuotedPrintableUnescape; |
626 | | using strings::QEncodingUnescape; |
627 | | using strings::Base64Unescape; |
628 | | using strings::WebSafeBase64Unescape; |
629 | | using strings::CalculateBase64EscapedLen; |
630 | | using strings::Base64Escape; |
631 | | using strings::WebSafeBase64Escape; |
632 | | using strings::WebSafeBase64EscapeWithPadding; |
633 | | using strings::Base32Escape; |
634 | | using strings::Base32HexEscape; |
635 | | using strings::CalculateBase32EscapedLen; |
636 | | using strings::EightBase32DigitsToTenHexDigits; |
637 | | using strings::TenHexDigitsToEightBase32Digits; |
638 | | using strings::EightBase32DigitsToFiveBytes; |
639 | | using strings::FiveBytesToEightBase32Digits; |
640 | | using strings::int_to_hex_digit; |
641 | | using strings::int_to_lower_hex_digit; |
642 | | using strings::hex_digit_to_int; |
643 | | using strings::a2b_hex; |
644 | | using strings::a2b_bin; |
645 | | using strings::b2a_hex; |
646 | | using strings::b2a_bin; |
647 | | using strings::ShellEscape; |
648 | | using strings::ShellEscapeCommandLine; |
649 | | using strings::ByteStringFromAscii; |
650 | | using strings::ByteStringToAscii; |
651 | | using strings::CleanStringLineEndings; |