/root/doris/be/src/gutil/strings/numbers.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2010 Google Inc. All Rights Reserved. |
2 | | // Refactored from contributions of various authors in strings/strutil.cc |
3 | | // |
4 | | // This file contains string processing functions related to |
5 | | // numeric values. |
6 | | |
7 | | #include "gutil/strings/numbers.h" |
8 | | |
9 | | #include <assert.h> |
10 | | #include <ctype.h> |
11 | | #include <errno.h> |
12 | | #include <float.h> // for DBL_DIG and FLT_DIG |
13 | | #include <math.h> // for HUGE_VAL |
14 | | #include <stdio.h> |
15 | | #include <stdlib.h> |
16 | | #include <string.h> |
17 | | #include <inttypes.h> |
18 | | #include <sys/types.h> |
19 | | #include <limits> |
20 | | #include <ostream> |
21 | | |
22 | | #include "common/exception.h" |
23 | | |
24 | | using std::numeric_limits; |
25 | | #include <string> |
26 | | |
27 | | using std::string; |
28 | | |
29 | | #include <fmt/compile.h> |
30 | | #include <fmt/format.h> |
31 | | |
32 | | #include "common/logging.h" |
33 | | |
34 | | #include "gutil/integral_types.h" |
35 | | #include "gutil/strings/ascii_ctype.h" |
36 | | #include "gutil/strtoint.h" |
37 | | |
38 | | namespace { |
39 | | |
40 | | // Represents integer values of digits. |
41 | | // Uses 36 to indicate an invalid character since we support |
42 | | // bases up to 36. |
43 | | static const int8 kAsciiToInt[256] = { |
44 | | 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, // 16 36s. |
45 | | 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, |
46 | | 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 36, 36, |
47 | | 36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, |
48 | | 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16, |
49 | | 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 36, 36, |
50 | | 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, |
51 | | 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, |
52 | | 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, |
53 | | 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, |
54 | | 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, |
55 | | 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36}; |
56 | | |
57 | | // Input format based on POSIX.1-2008 strtol |
58 | | // http://pubs.opengroup.org/onlinepubs/9699919799/functions/strtol.html |
59 | | template <typename IntType> |
60 | 33 | bool safe_int_internal(const char* start, const char* end, int base, IntType* value_p) { |
61 | | // Consume whitespace. |
62 | 34 | while (start < end && ascii_isspace(start[0])) { |
63 | 1 | ++start; |
64 | 1 | } |
65 | 33 | while (start < end && ascii_isspace(end[-1])) { |
66 | 0 | --end; |
67 | 0 | } |
68 | 33 | if (start >= end) { |
69 | 3 | return false; |
70 | 3 | } |
71 | | |
72 | | // Consume sign. |
73 | 30 | const bool negative = (start[0] == '-'); |
74 | 30 | if (negative || start[0] == '+') { |
75 | 7 | ++start; |
76 | 7 | if (start >= end) { |
77 | 0 | return false; |
78 | 0 | } |
79 | 7 | } |
80 | | |
81 | | // Consume base-dependent prefix. |
82 | | // base 0: "0x" -> base 16, "0" -> base 8, default -> base 10 |
83 | | // base 16: "0x" -> base 16 |
84 | | // Also validate the base. |
85 | 30 | if (base == 0) { |
86 | 0 | if (end - start >= 2 && start[0] == '0' && (start[1] == 'x' || start[1] == 'X')) { |
87 | 0 | base = 16; |
88 | 0 | start += 2; |
89 | 0 | } else if (end - start >= 1 && start[0] == '0') { |
90 | 0 | base = 8; |
91 | 0 | start += 1; |
92 | 0 | } else { |
93 | 0 | base = 10; |
94 | 0 | } |
95 | 30 | } else if (base == 16) { |
96 | 0 | if (end - start >= 2 && start[0] == '0' && (start[1] == 'x' || start[1] == 'X')) { |
97 | 0 | start += 2; |
98 | 0 | } |
99 | 30 | } else if (base >= 2 && base <= 36) { |
100 | | // okay |
101 | 30 | } else { |
102 | 0 | return false; |
103 | 0 | } |
104 | | |
105 | | // Consume digits. |
106 | | // |
107 | | // The classic loop: |
108 | | // |
109 | | // for each digit |
110 | | // value = value * base + digit |
111 | | // value *= sign |
112 | | // |
113 | | // The classic loop needs overflow checking. It also fails on the most |
114 | | // negative integer, -2147483648 in 32-bit two's complement representation. |
115 | | // |
116 | | // My improved loop: |
117 | | // |
118 | | // if (!negative) |
119 | | // for each digit |
120 | | // value = value * base |
121 | | // value = value + digit |
122 | | // else |
123 | | // for each digit |
124 | | // value = value * base |
125 | | // value = value - digit |
126 | | // |
127 | | // Overflow checking becomes simple. |
128 | | // |
129 | | // I present the positive code first for easier reading. |
130 | 30 | IntType value = 0; |
131 | 30 | if (!negative) { |
132 | 23 | const IntType vmax = std::numeric_limits<IntType>::max(); |
133 | 23 | assert(vmax > 0); |
134 | 0 | assert(vmax >= base); |
135 | 0 | const IntType vmax_over_base = vmax / base; |
136 | | // loop over digits |
137 | | // loop body is interleaved for perf, not readability |
138 | 148 | for (; start < end; ++start) { |
139 | 134 | unsigned char c = static_cast<unsigned char>(start[0]); |
140 | 134 | int digit = kAsciiToInt[c]; |
141 | 134 | if (value > vmax_over_base) return false; |
142 | 134 | value *= base; |
143 | 134 | if (digit >= base) return false; |
144 | 128 | if (value > vmax - digit) return false; |
145 | 125 | value += digit; |
146 | 125 | } |
147 | 23 | } else { |
148 | 7 | const IntType vmin = std::numeric_limits<IntType>::min(); |
149 | 7 | assert(vmin < 0); |
150 | 0 | assert(vmin <= 0 - base); |
151 | 0 | IntType vmin_over_base = vmin / base; |
152 | | // 2003 c++ standard [expr.mul] |
153 | | // "... the sign of the remainder is implementation-defined." |
154 | | // Although (vmin/base)*base + vmin%base is always vmin. |
155 | | // 2011 c++ standard tightens the spec but we cannot rely on it. |
156 | 7 | if (vmin % base > 0) { |
157 | 0 | vmin_over_base += 1; |
158 | 0 | } |
159 | | // loop over digits |
160 | | // loop body is interleaved for perf, not readability |
161 | 53 | for (; start < end; ++start) { |
162 | 46 | unsigned char c = static_cast<unsigned char>(start[0]); |
163 | 46 | int digit = kAsciiToInt[c]; |
164 | 46 | if (value < vmin_over_base) return false; |
165 | 46 | value *= base; |
166 | 46 | if (digit >= base) return false; |
167 | 46 | if (value < vmin + digit) return false; |
168 | 46 | value -= digit; |
169 | 46 | } |
170 | 7 | } |
171 | | |
172 | | // Store output. |
173 | 21 | *value_p = value; |
174 | 21 | return true; |
175 | 30 | } numbers.cc:_ZN12_GLOBAL__N_117safe_int_internalIiEEbPKcS2_iPT_ Line | Count | Source | 60 | 21 | bool safe_int_internal(const char* start, const char* end, int base, IntType* value_p) { | 61 | | // Consume whitespace. | 62 | 21 | while (start < end && ascii_isspace(start[0])) { | 63 | 0 | ++start; | 64 | 0 | } | 65 | 21 | while (start < end && ascii_isspace(end[-1])) { | 66 | 0 | --end; | 67 | 0 | } | 68 | 21 | if (start >= end) { | 69 | 1 | return false; | 70 | 1 | } | 71 | | | 72 | | // Consume sign. | 73 | 20 | const bool negative = (start[0] == '-'); | 74 | 20 | if (negative || start[0] == '+') { | 75 | 5 | ++start; | 76 | 5 | if (start >= end) { | 77 | 0 | return false; | 78 | 0 | } | 79 | 5 | } | 80 | | | 81 | | // Consume base-dependent prefix. | 82 | | // base 0: "0x" -> base 16, "0" -> base 8, default -> base 10 | 83 | | // base 16: "0x" -> base 16 | 84 | | // Also validate the base. | 85 | 20 | if (base == 0) { | 86 | 0 | if (end - start >= 2 && start[0] == '0' && (start[1] == 'x' || start[1] == 'X')) { | 87 | 0 | base = 16; | 88 | 0 | start += 2; | 89 | 0 | } else if (end - start >= 1 && start[0] == '0') { | 90 | 0 | base = 8; | 91 | 0 | start += 1; | 92 | 0 | } else { | 93 | 0 | base = 10; | 94 | 0 | } | 95 | 20 | } else if (base == 16) { | 96 | 0 | if (end - start >= 2 && start[0] == '0' && (start[1] == 'x' || start[1] == 'X')) { | 97 | 0 | start += 2; | 98 | 0 | } | 99 | 20 | } else if (base >= 2 && base <= 36) { | 100 | | // okay | 101 | 20 | } else { | 102 | 0 | return false; | 103 | 0 | } | 104 | | | 105 | | // Consume digits. | 106 | | // | 107 | | // The classic loop: | 108 | | // | 109 | | // for each digit | 110 | | // value = value * base + digit | 111 | | // value *= sign | 112 | | // | 113 | | // The classic loop needs overflow checking. It also fails on the most | 114 | | // negative integer, -2147483648 in 32-bit two's complement representation. 
| 115 | | // | 116 | | // My improved loop: | 117 | | // | 118 | | // if (!negative) | 119 | | // for each digit | 120 | | // value = value * base | 121 | | // value = value + digit | 122 | | // else | 123 | | // for each digit | 124 | | // value = value * base | 125 | | // value = value - digit | 126 | | // | 127 | | // Overflow checking becomes simple. | 128 | | // | 129 | | // I present the positive code first for easier reading. | 130 | 20 | IntType value = 0; | 131 | 20 | if (!negative) { | 132 | 15 | const IntType vmax = std::numeric_limits<IntType>::max(); | 133 | 15 | assert(vmax > 0); | 134 | 0 | assert(vmax >= base); | 135 | 0 | const IntType vmax_over_base = vmax / base; | 136 | | // loop over digits | 137 | | // loop body is interleaved for perf, not readability | 138 | 80 | for (; start < end; ++start) { | 139 | 70 | unsigned char c = static_cast<unsigned char>(start[0]); | 140 | 70 | int digit = kAsciiToInt[c]; | 141 | 70 | if (value > vmax_over_base) return false; | 142 | 70 | value *= base; | 143 | 70 | if (digit >= base) return false; | 144 | 67 | if (value > vmax - digit) return false; | 145 | 65 | value += digit; | 146 | 65 | } | 147 | 15 | } else { | 148 | 5 | const IntType vmin = std::numeric_limits<IntType>::min(); | 149 | 5 | assert(vmin < 0); | 150 | 0 | assert(vmin <= 0 - base); | 151 | 0 | IntType vmin_over_base = vmin / base; | 152 | | // 2003 c++ standard [expr.mul] | 153 | | // "... the sign of the remainder is implementation-defined." | 154 | | // Although (vmin/base)*base + vmin%base is always vmin. | 155 | | // 2011 c++ standard tightens the spec but we cannot rely on it. 
| 156 | 5 | if (vmin % base > 0) { | 157 | 0 | vmin_over_base += 1; | 158 | 0 | } | 159 | | // loop over digits | 160 | | // loop body is interleaved for perf, not readability | 161 | 31 | for (; start < end; ++start) { | 162 | 26 | unsigned char c = static_cast<unsigned char>(start[0]); | 163 | 26 | int digit = kAsciiToInt[c]; | 164 | 26 | if (value < vmin_over_base) return false; | 165 | 26 | value *= base; | 166 | 26 | if (digit >= base) return false; | 167 | 26 | if (value < vmin + digit) return false; | 168 | 26 | value -= digit; | 169 | 26 | } | 170 | 5 | } | 171 | | | 172 | | // Store output. | 173 | 15 | *value_p = value; | 174 | 15 | return true; | 175 | 20 | } |
numbers.cc:_ZN12_GLOBAL__N_117safe_int_internalIlEEbPKcS2_iPT_ Line | Count | Source | 60 | 12 | bool safe_int_internal(const char* start, const char* end, int base, IntType* value_p) { | 61 | | // Consume whitespace. | 62 | 13 | while (start < end && ascii_isspace(start[0])) { | 63 | 1 | ++start; | 64 | 1 | } | 65 | 12 | while (start < end && ascii_isspace(end[-1])) { | 66 | 0 | --end; | 67 | 0 | } | 68 | 12 | if (start >= end) { | 69 | 2 | return false; | 70 | 2 | } | 71 | | | 72 | | // Consume sign. | 73 | 10 | const bool negative = (start[0] == '-'); | 74 | 10 | if (negative || start[0] == '+') { | 75 | 2 | ++start; | 76 | 2 | if (start >= end) { | 77 | 0 | return false; | 78 | 0 | } | 79 | 2 | } | 80 | | | 81 | | // Consume base-dependent prefix. | 82 | | // base 0: "0x" -> base 16, "0" -> base 8, default -> base 10 | 83 | | // base 16: "0x" -> base 16 | 84 | | // Also validate the base. | 85 | 10 | if (base == 0) { | 86 | 0 | if (end - start >= 2 && start[0] == '0' && (start[1] == 'x' || start[1] == 'X')) { | 87 | 0 | base = 16; | 88 | 0 | start += 2; | 89 | 0 | } else if (end - start >= 1 && start[0] == '0') { | 90 | 0 | base = 8; | 91 | 0 | start += 1; | 92 | 0 | } else { | 93 | 0 | base = 10; | 94 | 0 | } | 95 | 10 | } else if (base == 16) { | 96 | 0 | if (end - start >= 2 && start[0] == '0' && (start[1] == 'x' || start[1] == 'X')) { | 97 | 0 | start += 2; | 98 | 0 | } | 99 | 10 | } else if (base >= 2 && base <= 36) { | 100 | | // okay | 101 | 10 | } else { | 102 | 0 | return false; | 103 | 0 | } | 104 | | | 105 | | // Consume digits. | 106 | | // | 107 | | // The classic loop: | 108 | | // | 109 | | // for each digit | 110 | | // value = value * base + digit | 111 | | // value *= sign | 112 | | // | 113 | | // The classic loop needs overflow checking. It also fails on the most | 114 | | // negative integer, -2147483648 in 32-bit two's complement representation. 
| 115 | | // | 116 | | // My improved loop: | 117 | | // | 118 | | // if (!negative) | 119 | | // for each digit | 120 | | // value = value * base | 121 | | // value = value + digit | 122 | | // else | 123 | | // for each digit | 124 | | // value = value * base | 125 | | // value = value - digit | 126 | | // | 127 | | // Overflow checking becomes simple. | 128 | | // | 129 | | // I present the positive code first for easier reading. | 130 | 10 | IntType value = 0; | 131 | 10 | if (!negative) { | 132 | 8 | const IntType vmax = std::numeric_limits<IntType>::max(); | 133 | 8 | assert(vmax > 0); | 134 | 0 | assert(vmax >= base); | 135 | 0 | const IntType vmax_over_base = vmax / base; | 136 | | // loop over digits | 137 | | // loop body is interleaved for perf, not readability | 138 | 68 | for (; start < end; ++start) { | 139 | 64 | unsigned char c = static_cast<unsigned char>(start[0]); | 140 | 64 | int digit = kAsciiToInt[c]; | 141 | 64 | if (value > vmax_over_base) return false; | 142 | 64 | value *= base; | 143 | 64 | if (digit >= base) return false; | 144 | 61 | if (value > vmax - digit) return false; | 145 | 60 | value += digit; | 146 | 60 | } | 147 | 8 | } else { | 148 | 2 | const IntType vmin = std::numeric_limits<IntType>::min(); | 149 | 2 | assert(vmin < 0); | 150 | 0 | assert(vmin <= 0 - base); | 151 | 0 | IntType vmin_over_base = vmin / base; | 152 | | // 2003 c++ standard [expr.mul] | 153 | | // "... the sign of the remainder is implementation-defined." | 154 | | // Although (vmin/base)*base + vmin%base is always vmin. | 155 | | // 2011 c++ standard tightens the spec but we cannot rely on it. 
| 156 | 2 | if (vmin % base > 0) { | 157 | 0 | vmin_over_base += 1; | 158 | 0 | } | 159 | | // loop over digits | 160 | | // loop body is interleaved for perf, not readability | 161 | 22 | for (; start < end; ++start) { | 162 | 20 | unsigned char c = static_cast<unsigned char>(start[0]); | 163 | 20 | int digit = kAsciiToInt[c]; | 164 | 20 | if (value < vmin_over_base) return false; | 165 | 20 | value *= base; | 166 | 20 | if (digit >= base) return false; | 167 | 20 | if (value < vmin + digit) return false; | 168 | 20 | value -= digit; | 169 | 20 | } | 170 | 2 | } | 171 | | | 172 | | // Store output. | 173 | 6 | *value_p = value; | 174 | 6 | return true; | 175 | 10 | } |
|
176 | | |
177 | | } // anonymous namespace |
178 | | |
179 | 0 | bool safe_strto32_base(const char* startptr, const int buffer_size, int32* v, int base) { |
180 | 0 | return safe_int_internal<int32>(startptr, startptr + buffer_size, base, v); |
181 | 0 | } |
182 | | |
183 | 0 | bool safe_strto64_base(const char* startptr, const int buffer_size, int64* v, int base) { |
184 | 0 | return safe_int_internal<int64>(startptr, startptr + buffer_size, base, v); |
185 | 0 | } |
186 | | |
187 | 21 | bool safe_strto32(const char* startptr, const int buffer_size, int32* value) { |
188 | 21 | return safe_int_internal<int32>(startptr, startptr + buffer_size, 10, value); |
189 | 21 | } |
190 | | |
191 | 12 | bool safe_strto64(const char* startptr, const int buffer_size, int64* value) { |
192 | 12 | return safe_int_internal<int64>(startptr, startptr + buffer_size, 10, value); |
193 | 12 | } |
194 | | |
195 | 0 | bool safe_strto32_base(const char* str, int32* value, int base) { |
196 | 0 | char* endptr; |
197 | 0 | errno = 0; // errno only gets set on errors |
198 | 0 | *value = strto32(str, &endptr, base); |
199 | 0 | if (endptr != str) { |
200 | 0 | while (ascii_isspace(*endptr)) ++endptr; |
201 | 0 | } |
202 | 0 | return *str != '\0' && *endptr == '\0' && errno == 0; |
203 | 0 | } |
204 | | |
205 | 0 | bool safe_strto64_base(const char* str, int64* value, int base) { |
206 | 0 | char* endptr; |
207 | 0 | errno = 0; // errno only gets set on errors |
208 | 0 | *value = strto64(str, &endptr, base); |
209 | 0 | if (endptr != str) { |
210 | 0 | while (ascii_isspace(*endptr)) ++endptr; |
211 | 0 | } |
212 | 0 | return *str != '\0' && *endptr == '\0' && errno == 0; |
213 | 0 | } |
214 | | |
215 | 0 | bool safe_strtou32_base(const char* str, uint32* value, int base) { |
216 | | // strtoul does not give any errors on negative numbers, so we have to |
217 | | // search the string for '-' manually. |
218 | 0 | while (ascii_isspace(*str)) ++str; |
219 | 0 | if (*str == '-') return false; |
220 | | |
221 | 0 | char* endptr; |
222 | 0 | errno = 0; // errno only gets set on errors |
223 | 0 | *value = strtou32(str, &endptr, base); |
224 | 0 | if (endptr != str) { |
225 | 0 | while (ascii_isspace(*endptr)) ++endptr; |
226 | 0 | } |
227 | 0 | return *str != '\0' && *endptr == '\0' && errno == 0; |
228 | 0 | } |
229 | | |
230 | 0 | bool safe_strtou64_base(const char* str, uint64* value, int base) { |
231 | | // strtou64 does not give any errors on negative numbers, so we have to |
232 | | // search the string for '-' manually. |
233 | 0 | while (ascii_isspace(*str)) ++str; |
234 | 0 | if (*str == '-') return false; |
235 | | |
236 | 0 | char* endptr; |
237 | 0 | errno = 0; // errno only gets set on errors |
238 | 0 | *value = strtou64(str, &endptr, base); |
239 | 0 | if (endptr != str) { |
240 | 0 | while (ascii_isspace(*endptr)) ++endptr; |
241 | 0 | } |
242 | 0 | return *str != '\0' && *endptr == '\0' && errno == 0; |
243 | 0 | } |
244 | | |
245 | | // ---------------------------------------------------------------------- |
246 | | // u64tostr_base36() |
247 | | // Converts unsigned number to string representation in base-36. |
248 | | // -------------------------------------------------------------------- |
249 | 0 | size_t u64tostr_base36(uint64 number, size_t buf_size, char* buffer) { |
250 | 0 | CHECK_GT(buf_size, 0); |
251 | 0 | CHECK(buffer); |
252 | 0 | static const char kAlphabet[] = "0123456789abcdefghijklmnopqrstuvwxyz"; |
253 | |
|
254 | 0 | buffer[buf_size - 1] = '\0'; |
255 | 0 | size_t result_size = 1; |
256 | |
|
257 | 0 | do { |
258 | 0 | if (buf_size == result_size) { // Ran out of space. |
259 | 0 | return 0; |
260 | 0 | } |
261 | 0 | int remainder = number % 36; |
262 | 0 | number /= 36; |
263 | 0 | buffer[buf_size - result_size - 1] = kAlphabet[remainder]; |
264 | 0 | result_size++; |
265 | 0 | } while (number); |
266 | | |
267 | 0 | memmove(buffer, buffer + buf_size - result_size, result_size); |
268 | |
|
269 | 0 | return result_size - 1; |
270 | 0 | } |
271 | | |
272 | | // Generate functions that wrap safe_strtoXXX_base. |
273 | | #define GEN_SAFE_STRTO(name, type) \ |
274 | 0 | bool name##_base(const string& str, type* value, int base) { \ |
275 | 0 | return name##_base(str.c_str(), value, base); \ |
276 | 0 | } \ Unexecuted instantiation: _Z17safe_strto32_baseRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPii Unexecuted instantiation: _Z18safe_strtou32_baseRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPji Unexecuted instantiation: _Z17safe_strto64_baseRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPli Unexecuted instantiation: _Z18safe_strtou64_baseRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPmi |
277 | 0 | bool name(const char* str, type* value) { return name##_base(str, value, 10); } \ Unexecuted instantiation: _Z12safe_strto32PKcPi Unexecuted instantiation: _Z13safe_strtou32PKcPj Unexecuted instantiation: _Z12safe_strto64PKcPl Unexecuted instantiation: _Z13safe_strtou64PKcPm |
278 | 0 | bool name(const string& str, type* value) { return name##_base(str.c_str(), value, 10); } Unexecuted instantiation: _Z12safe_strto32RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPi Unexecuted instantiation: _Z13safe_strtou32RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPj Unexecuted instantiation: _Z12safe_strto64RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPl Unexecuted instantiation: _Z13safe_strtou64RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPm |
279 | | GEN_SAFE_STRTO(safe_strto32, int32); |
280 | | GEN_SAFE_STRTO(safe_strtou32, uint32); |
281 | | GEN_SAFE_STRTO(safe_strto64, int64); |
282 | | GEN_SAFE_STRTO(safe_strtou64, uint64); |
283 | | #undef GEN_SAFE_STRTO |
284 | | |
285 | 18.0M | bool safe_strtof(const char* str, float* value) { |
286 | 18.0M | char* endptr; |
287 | | #ifdef _MSC_VER // has no strtof() |
288 | | *value = strtod(str, &endptr); |
289 | | #else |
290 | 18.0M | *value = strtof(str, &endptr); |
291 | 18.0M | #endif |
292 | 18.0M | if (endptr != str) { |
293 | 18.0M | while (ascii_isspace(*endptr)) ++endptr; |
294 | 18.0M | } |
295 | | // Ignore range errors from strtod/strtof. |
296 | | // The values it returns on underflow and |
297 | | // overflow are the right fallback in a |
298 | | // robust setting. |
299 | 18.0M | return *str != '\0' && *endptr == '\0'; |
300 | 18.0M | } |
301 | | |
302 | 0 | bool safe_strtod(const char* str, double* value) { |
303 | 0 | char* endptr; |
304 | 0 | *value = strtod(str, &endptr); |
305 | 0 | if (endptr != str) { |
306 | 0 | while (ascii_isspace(*endptr)) ++endptr; |
307 | 0 | } |
308 | | // Ignore range errors from strtod. The values it |
309 | | // returns on underflow and overflow are the right |
310 | | // fallback in a robust setting. |
311 | 0 | return *str != '\0' && *endptr == '\0'; |
312 | 0 | } |
313 | | |
314 | 11 | bool safe_strtof(const string& str, float* value) { |
315 | 11 | return safe_strtof(str.c_str(), value); |
316 | 11 | } |
317 | | |
318 | 0 | bool safe_strtod(const string& str, double* value) { |
319 | 0 | return safe_strtod(str.c_str(), value); |
320 | 0 | } |
321 | | |
322 | | // ---------------------------------------------------------------------- |
323 | | // SimpleDtoa() |
324 | | // SimpleFtoa() |
325 | | // DoubleToBuffer() |
326 | | // FloatToBuffer() |
327 | | // We want to print the value without losing precision, but we also do |
328 | | // not want to print more digits than necessary. This turns out to be |
329 | | // trickier than it sounds. Numbers like 0.2 cannot be represented |
330 | | // exactly in binary. If we print 0.2 with a very large precision, |
331 | | // e.g. "%.50g", we get "0.2000000000000000111022302462515654042363167". |
332 | | // On the other hand, if we set the precision too low, we lose |
333 | | // significant digits when printing numbers that actually need them. |
334 | | // It turns out there is no precision value that does the right thing |
335 | | // for all numbers. |
336 | | // |
337 | | // Our strategy is to first try printing with a precision that is never |
338 | | // over-precise, then parse the result with strtod() to see if it |
339 | | // matches. If not, we print again with a precision that will always |
340 | | // give a precise result, but may use more digits than necessary. |
341 | | // |
342 | | // An arguably better strategy would be to use the algorithm described |
343 | | // in "How to Print Floating-Point Numbers Accurately" by Steele & |
344 | | // White, e.g. as implemented by David M. Gay's dtoa(). It turns out, |
345 | | // however, that the following implementation is about as fast as |
346 | | // DMG's code. Furthermore, DMG's code locks mutexes, which means it |
347 | | // will not scale well on multi-core machines. DMG's code is slightly |
348 | | // more accurate (in that it will never use more digits than |
349 | | // necessary), but this is probably irrelevant for most users. |
350 | | // |
351 | | // Rob Pike and Ken Thompson also have an implementation of dtoa() in |
352 | | // third_party/fmt/fltfmt.cc. Their implementation is similar to this |
353 | | // one in that it makes guesses and then uses strtod() to check them. |
354 | | // Their implementation is faster because they use their own code to |
355 | | // generate the digits in the first place rather than use snprintf(), |
356 | | // thus avoiding format string parsing overhead. However, this makes |
357 | | // it considerably more complicated than the following implementation, |
358 | | // and it is embedded in a larger library. If speed turns out to be |
359 | | // an issue, we could re-implement this in terms of their |
360 | | // implementation. |
361 | | // ---------------------------------------------------------------------- |
362 | 11 | int DoubleToBuffer(double value, int width, char* buffer) { |
363 | | // DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all |
364 | | // platforms these days. Just in case some system exists where DBL_DIG |
365 | | // is significantly larger -- and risks overflowing our buffer -- we have |
366 | | // this assert. |
367 | 11 | COMPILE_ASSERT(DBL_DIG < 20, DBL_DIG_is_too_big); |
368 | | |
369 | 11 | int snprintf_result = snprintf(buffer, width, "%.*g", DBL_DIG, value); |
370 | | |
371 | | // The snprintf should never overflow because the buffer is significantly |
372 | | // larger than the precision we asked for. |
373 | 11 | DCHECK(snprintf_result > 0 && snprintf_result < width); |
374 | | |
375 | 11 | if (strtod(buffer, nullptr) != value) { |
376 | 3 | snprintf_result = snprintf(buffer, width, "%.*g", DBL_DIG + 2, value); |
377 | | |
378 | | // Should never overflow; see above. |
379 | 3 | DCHECK(snprintf_result > 0 && snprintf_result < width); |
380 | 3 | } |
381 | | |
382 | 11 | return snprintf_result; |
383 | 11 | } |
384 | | |
385 | 18.0M | int FloatToBuffer(float value, int width, char* buffer) { |
386 | | // FLT_DIG is 6 for IEEE-754 floats, which are used on almost all |
387 | | // platforms these days. Just in case some system exists where FLT_DIG |
388 | | // is significantly larger -- and risks overflowing our buffer -- we have |
389 | | // this assert. |
390 | 18.0M | COMPILE_ASSERT(FLT_DIG < 10, FLT_DIG_is_too_big); |
391 | | |
392 | 18.0M | int snprintf_result = snprintf(buffer, width, "%.*g", FLT_DIG, value); |
393 | | |
394 | | // The snprintf should never overflow because the buffer is significantly |
395 | | // larger than the precision we asked for. |
396 | 18.0M | DCHECK(snprintf_result > 0 && snprintf_result < width); |
397 | | |
398 | 18.0M | float parsed_value; |
399 | 18.0M | if (!safe_strtof(buffer, &parsed_value) || parsed_value != value) { |
400 | 10 | snprintf_result = snprintf(buffer, width, "%.*g", FLT_DIG + 2, value); |
401 | | |
402 | | // Should never overflow; see above. |
403 | 10 | DCHECK(snprintf_result > 0 && snprintf_result < width); |
404 | 10 | } |
405 | | |
406 | 18.0M | return snprintf_result; |
407 | 18.0M | } |
408 | | |
409 | 862 | int FastDoubleToBuffer(double value, char* buffer) { |
410 | 862 | auto end = fmt::format_to(buffer, FMT_COMPILE("{}"), value); |
411 | 862 | *end = '\0'; |
412 | 862 | return end - buffer; |
413 | 862 | } |
414 | | |
415 | 761 | int FastFloatToBuffer(float value, char* buffer) { |
416 | 761 | auto* end = fmt::format_to(buffer, FMT_COMPILE("{}"), value); |
417 | 761 | *end = '\0'; |
418 | 761 | return end - buffer; |
419 | 761 | } |
420 | | |
421 | | // ---------------------------------------------------------------------- |
422 | | // SimpleItoaWithCommas() |
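423 | | // Description: converts an integer to a string. |
424 | | // Puts a comma between every group of 3 digits. |
425 | | // Faster than printf("%d")? |
426 | | // |
427 | | // Return value: pointer to the first character written. The text is right-aligned in the buffer: it ends at buffer + buffer_size and is not NUL-terminated. |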
428 | | // ---------------------------------------------------------------------- |
429 | | |
// Formats i in decimal with a ',' between every group of three digits.
//
// The text is written right-aligned into buffer: its last character lands at
// buffer[buffer_size - 1] and the returned pointer addresses its first
// character (the '-' for negative values). No NUL terminator is written and
// no bounds check is performed; 19 digits, 6 commas, and sign are good for
// 64-bit or smaller ints.
char* SimpleItoaWithCommas(int64_t i, char* buffer, int32_t buffer_size) {
    char* out = buffer + buffer_size;

    // Work on the magnitude as an unsigned value so that
    // -9,223,372,036,854,775,808 is handled correctly.
    uint64_t magnitude = static_cast<uint64_t>(i);
    if (i < 0) {
        magnitude = 0 - magnitude;
    }

    // Emit digits from least to most significant. The do/while guarantees
    // that at least one digit is produced, which covers the number "0".
    int digits_in_group = 0;
    do {
        if (digits_in_group == 3) {
            *--out = ',';
            digits_in_group = 0;
        }
        *--out = static_cast<char>('0' + magnitude % 10);
        magnitude /= 10;
        ++digits_in_group;
    } while (magnitude != 0);

    if (i < 0) {
        *--out = '-';
    }
    return out;
}
456 | | |
// Formats i in decimal with a ',' between every group of three digits.
//
// The text is written right-aligned into buffer: its last character lands at
// buffer[buffer_size - 1] and the returned pointer addresses its first
// character (the '-' for negative values). No NUL terminator is written and
// no bounds check is performed; 39 digits, 12 commas, and sign are good for
// 128-bit or smaller ints.
char* SimpleItoaWithCommas(__int128_t i, char* buffer, int32_t buffer_size) {
    char* out = buffer + buffer_size;

    // Work on the magnitude as an unsigned value so that
    // -170,141,183,460,469,231,731,687,303,715,884,105,728 is handled
    // correctly.
    __uint128_t magnitude = static_cast<__uint128_t>(i);
    if (i < 0) {
        magnitude = 0 - magnitude;
    }

    // Emit digits from least to most significant. The do/while guarantees
    // that at least one digit is produced, which covers the number "0".
    int digits_in_group = 0;
    do {
        if (digits_in_group == 3) {
            *--out = ',';
            digits_in_group = 0;
        }
        *--out = static_cast<char>('0' + magnitude % 10);
        magnitude /= 10;
        ++digits_in_group;
    } while (magnitude != 0);

    if (i < 0) {
        *--out = '-';
    }
    return out;
}