/root/doris/be/src/gutil/strings/numbers.cc

Source (jump to first uncovered line)
// Copyright 2010 Google Inc. All Rights Reserved.
// Refactored from contributions of various authors in strings/strutil.cc
//
// This file contains string processing functions related to
// numeric values.

#include "gutil/strings/numbers.h"

#include <fmt/compile.h>
#include <fmt/format.h>

#include <cfloat>

#include "common/logging.h"
#include "gutil/integral_types.h"
#include "gutil/strings/ascii_ctype.h"
#include "gutil/strtoint.h"

// Parses `str` as a float and stores the result in `*value`.
//
// Returns true only when `str` is non-empty and everything after the parsed
// number is ASCII whitespace up to the terminating NUL.  `*value` is written
// unconditionally, even when false is returned.
bool safe_strtof(const char* str, float* value) {
    char* parse_end = nullptr;
#ifdef _MSC_VER // has no strtof()
    *value = strtod(str, &parse_end);
#else
    *value = strtof(str, &parse_end);
#endif
    // Trailing whitespace is tolerated, but only after a successful
    // conversion (i.e. when the parser actually consumed something).
    const bool consumed_input = (parse_end != str);
    if (consumed_input) {
        for (; ascii_isspace(*parse_end); ++parse_end) {
        }
    }
    // Range errors from strtod/strtof are deliberately ignored: the values
    // returned on underflow and overflow are the right fallback in a robust
    // setting.
    if (*str == '\0') {
        return false;
    }
    return *parse_end == '\0';
}

// Parses `str` as a double and stores the result in `*value`.
//
// Returns true only when `str` is non-empty and everything after the parsed
// number is ASCII whitespace up to the terminating NUL.  `*value` is written
// unconditionally, even when false is returned.
bool safe_strtod(const char* str, double* value) {
    char* parse_end = nullptr;
    *value = strtod(str, &parse_end);

    // Skip trailing whitespace, but only if strtod() consumed something.
    if (parse_end != str) {
        for (; ascii_isspace(*parse_end); ++parse_end) {
        }
    }

    // Range errors from strtod are deliberately ignored: the values it
    // returns on underflow and overflow are the right fallback in a robust
    // setting.
    const bool has_input = (*str != '\0');
    return has_input && *parse_end == '\0';
}

bool safe_strtof(const string& str, float* value) {
    return safe_strtof(str.c_str(), value);
}

bool safe_strtod(const string& str, double* value) {
    return safe_strtod(str.c_str(), value);
}

// ----------------------------------------------------------------------
// SimpleDtoa()
// SimpleFtoa()
// DoubleToBuffer()
// FloatToBuffer()
//    We want to print the value without losing precision, but we also do
//    not want to print more digits than necessary.  This turns out to be
//    trickier than it sounds.  Numbers like 0.2 cannot be represented
//    exactly in binary.  If we print 0.2 with a very large precision,
//    e.g. "%.50g", we get "0.2000000000000000111022302462515654042363167".
//    On the other hand, if we set the precision too low, we lose
//    significant digits when printing numbers that actually need them.
//    It turns out there is no precision value that does the right thing
//    for all numbers.
//
//    Our strategy is to first try printing with a precision that is never
//    over-precise, then parse the result with strtod() to see if it
//    matches.  If not, we print again with a precision that will always
//    give a precise result, but may use more digits than necessary.
//
//    An arguably better strategy would be to use the algorithm described
//    in "How to Print Floating-Point Numbers Accurately" by Steele &
//    White, e.g. as implemented by David M. Gay's dtoa().  It turns out,
//    however, that the following implementation is about as fast as
//    DMG's code.  Furthermore, DMG's code locks mutexes, which means it
//    will not scale well on multi-core machines.  DMG's code is slightly
//    more accurate (in that it will never use more digits than
//    necessary), but this is probably irrelevant for most users.
//
//    Rob Pike and Ken Thompson also have an implementation of dtoa() in
//    third_party/fmt/fltfmt.cc.  Their implementation is similar to this
//    one in that it makes guesses and then uses strtod() to check them.
//    Their implementation is faster because they use their own code to
//    generate the digits in the first place rather than use snprintf(),
//    thus avoiding format string parsing overhead.  However, this makes
//    it considerably more complicated than the following implementation,
//    and it is embedded in a larger library.  If speed turns out to be
//    an issue, we could re-implement this in terms of their
//    implementation.
// ----------------------------------------------------------------------
// Prints `value` into `buffer` (capacity `width` bytes, passed straight to
// snprintf) and returns snprintf's result for the text that was kept.
//
// A first attempt uses DBL_DIG significant digits; if parsing that text back
// does not reproduce `value`, the number is printed again with DBL_DIG + 2
// digits.
int DoubleToBuffer(double value, int width, char* buffer) {
    // DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all
    // platforms these days.  Guard against a platform where it is large
    // enough to risk overflowing the caller's buffer.
    COMPILE_ASSERT(DBL_DIG < 20, DBL_DIG_is_too_big);

    // First attempt: a precision that never prints spurious digits.
    int snprintf_result = snprintf(buffer, width, "%.*g", DBL_DIG, value);

    // The buffer is expected to be significantly larger than the requested
    // precision, so this should never truncate.
    DCHECK(snprintf_result > 0 && snprintf_result < width);

    // Written as an equality test on purpose: a NaN never compares equal, so
    // it falls through to the second attempt.
    if (strtod(buffer, nullptr) == value) {
        return snprintf_result;
    }

    // Second attempt: enough digits to be exact, possibly more than needed.
    snprintf_result = snprintf(buffer, width, "%.*g", DBL_DIG + 2, value);

    // Should never overflow; see above.
    DCHECK(snprintf_result > 0 && snprintf_result < width);

    return snprintf_result;
}

// Prints `value` into `buffer` (capacity `width` bytes, passed straight to
// snprintf) and returns snprintf's result for the text that was kept.
//
// A first attempt uses FLT_DIG significant digits; if parsing that text back
// with safe_strtof() does not reproduce `value`, the number is printed again
// with FLT_DIG + 3 digits.
//
// NOTE(review): with 9 significant digits the longest IEEE-754 output is
// 15 characters plus the NUL (e.g. "-1.17549435e-38"), so callers should pass
// a `width` of at least 16 -- confirm against existing call sites.
int FloatToBuffer(float value, int width, char* buffer) {
    // FLT_DIG is 6 for IEEE-754 floats, which are used on almost all
    // platforms these days.  Just in case some system exists where FLT_DIG
    // is significantly larger -- and risks overflowing our buffer -- we have
    // this assert.
    COMPILE_ASSERT(FLT_DIG < 10, FLT_DIG_is_too_big);

    // First attempt: a precision that never prints spurious digits.
    int snprintf_result = snprintf(buffer, width, "%.*g", FLT_DIG, value);

    // The snprintf should never overflow because the buffer is significantly
    // larger than the precision we asked for.
    DCHECK(snprintf_result > 0 && snprintf_result < width);

    float parsed_value;
    if (!safe_strtof(buffer, &parsed_value) || parsed_value != value) {
        // An IEEE-754 single needs 9 significant decimal digits
        // (std::numeric_limits<float>::max_digits10) to survive a round trip
        // through text.  FLT_DIG + 2 == 8 is not enough for every value, so
        // use FLT_DIG + 3 to make the fallback genuinely lossless.
        snprintf_result = snprintf(buffer, width, "%.*g", FLT_DIG + 3, value);

        // Should never overflow; see above.
        DCHECK(snprintf_result > 0 && snprintf_result < width);
    }

    return snprintf_result;
}

// Formats `value` into `buffer` with fmt's default "{}" presentation,
// NUL-terminates it, and returns the number of characters written (not
// counting the NUL).  No capacity is passed in, so the caller must supply a
// buffer large enough for the formatted text plus the terminator.
int FastDoubleToBuffer(double value, char* buffer) {
    char* const terminator = fmt::format_to(buffer, FMT_COMPILE("{}"), value);
    *terminator = '\0';
    return terminator - buffer;
}

// Formats `value` into `buffer` with fmt's default "{}" presentation,
// NUL-terminates it, and returns the number of characters written (not
// counting the NUL).  No capacity is passed in, so the caller must supply a
// buffer large enough for the formatted text plus the terminator.
int FastFloatToBuffer(float value, char* buffer) {
    char* const terminator = fmt::format_to(buffer, FMT_COMPILE("{}"), value);
    *terminator = '\0';
    return terminator - buffer;
}

// ----------------------------------------------------------------------
// SimpleItoaWithCommas()
//    Description: converts an integer to a string.
//    Puts commas every 3 spaces.
//    Faster than printf("%d")?
//
//    Return value: string
// ----------------------------------------------------------------------

namespace {

// Shared implementation for the SimpleItoaWithCommas() overloads.
//
// Writes the decimal digits of `magnitude` right-to-left so that the last
// digit lands at `end[-1]`, inserting a ',' between every group of three
// digits and a leading '-' when `negative` is set.  Returns a pointer to the
// first character written.  Nothing is NUL-terminated and no lower bound is
// checked: the caller must guarantee enough room in front of `end`.
template <typename UInt>
char* WriteDecimalWithCommasBackward(UInt magnitude, bool negative, char* end) {
    char* p = end;
    int digits_in_group = 0;
    // do/while so that the value 0 still produces the single digit "0".
    do {
        if (digits_in_group == 3) {
            *--p = ',';
            digits_in_group = 0;
        }
        *--p = static_cast<char>('0' + static_cast<int>(magnitude % 10));
        magnitude /= 10;
        ++digits_in_group;
    } while (magnitude != 0);
    if (negative) *--p = '-';
    return p;
}

} // namespace

// Renders `i` in decimal with thousands separators into the tail of
// `buffer`; the text ends at `buffer + buffer_size` (exclusive) and is NOT
// NUL-terminated.  Returns a pointer to the first character of the text.
//
// 19 digits, 6 commas, and sign are good for 64-bit or smaller ints, so
// `buffer_size` must be at least 26 to hold every possible value.
char* SimpleItoaWithCommas(int64_t i, char* buffer, int32_t buffer_size) {
    // Negate in unsigned arithmetic to correctly handle
    // -9,223,372,036,854,775,808, whose magnitude does not fit in int64_t.
    uint64_t magnitude = static_cast<uint64_t>(i);
    if (i < 0) magnitude = 0 - magnitude;
    return WriteDecimalWithCommasBackward(magnitude, i < 0, buffer + buffer_size);
}

// Renders `i` in decimal with thousands separators into the tail of
// `buffer`; the text ends at `buffer + buffer_size` (exclusive) and is NOT
// NUL-terminated.  Returns a pointer to the first character of the text.
//
// 39 digits, 12 commas, and sign are good for 128-bit or smaller ints, so
// `buffer_size` must be at least 52 to hold every possible value.
char* SimpleItoaWithCommas(__int128_t i, char* buffer, int32_t buffer_size) {
    // Negate in unsigned arithmetic to correctly handle
    // -170,141,183,460,469,231,731,687,303,715,884,105,728, whose magnitude
    // does not fit in __int128_t.
    __uint128_t magnitude = static_cast<__uint128_t>(i);
    if (i < 0) magnitude = 0 - magnitude;
    return WriteDecimalWithCommasBackward(magnitude, i < 0, buffer + buffer_size);
}

Coverage Report

Created: 2025-06-20 03:12

Line	Count	Source (jump to first uncovered line)
1		// Copyright 2010 Google Inc. All Rights Reserved.
2		// Refactored from contributions of various authors in strings/strutil.cc
3		//
4		// This file contains string processing functions related to
5		// numeric values.
6
7		#include "gutil/strings/numbers.h"
8
9		#include <fmt/compile.h>
10		#include <fmt/format.h>
11
12		#include <cfloat>
13
14		#include "common/logging.h"
15		#include "gutil/integral_types.h"
16		#include "gutil/strings/ascii_ctype.h"
17		#include "gutil/strtoint.h"
18
19	36.1M	bool safe_strtof(const char* str, float* value) {
20	36.1M	char* endptr;
21		#ifdef _MSC_VER // has no strtof()
22		*value = strtod(str, &endptr);
23		#else
24	36.1M	*value = strtof(str, &endptr);
25	36.1M	#endif
26	36.1M	if (endptr != str) {
27	36.1M	while (ascii_isspace(*endptr)) ++endptr;
28	36.1M	}
29		// Ignore range errors from strtod/strtof.
30		// The values it returns on underflow and
31		// overflow are the right fallback in a
32		// robust setting.
33	36.1M	return *str != '\0' && *endptr == '\0';
34	36.1M	}
35
36	0	bool safe_strtod(const char* str, double* value) {
37	0	char* endptr;
38	0	*value = strtod(str, &endptr);
39	0	if (endptr != str) {
40	0	while (ascii_isspace(*endptr)) ++endptr;
41	0	}
42		// Ignore range errors from strtod. The values it
43		// returns on underflow and overflow are the right
44		// fallback in a robust setting.
45	0	return *str != '\0' && *endptr == '\0';
46	0	}
47
48	0	bool safe_strtof(const string& str, float* value) {
49	0	return safe_strtof(str.c_str(), value);
50	0	}
51
52	0	bool safe_strtod(const string& str, double* value) {
53	0	return safe_strtod(str.c_str(), value);
54	0	}
55
56		// ----------------------------------------------------------------------
57		// SimpleDtoa()
58		// SimpleFtoa()
59		// DoubleToBuffer()
60		// FloatToBuffer()
61		// We want to print the value without losing precision, but we also do
62		// not want to print more digits than necessary. This turns out to be
63		// trickier than it sounds. Numbers like 0.2 cannot be represented
64		// exactly in binary. If we print 0.2 with a very large precision,
65		// e.g. "%.50g", we get "0.2000000000000000111022302462515654042363167".
66		// On the other hand, if we set the precision too low, we lose
67		// significant digits when printing numbers that actually need them.
68		// It turns out there is no precision value that does the right thing
69		// for all numbers.
70		//
71		// Our strategy is to first try printing with a precision that is never
72		// over-precise, then parse the result with strtod() to see if it
73		// matches. If not, we print again with a precision that will always
74		// give a precise result, but may use more digits than necessary.
75		//
76		// An arguably better strategy would be to use the algorithm described
77		// in "How to Print Floating-Point Numbers Accurately" by Steele &
78		// White, e.g. as implemented by David M. Gay's dtoa(). It turns out,
79		// however, that the following implementation is about as fast as
80		// DMG's code. Furthermore, DMG's code locks mutexes, which means it
81		// will not scale well on multi-core machines. DMG's code is slightly
82		// more accurate (in that it will never use more digits than
83		// necessary), but this is probably irrelevant for most users.
84		//
85		// Rob Pike and Ken Thompson also have an implementation of dtoa() in
86		// third_party/fmt/fltfmt.cc. Their implementation is similar to this
87		// one in that it makes guesses and then uses strtod() to check them.
88		// Their implementation is faster because they use their own code to
89		// generate the digits in the first place rather than use snprintf(),
90		// thus avoiding format string parsing overhead. However, this makes
91		// it considerably more complicated than the following implementation,
92		// and it is embedded in a larger library. If speed turns out to be
93		// an issue, we could re-implement this in terms of their
94		// implementation.
95		// ----------------------------------------------------------------------
96	22	int DoubleToBuffer(double value, int width, char* buffer) {
97		// DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all
98		// platforms these days. Just in case some system exists where DBL_DIG
99		// is significantly larger -- and risks overflowing our buffer -- we have
100		// this assert.
101	22	COMPILE_ASSERT(DBL_DIG < 20, DBL_DIG_is_too_big);
102
103	22	int snprintf_result = snprintf(buffer, width, "%.*g", DBL_DIG, value);
104
105		// The snprintf should never overflow because the buffer is significantly
106		// larger than the precision we asked for.
107	22	DCHECK(snprintf_result > 0 && snprintf_result < width);
108
109	22	if (strtod(buffer, nullptr) != value) {
110	6	snprintf_result = snprintf(buffer, width, "%.*g", DBL_DIG + 2, value);
111
112		// Should never overflow; see above.
113	6	DCHECK(snprintf_result > 0 && snprintf_result < width);
114	6	}
115
116	22	return snprintf_result;
117	22	}
118
119	36.1M	int FloatToBuffer(float value, int width, char* buffer) {
120		// FLT_DIG is 6 for IEEE-754 floats, which are used on almost all
121		// platforms these days. Just in case some system exists where FLT_DIG
122		// is significantly larger -- and risks overflowing our buffer -- we have
123		// this assert.
124	36.1M	COMPILE_ASSERT(FLT_DIG < 10, FLT_DIG_is_too_big);
125
126	36.1M	int snprintf_result = snprintf(buffer, width, "%.*g", FLT_DIG, value);
127
128		// The snprintf should never overflow because the buffer is significantly
129		// larger than the precision we asked for.
130	36.1M	DCHECK(snprintf_result > 0 && snprintf_result < width);
131
132	36.1M	float parsed_value;
133	36.1M	if (!safe_strtof(buffer, &parsed_value) || parsed_value != value) {
134	20	snprintf_result = snprintf(buffer, width, "%.*g", FLT_DIG + 2, value);
135
136		// Should never overflow; see above.
137	20	DCHECK(snprintf_result > 0 && snprintf_result < width);
138	20	}
139
140	36.1M	return snprintf_result;
141	36.1M	}
142
143	1.72k	int FastDoubleToBuffer(double value, char* buffer) {
144	1.72k	auto end = fmt::format_to(buffer, FMT_COMPILE("{}"), value);
145	1.72k	*end = '\0';
146	1.72k	return end - buffer;
147	1.72k	}
148
149	1.52k	int FastFloatToBuffer(float value, char* buffer) {
150	1.52k	auto* end = fmt::format_to(buffer, FMT_COMPILE("{}"), value);
151	1.52k	*end = '\0';
152	1.52k	return end - buffer;
153	1.52k	}
154
155		// ----------------------------------------------------------------------
156		// SimpleItoaWithCommas()
157		// Description: converts an integer to a string.
158		// Puts commas every 3 spaces.
159		// Faster than printf("%d")?
160		//
161		// Return value: string
162		// ----------------------------------------------------------------------
163
164	54	char* SimpleItoaWithCommas(int64_t i, char* buffer, int32_t buffer_size) {
165		// 19 digits, 6 commas, and sign are good for 64-bit or smaller ints.
166	54	char* p = buffer + buffer_size;
167		// Need to use uint64 instead of int64 to correctly handle
168		// -9,223,372,036,854,775,808.
169	54	uint64 n = i;
170	54	if (i < 0) n = 0 - n;
171	54	*--p = '0' + n % 10; // this case deals with the number "0"
172	54	n /= 10;
173	136	while (n) {
174	126	*--p = '0' + n % 10;
175	126	n /= 10;
176	126	if (n == 0) break;
177
178	92	*--p = '0' + n % 10;
179	92	n /= 10;
180	92	if (n == 0) break;
181
182	82	*--p = ',';
183	82	*--p = '0' + n % 10;
184	82	n /= 10;
185		// For this unrolling, we check if n == 0 in the main while loop
186	82	}
187	54	if (i < 0) *--p = '-';
188	54	return p;
189	54	}
190
191	58	char* SimpleItoaWithCommas(__int128_t i, char* buffer, int32_t buffer_size) {
192		// 39 digits, 12 commas, and sign are good for 128-bit or smaller ints.
193	58	char* p = buffer + buffer_size;
194		// Need to use uint128 instead of int128 to correctly handle
195		// -170,141,183,460,469,231,731,687,303,715,884,105,728.
196	58	__uint128_t n = i;
197	58	if (i < 0) n = 0 - n;
198	58	*--p = '0' + n % 10; // this case deals with the number "0"
199	58	n /= 10;
200	148	while (n) {
201	128	*--p = '0' + n % 10;
202	128	n /= 10;
203	128	if (n == 0) break;
204
205	112	*--p = '0' + n % 10;
206	112	n /= 10;
207	112	if (n == 0) break;
208
209	90	*--p = ',';
210	90	*--p = '0' + n % 10;
211	90	n /= 10;
212		// For this unrolling, we check if n == 0 in the main while loop
213	90	}
214	58	if (i < 0) *--p = '-';
215	58	return p;
216	58	}