From 70aa9a81b123bdc4c50e0c32054c391695dfb4ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EC=9E=AC=EC=9A=B1?= Date: Thu, 22 Jan 2026 17:00:26 +0900 Subject: [PATCH 1/8] Add options for digit separators and prefix skipping --- include/fast_float/float_common.h | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h index f35920ba..4013c137 100644 --- a/include/fast_float/float_common.h +++ b/include/fast_float/float_common.h @@ -70,8 +70,10 @@ using from_chars_result = from_chars_result_t; template struct parse_options_t { constexpr explicit parse_options_t(chars_format fmt = chars_format::general, - UC dot = UC('.'), int b = 10) - : format(fmt), decimal_point(dot), base(b) {} + UC dot = UC('.'), int b = 10, + UC sep = UC('\0'), uint8_t opts = 0) + : format(fmt), decimal_point(dot), base(b), digit_separator(sep), + format_options(opts) {} /** Which number formats are accepted */ chars_format format; @@ -79,6 +81,14 @@ template struct parse_options_t { UC decimal_point; /** The base used for integers */ int base; + /** The character used as digit separator. 
Use '\0' to + * disable */ + UC digit_separator; + /** Additional format options (bitmask) */ + uint8_t format_options; + + /** Option to skip prefixes like 0x, 0b */ + static constexpr uint8_t skip_prefix = 1; }; using parse_options = parse_options_t; @@ -199,12 +209,16 @@ using parse_options = parse_options_t; #ifndef FASTFLOAT_ASSERT #define FASTFLOAT_ASSERT(x) \ - { ((void)(x)); } + { \ + ((void)(x)); \ + } #endif #ifndef FASTFLOAT_DEBUG_ASSERT #define FASTFLOAT_DEBUG_ASSERT(x) \ - { ((void)(x)); } + { \ + ((void)(x)); \ + } #endif // rust style `try!()` macro, or `?` operator From 4abcd6059b4191862c4cb06d7f6264b19e956bb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EC=9E=AC=EC=9A=B1?= Date: Fri, 23 Jan 2026 17:00:54 +0900 Subject: [PATCH 2/8] Implement digit separator skipping in number parsing --- include/fast_float/ascii_number.h | 60 +++++++++++++++++++++++++------ 1 file changed, 50 insertions(+), 10 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index 85435373..240f668f 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -353,50 +353,78 @@ parse_number_string(UC const *p, UC const *pend, UC const *const start_digits = p; uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad) + int64_t digit_count = 0; - while ((p != pend) && is_integer(*p)) { + while (p != pend) { + if (options.digit_separator != UC('\0') && *p == options.digit_separator) { + ++p; + continue; + } + if (!is_integer(*p)) { + break; + } // a multiplication by 10 is cheaper than an arbitrary integer // multiplication i = 10 * i + uint64_t(*p - UC('0')); // might overflow, we will handle the overflow later ++p; + ++digit_count; } UC const *const end_of_integer_part = p; - int64_t digit_count = int64_t(end_of_integer_part - start_digits); - answer.integer = span(start_digits, size_t(digit_count)); + answer.integer = + span(start_digits, size_t(end_of_integer_part - start_digits)); 
FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) { // at least 1 digit in integer part, without leading zeros if (digit_count == 0) { return report_parse_error(p, parse_error::no_digits_in_integer_part); } - if ((start_digits[0] == UC('0') && digit_count > 1)) { + UC const *first_digit = start_digits; + while (first_digit != end_of_integer_part && + options.digit_separator != UC('\0') && + *first_digit == options.digit_separator) { + ++first_digit; + } + if (first_digit != end_of_integer_part && *first_digit == UC('0') && + digit_count > 1) { return report_parse_error(start_digits, parse_error::leading_zeros_in_integer_part); } } int64_t exponent = 0; + int64_t fractional_digit_count = 0; bool const has_decimal_point = (p != pend) && (*p == decimal_point); if (has_decimal_point) { ++p; UC const *before = p; // can occur at most twice without overflowing, but let it occur more, since // for integers with many digits, digit parsing is the primary bottleneck. - loop_parse_if_eight_digits(p, pend, i); + if (options.digit_separator == UC('\0')) { + loop_parse_if_eight_digits(p, pend, i); + } - while ((p != pend) && is_integer(*p)) { + while (p != pend) { + if (options.digit_separator != UC('\0') && + *p == options.digit_separator) { + ++p; + continue; + } + if (!is_integer(*p)) { + break; + } uint8_t digit = uint8_t(*p - UC('0')); ++p; i = i * 10 + digit; // in rare cases, this will overflow, but that's ok + ++fractional_digit_count; } - exponent = before - p; + exponent = -fractional_digit_count; answer.fraction = span(before, size_t(p - before)); - digit_count -= exponent; + digit_count += fractional_digit_count; } FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) { // at least 1 digit in fractional part - if (has_decimal_point && exponent == 0) { + if (has_decimal_point && fractional_digit_count == 0) { return report_parse_error(p, parse_error::no_digits_in_fractional_part); } @@ -467,7 +495,9 @@ parse_number_string(UC const *p, UC const *pend, // We need to be mindful of the case 
where we only have zeroes... // E.g., 0.000000000...000. UC const *start = start_digits; - while ((start != pend) && (*start == UC('0') || *start == decimal_point)) { + while ((start != pend) && (*start == UC('0') || *start == decimal_point || + (options.digit_separator != UC('\0') && + *start == options.digit_separator))) { if (*start == UC('0')) { digit_count--; } @@ -484,6 +514,11 @@ parse_number_string(UC const *p, UC const *pend, UC const *int_end = p + answer.integer.len(); uint64_t const minimal_nineteen_digit_integer{1000000000000000000}; while ((i < minimal_nineteen_digit_integer) && (p != int_end)) { + if (options.digit_separator != UC('\0') && + *p == options.digit_separator) { + ++p; + continue; + } i = i * 10 + uint64_t(*p - UC('0')); ++p; } @@ -493,6 +528,11 @@ parse_number_string(UC const *p, UC const *pend, p = answer.fraction.ptr; UC const *frac_end = p + answer.fraction.len(); while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) { + if (options.digit_separator != UC('\0') && + *p == options.digit_separator) { + ++p; + continue; + } i = i * 10 + uint64_t(*p - UC('0')); ++p; } From 456e4c5eb6a8a68a6fab187d39fb65645b1f87a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EC=9E=AC=EC=9A=B1?= Date: Sun, 25 Jan 2026 17:01:19 +0900 Subject: [PATCH 3/8] Implement prefix skipping in from_chars_advanced --- include/fast_float/parse_number.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h index c01bb15a..e040d045 100644 --- a/include/fast_float/parse_number.h +++ b/include/fast_float/parse_number.h @@ -476,6 +476,14 @@ template FASTFLOAT_CONSTEXPR20 from_chars_result_t from_chars_advanced(UC const *first, UC const *last, T &value, parse_options_t options) noexcept { + if ((options.format_options & parse_options_t::skip_prefix) != 0) { + if ((last - first) >= 2 && *first == UC('0')) { + if ((first[1] == UC('x') || first[1] == UC('X')) || + (first[1] == UC('b') || first[1] == 
UC('B'))) { + first += 2; + } + } + } return from_chars_advanced_caller< size_t(is_supported_float_type::value) + 2 * size_t(is_supported_integer_type::value)>::call(first, last, value, From 587d890fa03e035386e8374b10bf9c5506a0bc8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EC=9E=AC=EC=9A=B1?= Date: Sun, 8 Feb 2026 17:12:53 +0900 Subject: [PATCH 4/8] Reformat --- include/fast_float/float_common.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h index 4013c137..951ec5d8 100644 --- a/include/fast_float/float_common.h +++ b/include/fast_float/float_common.h @@ -209,16 +209,12 @@ using parse_options = parse_options_t; #ifndef FASTFLOAT_ASSERT #define FASTFLOAT_ASSERT(x) \ - { \ - ((void)(x)); \ - } + { ((void)(x)); } #endif #ifndef FASTFLOAT_DEBUG_ASSERT #define FASTFLOAT_DEBUG_ASSERT(x) \ - { \ - ((void)(x)); \ - } + { ((void)(x)); } #endif // rust style `try!()` macro, or `?` operator From b50eeab22a6a8b39d8981ce91714fe69f0bb56ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EC=9E=AC=EC=9A=B1?= Date: Sun, 8 Feb 2026 18:21:37 +0900 Subject: [PATCH 5/8] Handle digit separators in exponent and fraction parsing --- include/fast_float/ascii_number.h | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index 240f668f..9f94cadf 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -401,7 +401,10 @@ parse_number_string(UC const *p, UC const *pend, // can occur at most twice without overflowing, but let it occur more, since // for integers with many digits, digit parsing is the primary bottleneck. 
if (options.digit_separator == UC('\0')) { + UC const *const before_simd = p; loop_parse_if_eight_digits(p, pend, i); + size_t const exploded = size_t(p - before_simd); + fractional_digit_count += int64_t(exploded); } while (p != pend) { @@ -523,10 +526,21 @@ parse_number_string(UC const *p, UC const *pend, ++p; } if (i >= minimal_nineteen_digit_integer) { // We have a big integer - exponent = end_of_integer_part - p + exp_number; + int64_t remaining_integer_digits = 0; + while (p != int_end) { + if (options.digit_separator != UC('\0') && + *p == options.digit_separator) { + ++p; + continue; + } + ++p; + ++remaining_integer_digits; + } + exponent = remaining_integer_digits + exp_number; } else { // We have a value with a fractional component. p = answer.fraction.ptr; UC const *frac_end = p + answer.fraction.len(); + int64_t fraction_digits_consumed = 0; while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) { if (options.digit_separator != UC('\0') && *p == options.digit_separator) { @@ -535,8 +549,9 @@ parse_number_string(UC const *p, UC const *pend, } i = i * 10 + uint64_t(*p - UC('0')); ++p; + ++fraction_digits_consumed; } - exponent = answer.fraction.ptr - p + exp_number; + exponent = exp_number - fraction_digits_consumed; } // We have now corrected both exponent and i, to a truncated value } From ad90d9a01d72ff94fb0617649cdf6e6ba81ddaab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EC=9E=AC=EC=9A=B1?= Date: Sun, 8 Feb 2026 21:25:00 +0900 Subject: [PATCH 6/8] refactor: simplify prefix skipping conditions --- include/fast_float/parse_number.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h index e040d045..c54afcc9 100644 --- a/include/fast_float/parse_number.h +++ b/include/fast_float/parse_number.h @@ -476,12 +476,11 @@ template FASTFLOAT_CONSTEXPR20 from_chars_result_t from_chars_advanced(UC const *first, UC const *last, T &value, parse_options_t options) 
noexcept { - if ((options.format_options & parse_options_t::skip_prefix) != 0) { - if ((last - first) >= 2 && *first == UC('0')) { - if ((first[1] == UC('x') || first[1] == UC('X')) || - (first[1] == UC('b') || first[1] == UC('B'))) { - first += 2; - } + if (((options.format_options & parse_options_t::skip_prefix) != 0) && + (last - first >= 2) && (*first == UC('0'))) { + const UC c_low = UC(first[1] | UC(0x20)); + if (c_low == UC('x') || c_low == UC('b')) { + first += 2; } } return from_chars_advanced_caller< From b9ffbe8dc612911ec98d8a3b0e7d511f7456d30d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EC=9E=AC=EC=9A=B1?= Date: Sun, 8 Feb 2026 22:10:34 +0900 Subject: [PATCH 7/8] refactor: pre-calculate digit separator presence and value --- include/fast_float/ascii_number.h | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index 9f94cadf..7dcbdeeb 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -351,12 +351,14 @@ parse_number_string(UC const *p, UC const *pend, } } UC const *const start_digits = p; + const UC separator = options.digit_separator; + const bool has_separator = (separator != UC('\0')); uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad) int64_t digit_count = 0; while (p != pend) { - if (options.digit_separator != UC('\0') && *p == options.digit_separator) { + if (has_separator && *p == separator) { ++p; continue; } @@ -380,9 +382,8 @@ parse_number_string(UC const *p, UC const *pend, return report_parse_error(p, parse_error::no_digits_in_integer_part); } UC const *first_digit = start_digits; - while (first_digit != end_of_integer_part && - options.digit_separator != UC('\0') && - *first_digit == options.digit_separator) { + while (first_digit != end_of_integer_part && has_separator && + *first_digit == separator) { ++first_digit; } if (first_digit != end_of_integer_part && 
*first_digit == UC('0') && @@ -400,7 +401,7 @@ parse_number_string(UC const *p, UC const *pend, UC const *before = p; // can occur at most twice without overflowing, but let it occur more, since // for integers with many digits, digit parsing is the primary bottleneck. - if (options.digit_separator == UC('\0')) { + if (!has_separator) { UC const *const before_simd = p; loop_parse_if_eight_digits(p, pend, i); size_t const exploded = size_t(p - before_simd); @@ -408,8 +409,7 @@ parse_number_string(UC const *p, UC const *pend, } while (p != pend) { - if (options.digit_separator != UC('\0') && - *p == options.digit_separator) { + if (has_separator && *p == separator) { ++p; continue; } @@ -499,8 +499,7 @@ parse_number_string(UC const *p, UC const *pend, // E.g., 0.000000000...000. UC const *start = start_digits; while ((start != pend) && (*start == UC('0') || *start == decimal_point || - (options.digit_separator != UC('\0') && - *start == options.digit_separator))) { + (has_separator && *start == separator))) { if (*start == UC('0')) { digit_count--; } @@ -517,8 +516,7 @@ parse_number_string(UC const *p, UC const *pend, UC const *int_end = p + answer.integer.len(); uint64_t const minimal_nineteen_digit_integer{1000000000000000000}; while ((i < minimal_nineteen_digit_integer) && (p != int_end)) { - if (options.digit_separator != UC('\0') && - *p == options.digit_separator) { + if (has_separator && *p == separator) { ++p; continue; } @@ -528,8 +526,7 @@ parse_number_string(UC const *p, UC const *pend, if (i >= minimal_nineteen_digit_integer) { // We have a big integer int64_t remaining_integer_digits = 0; while (p != int_end) { - if (options.digit_separator != UC('\0') && - *p == options.digit_separator) { + if (has_separator && *p == separator) { ++p; continue; } @@ -542,8 +539,7 @@ parse_number_string(UC const *p, UC const *pend, UC const *frac_end = p + answer.fraction.len(); int64_t fraction_digits_consumed = 0; while ((i < minimal_nineteen_digit_integer) && (p != 
frac_end)) { - if (options.digit_separator != UC('\0') && - *p == options.digit_separator) { + if (has_separator && *p == separator) { ++p; continue; } From a6c87105dcae975c9f78c1b3ad87cf022f48b06e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EC=9E=AC=EC=9A=B1?= Date: Sun, 8 Feb 2026 22:15:17 +0900 Subject: [PATCH 8/8] refactor: simplify leading zero detection --- include/fast_float/ascii_number.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index 7dcbdeeb..c78fd2a2 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -356,6 +356,7 @@ parse_number_string(UC const *p, UC const *pend, uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad) int64_t digit_count = 0; + UC const *first_digit_ptr = nullptr; while (p != pend) { if (has_separator && *p == separator) { @@ -365,6 +366,9 @@ parse_number_string(UC const *p, UC const *pend, if (!is_integer(*p)) { break; } + if (digit_count == 0) { + first_digit_ptr = p; + } // a multiplication by 10 is cheaper than an arbitrary integer // multiplication i = 10 * i + @@ -381,13 +385,7 @@ parse_number_string(UC const *p, UC const *pend, if (digit_count == 0) { return report_parse_error(p, parse_error::no_digits_in_integer_part); } - UC const *first_digit = start_digits; - while (first_digit != end_of_integer_part && has_separator && - *first_digit == separator) { - ++first_digit; - } - if (first_digit != end_of_integer_part && *first_digit == UC('0') && - digit_count > 1) { + if (digit_count > 1 && *first_digit_ptr == UC('0')) { return report_parse_error(start_digits, parse_error::leading_zeros_in_integer_part); }