diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index 85435373..c78fd2a2 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -351,52 +351,81 @@ parse_number_string(UC const *p, UC const *pend, } } UC const *const start_digits = p; + const UC separator = options.digit_separator; + const bool has_separator = (separator != UC('\0')); uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad) + int64_t digit_count = 0; + UC const *first_digit_ptr = nullptr; - while ((p != pend) && is_integer(*p)) { + while (p != pend) { + if (has_separator && *p == separator) { + ++p; + continue; + } + if (!is_integer(*p)) { + break; + } + if (digit_count == 0) { + first_digit_ptr = p; + } // a multiplication by 10 is cheaper than an arbitrary integer // multiplication i = 10 * i + uint64_t(*p - UC('0')); // might overflow, we will handle the overflow later ++p; + ++digit_count; } UC const *const end_of_integer_part = p; - int64_t digit_count = int64_t(end_of_integer_part - start_digits); - answer.integer = span(start_digits, size_t(digit_count)); + answer.integer = + span(start_digits, size_t(end_of_integer_part - start_digits)); FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) { // at least 1 digit in integer part, without leading zeros if (digit_count == 0) { return report_parse_error(p, parse_error::no_digits_in_integer_part); } - if ((start_digits[0] == UC('0') && digit_count > 1)) { + if (digit_count > 1 && *first_digit_ptr == UC('0')) { return report_parse_error(start_digits, parse_error::leading_zeros_in_integer_part); } } int64_t exponent = 0; + int64_t fractional_digit_count = 0; bool const has_decimal_point = (p != pend) && (*p == decimal_point); if (has_decimal_point) { ++p; UC const *before = p; // can occur at most twice without overflowing, but let it occur more, since // for integers with many digits, digit parsing is the primary bottleneck. - loop_parse_if_eight_digits(p, pend, i); + if (!has_separator) { + UC const *const before_simd = p; + loop_parse_if_eight_digits(p, pend, i); + size_t const exploded = size_t(p - before_simd); + fractional_digit_count += int64_t(exploded); + } - while ((p != pend) && is_integer(*p)) { + while (p != pend) { + if (has_separator && *p == separator) { + ++p; + continue; + } + if (!is_integer(*p)) { + break; + } uint8_t digit = uint8_t(*p - UC('0')); ++p; i = i * 10 + digit; // in rare cases, this will overflow, but that's ok + ++fractional_digit_count; } - exponent = before - p; + exponent = -fractional_digit_count; answer.fraction = span(before, size_t(p - before)); - digit_count -= exponent; + digit_count += fractional_digit_count; } FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) { // at least 1 digit in fractional part - if (has_decimal_point && exponent == 0) { + if (has_decimal_point && fractional_digit_count == 0) { return report_parse_error(p, parse_error::no_digits_in_fractional_part); } @@ -467,7 +496,8 @@ parse_number_string(UC const *p, UC const *pend, // We need to be mindful of the case where we only have zeroes... // E.g., 0.000000000...000. UC const *start = start_digits; - while ((start != pend) && (*start == UC('0') || *start == decimal_point)) { + while ((start != pend) && (*start == UC('0') || *start == decimal_point || + (has_separator && *start == separator))) { if (*start == UC('0')) { digit_count--; } @@ -484,19 +514,38 @@ parse_number_string(UC const *p, UC const *pend, UC const *int_end = p + answer.integer.len(); uint64_t const minimal_nineteen_digit_integer{1000000000000000000}; while ((i < minimal_nineteen_digit_integer) && (p != int_end)) { + if (has_separator && *p == separator) { + ++p; + continue; + } i = i * 10 + uint64_t(*p - UC('0')); ++p; } if (i >= minimal_nineteen_digit_integer) { // We have a big integer - exponent = end_of_integer_part - p + exp_number; + int64_t remaining_integer_digits = 0; + while (p != int_end) { + if (has_separator && *p == separator) { + ++p; + continue; + } + ++p; + ++remaining_integer_digits; + } + exponent = remaining_integer_digits + exp_number; } else { // We have a value with a fractional component. p = answer.fraction.ptr; UC const *frac_end = p + answer.fraction.len(); + int64_t fraction_digits_consumed = 0; while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) { + if (has_separator && *p == separator) { + ++p; + continue; + } i = i * 10 + uint64_t(*p - UC('0')); ++p; + ++fraction_digits_consumed; } - exponent = answer.fraction.ptr - p + exp_number; + exponent = exp_number - fraction_digits_consumed; } // We have now corrected both exponent and i, to a truncated value } diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h index f35920ba..951ec5d8 100644 --- a/include/fast_float/float_common.h +++ b/include/fast_float/float_common.h @@ -70,8 +70,10 @@ using from_chars_result = from_chars_result_t; template struct parse_options_t { constexpr explicit parse_options_t(chars_format fmt = chars_format::general, - UC dot = UC('.'), int b = 10) - : format(fmt), decimal_point(dot), base(b) {} + UC dot = UC('.'), int b = 10, + UC sep = UC('\0'), uint8_t opts = 0) + : format(fmt), decimal_point(dot), base(b), digit_separator(sep), + format_options(opts) {} /** Which number formats are accepted */ chars_format format; @@ -79,6 +81,14 @@ template struct parse_options_t { UC decimal_point; /** The base used for integers */ int base; + /** The character used as digit separator. Use '\0' to + * disable */ + UC digit_separator; + /** Additional format options (bitmask) */ + uint8_t format_options; + + /** Option to skip prefixes like 0x, 0b */ + static constexpr uint8_t skip_prefix = 1; }; using parse_options = parse_options_t; diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h index c01bb15a..c54afcc9 100644 --- a/include/fast_float/parse_number.h +++ b/include/fast_float/parse_number.h @@ -476,6 +476,13 @@ template FASTFLOAT_CONSTEXPR20 from_chars_result_t from_chars_advanced(UC const *first, UC const *last, T &value, parse_options_t options) noexcept { + if (((options.format_options & parse_options_t::skip_prefix) != 0) && + (last - first >= 2) && (*first == UC('0'))) { + const UC c_low = UC(first[1] | UC(0x20)); + if (c_low == UC('x') || c_low == UC('b')) { + first += 2; + } + } return from_chars_advanced_caller< size_t(is_supported_float_type::value) + 2 * size_t(is_supported_integer_type::value)>::call(first, last, value,