Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 61 additions & 12 deletions include/fast_float/ascii_number.h
Original file line number Diff line number Diff line change
Expand Up @@ -351,52 +351,81 @@ parse_number_string(UC const *p, UC const *pend,
}
}
UC const *const start_digits = p;
const UC separator = options.digit_separator;
const bool has_separator = (separator != UC('\0'));

uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad)
int64_t digit_count = 0;
UC const *first_digit_ptr = nullptr;

while ((p != pend) && is_integer(*p)) {
while (p != pend) {
if (has_separator && *p == separator) {
++p;
continue;
}
if (!is_integer(*p)) {
break;
}
if (digit_count == 0) {
first_digit_ptr = p;
}
// a multiplication by 10 is cheaper than an arbitrary integer
// multiplication
i = 10 * i +
uint64_t(*p -
UC('0')); // might overflow, we will handle the overflow later
++p;
++digit_count;
}
UC const *const end_of_integer_part = p;
int64_t digit_count = int64_t(end_of_integer_part - start_digits);
answer.integer = span<UC const>(start_digits, size_t(digit_count));
answer.integer =
span<UC const>(start_digits, size_t(end_of_integer_part - start_digits));
FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) {
// at least 1 digit in integer part, without leading zeros
if (digit_count == 0) {
return report_parse_error<UC>(p, parse_error::no_digits_in_integer_part);
}
if ((start_digits[0] == UC('0') && digit_count > 1)) {
if (digit_count > 1 && *first_digit_ptr == UC('0')) {
return report_parse_error<UC>(start_digits,
parse_error::leading_zeros_in_integer_part);
}
}

int64_t exponent = 0;
int64_t fractional_digit_count = 0;
bool const has_decimal_point = (p != pend) && (*p == decimal_point);
if (has_decimal_point) {
++p;
UC const *before = p;
// can occur at most twice without overflowing, but let it occur more, since
// for integers with many digits, digit parsing is the primary bottleneck.
loop_parse_if_eight_digits(p, pend, i);
if (!has_separator) {
UC const *const before_simd = p;
loop_parse_if_eight_digits(p, pend, i);
size_t const exploded = size_t(p - before_simd);
fractional_digit_count += int64_t(exploded);
}

while ((p != pend) && is_integer(*p)) {
while (p != pend) {
if (has_separator && *p == separator) {
++p;
continue;
}
if (!is_integer(*p)) {
break;
}
uint8_t digit = uint8_t(*p - UC('0'));
++p;
i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
++fractional_digit_count;
}
exponent = before - p;
exponent = -fractional_digit_count;
answer.fraction = span<UC const>(before, size_t(p - before));
digit_count -= exponent;
digit_count += fractional_digit_count;
}
FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) {
// at least 1 digit in fractional part
if (has_decimal_point && exponent == 0) {
if (has_decimal_point && fractional_digit_count == 0) {
return report_parse_error<UC>(p,
parse_error::no_digits_in_fractional_part);
}
Expand Down Expand Up @@ -467,7 +496,8 @@ parse_number_string(UC const *p, UC const *pend,
// We need to be mindful of the case where we only have zeroes...
// E.g., 0.000000000...000.
UC const *start = start_digits;
while ((start != pend) && (*start == UC('0') || *start == decimal_point)) {
while ((start != pend) && (*start == UC('0') || *start == decimal_point ||
(has_separator && *start == separator))) {
if (*start == UC('0')) {
digit_count--;
}
Expand All @@ -484,19 +514,38 @@ parse_number_string(UC const *p, UC const *pend,
UC const *int_end = p + answer.integer.len();
uint64_t const minimal_nineteen_digit_integer{1000000000000000000};
while ((i < minimal_nineteen_digit_integer) && (p != int_end)) {
if (has_separator && *p == separator) {
++p;
continue;
}
i = i * 10 + uint64_t(*p - UC('0'));
++p;
}
if (i >= minimal_nineteen_digit_integer) { // We have a big integer
exponent = end_of_integer_part - p + exp_number;
int64_t remaining_integer_digits = 0;
while (p != int_end) {
if (has_separator && *p == separator) {
++p;
continue;
}
++p;
++remaining_integer_digits;
}
exponent = remaining_integer_digits + exp_number;
} else { // We have a value with a fractional component.
p = answer.fraction.ptr;
UC const *frac_end = p + answer.fraction.len();
int64_t fraction_digits_consumed = 0;
while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
if (has_separator && *p == separator) {
++p;
continue;
}
i = i * 10 + uint64_t(*p - UC('0'));
++p;
++fraction_digits_consumed;
}
exponent = answer.fraction.ptr - p + exp_number;
exponent = exp_number - fraction_digits_consumed;
}
// We have now corrected both exponent and i, to a truncated value
}
Expand Down
14 changes: 12 additions & 2 deletions include/fast_float/float_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,15 +70,25 @@ using from_chars_result = from_chars_result_t<char>;

/**
 * Parsing options passed to from_chars_advanced.
 *
 * @tparam UC character type of the input being parsed
 */
template <typename UC> struct parse_options_t {
  /**
   * Builds an option set. The two trailing parameters are defaulted, so
   * callers that pass only (fmt, dot, b) keep their previous behaviour.
   *
   * @param fmt  number formats that are accepted
   * @param dot  character used as decimal point
   * @param b    base used for integers
   * @param sep  character used as digit separator; UC('\0') disables it
   * @param opts bitmask of additional format options (e.g. skip_prefix)
   */
  constexpr explicit parse_options_t(chars_format fmt = chars_format::general,
                                     UC dot = UC('.'), int b = 10,
                                     UC sep = UC('\0'), uint8_t opts = 0)
      // Initializers are listed in member declaration order.
      : format(fmt), decimal_point(dot), base(b), digit_separator(sep),
        format_options(opts) {}

  /** Which number formats are accepted */
  chars_format format;
  /** The character used as decimal point */
  UC decimal_point;
  /** The base used for integers */
  int base;
  /** The character used as digit separator. Use '\0' to
   * disable */
  UC digit_separator;
  /** Additional format options (bitmask), e.g. skip_prefix */
  uint8_t format_options;

  /** Bit for format_options: option to skip prefixes like 0x, 0b */
  static constexpr uint8_t skip_prefix = 1;
};

using parse_options = parse_options_t<char>;
Expand Down
7 changes: 7 additions & 0 deletions include/fast_float/parse_number.h
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,13 @@ template <typename T, typename UC>
FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
from_chars_advanced(UC const *first, UC const *last, T &value,
parse_options_t<UC> options) noexcept {
if (((options.format_options & parse_options_t<UC>::skip_prefix) != 0) &&
(last - first >= 2) && (*first == UC('0'))) {
const UC c_low = UC(first[1] | UC(0x20));
if (c_low == UC('x') || c_low == UC('b')) {
first += 2;
}
}
return from_chars_advanced_caller<
size_t(is_supported_float_type<T>::value) +
2 * size_t(is_supported_integer_type<T>::value)>::call(first, last, value,
Expand Down