diff --git a/include/prism/defines.h b/include/prism/defines.h index c48a600b21..d666582b17 100644 --- a/include/prism/defines.h +++ b/include/prism/defines.h @@ -91,6 +91,18 @@ # define inline __inline #endif +/** + * Force a function to be inlined at every call site. Use sparingly — only for + * small, hot functions where the compiler's heuristics fail to inline. + */ +#if defined(_MSC_VER) +# define PRISM_FORCE_INLINE __forceinline +#elif defined(__GNUC__) || defined(__clang__) +# define PRISM_FORCE_INLINE inline __attribute__((always_inline)) +#else +# define PRISM_FORCE_INLINE inline +#endif + /** * Old Visual Studio versions before 2015 do not implement sprintf, but instead * implement _snprintf. We standard that here. @@ -264,6 +276,49 @@ #define PRISM_UNLIKELY(x) (x) #endif +/** + * Platform detection for SIMD / fast-path implementations. At most one of + * these macros is defined, selecting the best available vectorization strategy. + */ +#if (defined(__aarch64__) && defined(__ARM_NEON)) || (defined(_MSC_VER) && defined(_M_ARM64)) + #define PRISM_HAS_NEON +#elif (defined(__x86_64__) && defined(__SSSE3__)) || (defined(_MSC_VER) && defined(_M_X64)) + #define PRISM_HAS_SSSE3 +#elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + #define PRISM_HAS_SWAR +#endif + +/** + * Count trailing zero bits in a 64-bit value. Used by SWAR identifier scanning + * to find the first non-matching byte in a word. + * + * Precondition: v must be nonzero. The result is undefined when v == 0 + * (matching the behavior of __builtin_ctzll and _BitScanForward64). 
+ */ +#if defined(__GNUC__) || defined(__clang__) + #define pm_ctzll(v) ((unsigned) __builtin_ctzll(v)) +#elif defined(_MSC_VER) + #include <intrin.h> + static inline unsigned pm_ctzll(uint64_t v) { + unsigned long index; + _BitScanForward64(&index, v); + return (unsigned) index; + } +#else + static inline unsigned + pm_ctzll(uint64_t v) { + unsigned c = 0; + v &= (~v + 1); /* isolate the lowest set bit; unsigned negation avoids signed-overflow UB when only bit 63 is set */ + if (v & 0x00000000FFFFFFFFULL) c += 0; else c += 32; + if (v & 0x0000FFFF0000FFFFULL) c += 0; else c += 16; + if (v & 0x00FF00FF00FF00FFULL) c += 0; else c += 8; + if (v & 0x0F0F0F0F0F0F0F0FULL) c += 0; else c += 4; + if (v & 0x3333333333333333ULL) c += 0; else c += 2; + if (v & 0x5555555555555555ULL) c += 0; else c += 1; + return c; + } +#endif + /** * We use -Wimplicit-fallthrough to guard potentially unintended fall-through between cases of a switch. * Use PRISM_FALLTHROUGH to explicitly annotate cases where the fallthrough is intentional. diff --git a/include/prism/node.h b/include/prism/node.h index 253f890055..f02f8ba892 100644 --- a/include/prism/node.h +++ b/include/prism/node.h @@ -17,6 +17,16 @@ #define PM_NODE_LIST_FOREACH(list, index, node) \ for (size_t index = 0; index < (list)->size && ((node) = (list)->nodes[index]); index++) +/** + * Slow path for pm_node_list_append: grow the list and append the node. + * Do not call directly — use pm_node_list_append instead. + * + * @param arena The arena to allocate from. + * @param list The list to append to. + * @param node The node to append. + */ +void pm_node_list_append_slow(pm_arena_t *arena, pm_node_list_t *list, pm_node_t *node); + /** * Append a new node onto the end of the node list. * @@ -24,7 +34,14 @@ * @param list The list to append to. * @param node The node to append. 
*/ -void pm_node_list_append(pm_arena_t *arena, pm_node_list_t *list, pm_node_t *node); +static PRISM_FORCE_INLINE void +pm_node_list_append(pm_arena_t *arena, pm_node_list_t *list, pm_node_t *node) { + if (list->size < list->capacity) { + list->nodes[list->size++] = node; + } else { + pm_node_list_append_slow(arena, list, node); + } +} /** * Prepend a new node onto the beginning of the node list. diff --git a/include/prism/parser.h b/include/prism/parser.h index d8e7a550e7..66df791244 100644 --- a/include/prism/parser.h +++ b/include/prism/parser.h @@ -100,6 +100,13 @@ typedef struct { pm_heredoc_indent_t indent; } pm_heredoc_lex_mode_t; +/** + * The size of the breakpoints and strpbrk cache charset buffers. All + * breakpoint arrays and the strpbrk cache charset must share this size so + * that memcmp can safely compare the full buffer without overreading. + */ +#define PM_STRPBRK_CACHE_SIZE 16 + /** * When lexing Ruby source, the lexer has a small amount of state to tell which * kind of token it is currently lexing. For example, when we find the start of @@ -169,7 +176,7 @@ typedef struct pm_lex_mode { * This is the character set that should be used to delimit the * tokens within the list. */ - uint8_t breakpoints[11]; + uint8_t breakpoints[PM_STRPBRK_CACHE_SIZE]; } list; struct { @@ -191,7 +198,7 @@ typedef struct pm_lex_mode { * This is the character set that should be used to delimit the * tokens within the regular expression. */ - uint8_t breakpoints[7]; + uint8_t breakpoints[PM_STRPBRK_CACHE_SIZE]; } regexp; struct { @@ -224,7 +231,7 @@ typedef struct pm_lex_mode { * This is the character set that should be used to delimit the * tokens within the string. */ - uint8_t breakpoints[7]; + uint8_t breakpoints[PM_STRPBRK_CACHE_SIZE]; } string; struct { @@ -556,6 +563,13 @@ typedef struct pm_locals { /** The capacity of the local variables set. */ uint32_t capacity; + /** + * A bloom filter over constant IDs stored in this set. 
Used to quickly + * reject lookups for names that are definitely not present, avoiding the + * cost of a linear scan or hash probe. + */ + uint32_t bloom; + /** The nullable allocated memory for the local variables in the set. */ pm_local_t *locals; } pm_locals_t; @@ -639,6 +653,9 @@ struct pm_parser { /** The arena used for all AST-lifetime allocations. Caller-owned. */ pm_arena_t *arena; + /** The arena used for parser metadata (comments, diagnostics, etc.). */ + pm_arena_t metadata_arena; + /** * The next node identifier that will be assigned. This is a unique * identifier used to track nodes such that the syntax tree can be dropped @@ -790,12 +807,26 @@ struct pm_parser { pm_line_offset_list_t line_offsets; /** - * We want to add a flag to integer nodes that indicates their base. We only - * want to parse these once, but we don't have space on the token itself to - * communicate this information. So we store it here and pass it through - * when we find tokens that we need it for. + * State communicated from the lexer to the parser for integer tokens. */ - pm_node_flags_t integer_base; + struct { + /** + * A flag indicating the base of the integer (binary, octal, decimal, + * hexadecimal). Set during lexing and read during node creation. + */ + pm_node_flags_t base; + + /** + * When lexing a decimal integer that fits in a uint32_t, we compute + * the value during lexing to avoid re-scanning the digits during + * parsing. If lexed is true, this holds the result and + * pm_integer_parse can be skipped. + */ + uint32_t value; + + /** Whether value holds a valid pre-computed integer. */ + bool lexed; + } integer; /** * This string is used to pass information from the lexer to the parser. It @@ -938,6 +969,27 @@ struct pm_parser { * toggled with a magic comment. */ bool warn_mismatched_indentation; + +#if defined(PRISM_HAS_NEON) || defined(PRISM_HAS_SSSE3) || defined(PRISM_HAS_SWAR) + /** + * Cached lookup tables for pm_strpbrk's SIMD fast path. 
Avoids rebuilding + * the nibble-based tables on every call when the charset hasn't changed + * (which is the common case during string/regex/list lexing). + */ + struct { + /** The cached charset (null-terminated, NUL-padded). */ + uint8_t charset[PM_STRPBRK_CACHE_SIZE]; + + /** Nibble-based low lookup table for SIMD matching. */ + uint8_t low_lut[16]; + + /** Nibble-based high lookup table for SIMD matching. */ + uint8_t high_lut[16]; + + /** Scalar fallback table (4 x 64-bit bitmasks covering all ASCII). */ + uint64_t table[4]; + } strpbrk_cache; +#endif }; #endif diff --git a/include/prism/util/pm_arena.h b/include/prism/util/pm_arena.h index f376d13459..175b39c6df 100644 --- a/include/prism/util/pm_arena.h +++ b/include/prism/util/pm_arena.h @@ -44,16 +44,52 @@ typedef struct { size_t block_count; } pm_arena_t; +/** + * Ensure the arena has at least `capacity` bytes available in its current + * block, allocating a new block if necessary. This allows callers to + * pre-size the arena to avoid repeated small block allocations. + * + * @param arena The arena to pre-size. + * @param capacity The minimum number of bytes to ensure are available. + */ +void pm_arena_reserve(pm_arena_t *arena, size_t capacity); + +/** + * Slow path for pm_arena_alloc: allocate a new block and return a pointer to + * the first `size` bytes. Do not call directly — use pm_arena_alloc instead. + * + * @param arena The arena to allocate from. + * @param size The number of bytes to allocate. + * @returns A pointer to the allocated memory. + */ +void * pm_arena_alloc_slow(pm_arena_t *arena, size_t size); + /** * Allocate memory from the arena. The returned memory is NOT zeroed. This * function is infallible — it aborts on allocation failure. * + * The fast path (bump pointer within the current block) is inlined at each + * call site. The slow path (new block allocation) is out-of-line. + * * @param arena The arena to allocate from. * @param size The number of bytes to allocate. 
* @param alignment The required alignment (must be a power of 2). * @returns A pointer to the allocated memory. */ -void * pm_arena_alloc(pm_arena_t *arena, size_t size, size_t alignment); +static PRISM_FORCE_INLINE void * +pm_arena_alloc(pm_arena_t *arena, size_t size, size_t alignment) { + if (arena->current != NULL) { + size_t used_aligned = (arena->current->used + alignment - 1) & ~(alignment - 1); + size_t needed = used_aligned + size; + + if (used_aligned >= arena->current->used && needed >= used_aligned && needed <= arena->current->capacity) { + arena->current->used = needed; + return arena->current->data + used_aligned; + } + } + + return pm_arena_alloc_slow(arena, size); +} /** * Allocate zero-initialized memory from the arena. This function is infallible @@ -64,7 +100,12 @@ void * pm_arena_alloc(pm_arena_t *arena, size_t size, size_t alignment); * @param alignment The required alignment (must be a power of 2). * @returns A pointer to the allocated, zero-initialized memory. */ -void * pm_arena_zalloc(pm_arena_t *arena, size_t size, size_t alignment); +static inline void * +pm_arena_zalloc(pm_arena_t *arena, size_t size, size_t alignment) { + void *ptr = pm_arena_alloc(arena, size, alignment); + memset(ptr, 0, size); + return ptr; +} /** * Allocate memory from the arena and copy the given data into it. This is a @@ -76,7 +117,12 @@ void * pm_arena_zalloc(pm_arena_t *arena, size_t size, size_t alignment); * @param alignment The required alignment (must be a power of 2). * @returns A pointer to the allocated copy. */ -void * pm_arena_memdup(pm_arena_t *arena, const void *src, size_t size, size_t alignment); +static inline void * +pm_arena_memdup(pm_arena_t *arena, const void *src, size_t size, size_t alignment) { + void *dst = pm_arena_alloc(arena, size, alignment); + memcpy(dst, src, size); + return dst; +} /** * Free all blocks in the arena. 
After this call, all pointers returned by diff --git a/include/prism/util/pm_char.h b/include/prism/util/pm_char.h index ab1f513a66..516390b21c 100644 --- a/include/prism/util/pm_char.h +++ b/include/prism/util/pm_char.h @@ -12,6 +12,58 @@ #include #include +/** Bit flag for whitespace characters in pm_byte_table. */ +#define PRISM_CHAR_BIT_WHITESPACE (1 << 0) + +/** Bit flag for inline whitespace characters in pm_byte_table. */ +#define PRISM_CHAR_BIT_INLINE_WHITESPACE (1 << 1) + +/** + * A lookup table for classifying bytes. Each entry is a bitfield of + * PRISM_CHAR_BIT_* flags. Defined in pm_char.c. + */ +extern const uint8_t pm_byte_table[256]; + +/** + * Returns true if the given character is a whitespace character. + * + * @param b The character to check. + * @return True if the given character is a whitespace character. + */ +static PRISM_FORCE_INLINE bool +pm_char_is_whitespace(const uint8_t b) { + return (pm_byte_table[b] & PRISM_CHAR_BIT_WHITESPACE) != 0; +} + +/** + * Returns true if the given character is an inline whitespace character. + * + * @param b The character to check. + * @return True if the given character is an inline whitespace character. + */ +static PRISM_FORCE_INLINE bool +pm_char_is_inline_whitespace(const uint8_t b) { + return (pm_byte_table[b] & PRISM_CHAR_BIT_INLINE_WHITESPACE) != 0; +} + +/** + * Returns the number of characters at the start of the string that are inline + * whitespace (space/tab). Scans the byte table directly for use in hot paths. + * + * @param string The string to search. + * @param length The maximum number of characters to search. + * @return The number of characters at the start of the string that are inline + * whitespace. 
+ */ +static PRISM_FORCE_INLINE size_t +pm_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length) { + if (length <= 0) return 0; + size_t size = 0; + size_t maximum = (size_t) length; + while (size < maximum && (pm_byte_table[string[size]] & PRISM_CHAR_BIT_INLINE_WHITESPACE)) size++; + return size; +} + /** * Returns the number of characters at the start of the string that are * whitespace. Disallows searching past the given maximum number of characters. @@ -30,24 +82,14 @@ size_t pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length); * * @param string The string to search. * @param length The maximum number of characters to search. + * @param arena The arena to allocate from when appending to line_offsets. * @param line_offsets The list of newlines to populate. * @param start_offset The offset at which the string occurs in the source, for * the purpose of tracking newlines. * @return The number of characters at the start of the string that are * whitespace. */ -size_t pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_line_offset_list_t *line_offsets, uint32_t start_offset); - -/** - * Returns the number of characters at the start of the string that are inline - * whitespace. Disallows searching past the given maximum number of characters. - * - * @param string The string to search. - * @param length The maximum number of characters to search. - * @return The number of characters at the start of the string that are inline - * whitespace. 
- */ -size_t pm_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length); +size_t pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_arena_t *arena, pm_line_offset_list_t *line_offsets, uint32_t start_offset); /** * Returns the number of characters at the start of the string that are decimal @@ -155,21 +197,6 @@ size_t pm_strspn_regexp_option(const uint8_t *string, ptrdiff_t length); */ size_t pm_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid); -/** - * Returns true if the given character is a whitespace character. - * - * @param b The character to check. - * @return True if the given character is a whitespace character. - */ -bool pm_char_is_whitespace(const uint8_t b); - -/** - * Returns true if the given character is an inline whitespace character. - * - * @param b The character to check. - * @return True if the given character is an inline whitespace character. - */ -bool pm_char_is_inline_whitespace(const uint8_t b); /** * Returns true if the given character is a binary digit. diff --git a/include/prism/util/pm_constant_pool.h b/include/prism/util/pm_constant_pool.h index 1d4922a661..c527343273 100644 --- a/include/prism/util/pm_constant_pool.h +++ b/include/prism/util/pm_constant_pool.h @@ -113,6 +113,15 @@ typedef struct { /** The hash of the bucket. */ uint32_t hash; + + /** + * A pointer to the start of the string, stored directly in the bucket to + * avoid a pointer chase to the constants array during probing. + */ + const uint8_t *start; + + /** The length of the string. */ + size_t length; } pm_constant_pool_bucket_t; /** A constant in the pool which effectively stores a string. */ @@ -142,11 +151,11 @@ typedef struct { /** * Initialize a new constant pool with a given capacity. * + * @param arena The arena to allocate from. * @param pool The pool to initialize. * @param capacity The initial capacity of the pool. - * @return Whether the initialization succeeded. 
*/ -bool pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity); +void pm_constant_pool_init(pm_arena_t *arena, pm_constant_pool_t *pool, uint32_t capacity); /** * Return a pointer to the constant indicated by the given constant id. @@ -172,41 +181,37 @@ pm_constant_id_t pm_constant_pool_find(const pm_constant_pool_t *pool, const uin * Insert a constant into a constant pool that is a slice of a source string. * Returns the id of the constant, or 0 if any potential calls to resize fail. * + * @param arena The arena to allocate from. * @param pool The pool to insert the constant into. * @param start A pointer to the start of the constant. * @param length The length of the constant. * @return The id of the constant. */ -pm_constant_id_t pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, size_t length); +pm_constant_id_t pm_constant_pool_insert_shared(pm_arena_t *arena, pm_constant_pool_t *pool, const uint8_t *start, size_t length); /** * Insert a constant into a constant pool from memory that is now owned by the * constant pool. Returns the id of the constant, or 0 if any potential calls to * resize fail. * + * @param arena The arena to allocate from. * @param pool The pool to insert the constant into. * @param start A pointer to the start of the constant. * @param length The length of the constant. * @return The id of the constant. */ -pm_constant_id_t pm_constant_pool_insert_owned(pm_constant_pool_t *pool, uint8_t *start, size_t length); +pm_constant_id_t pm_constant_pool_insert_owned(pm_arena_t *arena, pm_constant_pool_t *pool, uint8_t *start, size_t length); /** * Insert a constant into a constant pool from memory that is constant. Returns * the id of the constant, or 0 if any potential calls to resize fail. * + * @param arena The arena to allocate from. * @param pool The pool to insert the constant into. * @param start A pointer to the start of the constant. * @param length The length of the constant. 
* @return The id of the constant. */ -pm_constant_id_t pm_constant_pool_insert_constant(pm_constant_pool_t *pool, const uint8_t *start, size_t length); - -/** - * Free the memory associated with a constant pool. - * - * @param pool The pool to free. - */ -void pm_constant_pool_free(pm_constant_pool_t *pool); +pm_constant_id_t pm_constant_pool_insert_constant(pm_arena_t *arena, pm_constant_pool_t *pool, const uint8_t *start, size_t length); #endif diff --git a/include/prism/util/pm_line_offset_list.h b/include/prism/util/pm_line_offset_list.h index 968eeae52d..62a52da4ec 100644 --- a/include/prism/util/pm_line_offset_list.h +++ b/include/prism/util/pm_line_offset_list.h @@ -15,6 +15,7 @@ #define PRISM_LINE_OFFSET_LIST_H #include "prism/defines.h" +#include "prism/util/pm_arena.h" #include #include @@ -48,14 +49,13 @@ typedef struct { } pm_line_column_t; /** - * Initialize a new line offset list with the given capacity. Returns true if - * the allocation of the offsets succeeds, otherwise returns false. + * Initialize a new line offset list with the given capacity. * + * @param arena The arena to allocate from. * @param list The list to initialize. * @param capacity The initial capacity of the list. - * @return True if the allocation of the offsets succeeds, otherwise false. */ -bool pm_line_offset_list_init(pm_line_offset_list_t *list, size_t capacity); +void pm_line_offset_list_init(pm_arena_t *arena, pm_line_offset_list_t *list, size_t capacity); /** * Clear out the offsets that have been appended to the list. @@ -65,15 +65,29 @@ bool pm_line_offset_list_init(pm_line_offset_list_t *list, size_t capacity); void pm_line_offset_list_clear(pm_line_offset_list_t *list); /** - * Append a new offset to the list. Returns true if the reallocation of the - * offsets succeeds (if one was necessary), otherwise returns false. + * Append a new offset to the list (slow path with resize). * + * @param arena The arena to allocate from. * @param list The list to append to. 
* @param cursor The offset to append. - * @return True if the reallocation of the offsets succeeds (if one was - * necessary), otherwise false. */ -bool pm_line_offset_list_append(pm_line_offset_list_t *list, uint32_t cursor); +void pm_line_offset_list_append_slow(pm_arena_t *arena, pm_line_offset_list_t *list, uint32_t cursor); + +/** + * Append a new offset to the list. + * + * @param arena The arena to allocate from. + * @param list The list to append to. + * @param cursor The offset to append. + */ +static PRISM_FORCE_INLINE void +pm_line_offset_list_append(pm_arena_t *arena, pm_line_offset_list_t *list, uint32_t cursor) { + if (list->size < list->capacity) { + list->offsets[list->size++] = cursor; + } else { + pm_line_offset_list_append_slow(arena, list, cursor); + } +} /** * Returns the line of the given offset. If the offset is not in the list, the @@ -98,11 +112,4 @@ int32_t pm_line_offset_list_line(const pm_line_offset_list_t *list, uint32_t cur */ PRISM_EXPORTED_FUNCTION pm_line_column_t pm_line_offset_list_line_column(const pm_line_offset_list_t *list, uint32_t cursor, int32_t start_line); -/** - * Free the internal memory allocated for the list. - * - * @param list The list to free. - */ -void pm_line_offset_list_free(pm_line_offset_list_t *list); - #endif diff --git a/src/prism.c b/src/prism.c index 9d58bdb43d..dc7cbef2d4 100644 --- a/src/prism.c +++ b/src/prism.c @@ -149,7 +149,8 @@ lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) { // These are the places where we need to split up the content of the list. // We'll use strpbrk to find the first of these characters. uint8_t *breakpoints = lex_mode.as.list.breakpoints; - memcpy(breakpoints, "\\ \t\f\r\v\n\0\0\0", sizeof(lex_mode.as.list.breakpoints)); + memset(breakpoints, 0, PM_STRPBRK_CACHE_SIZE); + memcpy(breakpoints, "\\ \t\f\r\v\n", sizeof("\\ \t\f\r\v\n") - 1); size_t index = 7; // Now we'll add the terminator to the list of breakpoints. 
If the @@ -201,7 +202,8 @@ lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminato // regular expression. We'll use strpbrk to find the first of these // characters. uint8_t *breakpoints = lex_mode.as.regexp.breakpoints; - memcpy(breakpoints, "\r\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints)); + memset(breakpoints, 0, PM_STRPBRK_CACHE_SIZE); + memcpy(breakpoints, "\r\n\\#", sizeof("\r\n\\#") - 1); size_t index = 4; // First we'll add the terminator. @@ -237,7 +239,8 @@ lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed // These are the places where we need to split up the content of the // string. We'll use strpbrk to find the first of these characters. uint8_t *breakpoints = lex_mode.as.string.breakpoints; - memcpy(breakpoints, "\r\n\\\0\0\0", sizeof(lex_mode.as.string.breakpoints)); + memset(breakpoints, 0, PM_STRPBRK_CACHE_SIZE); + memcpy(breakpoints, "\r\n\\", sizeof("\r\n\\") - 1); size_t index = 3; // Now add in the terminator. If the terminator is not already a NULL byte, @@ -451,7 +454,7 @@ debug_lex_state_set(pm_parser_t *parser, pm_lex_state_t state, char const * call */ static inline void pm_parser_err(pm_parser_t *parser, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id) { - pm_diagnostic_list_append(&parser->error_list, start, length, diag_id); + pm_diagnostic_list_append(&parser->metadata_arena, &parser->error_list, start, length, diag_id); } /** @@ -494,7 +497,7 @@ pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_ * Append an error to the list of errors on the parser using a format string. */ #define PM_PARSER_ERR_FORMAT(parser_, start_, length_, diag_id_, ...) 
\ - pm_diagnostic_list_append_format(&(parser_)->error_list, start_, length_, diag_id_, __VA_ARGS__) + pm_diagnostic_list_append_format(&(parser_)->metadata_arena, &(parser_)->error_list, start_, length_, diag_id_, __VA_ARGS__) /** * Append an error to the list of errors on the parser using the location of the @@ -529,7 +532,7 @@ pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_ */ static inline void pm_parser_warn(pm_parser_t *parser, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id) { - pm_diagnostic_list_append(&parser->warning_list, start, length, diag_id); + pm_diagnostic_list_append(&parser->metadata_arena, &parser->warning_list, start, length, diag_id); } /** @@ -555,7 +558,7 @@ pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id * and the given location. */ #define PM_PARSER_WARN_FORMAT(parser_, start_, length_, diag_id_, ...) \ - pm_diagnostic_list_append_format(&(parser_)->warning_list, start_, length_, diag_id_, __VA_ARGS__) + pm_diagnostic_list_append_format(&(parser_)->metadata_arena, &(parser_)->warning_list, start_, length_, diag_id_, __VA_ARGS__) /** * Append a warning to the list of warnings on the parser using the location of @@ -773,7 +776,7 @@ pm_parser_scope_shareable_constant_set(pm_parser_t *parser, pm_shareable_constan /** * The point at which the set of locals switches from being a list to a hash. 
*/ -#define PM_LOCALS_HASH_THRESHOLD 9 +#define PM_LOCALS_HASH_THRESHOLD 5 static void pm_locals_free(pm_locals_t *locals) { @@ -855,6 +858,8 @@ pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, uint32_t start, uint pm_locals_resize(locals); } + locals->bloom |= (1u << (name & 31)); + if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) { for (uint32_t index = 0; index < locals->capacity; index++) { pm_local_t *local = &locals->locals[index]; @@ -907,6 +912,8 @@ pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, uint32_t start, uint */ static uint32_t pm_locals_find(pm_locals_t *locals, pm_constant_id_t name) { + if (!(locals->bloom & (1u << (name & 31)))) return UINT32_MAX; + if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) { for (uint32_t index = 0; index < locals->size; index++) { pm_local_t *local = &locals->locals[index]; @@ -1028,7 +1035,7 @@ pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals, */ static inline pm_constant_id_t pm_parser_constant_id_raw(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) { - return pm_constant_pool_insert_shared(&parser->constant_pool, start, (size_t) (end - start)); + return pm_constant_pool_insert_shared(&parser->metadata_arena, &parser->constant_pool, start, (size_t) (end - start)); } /** @@ -1036,7 +1043,7 @@ pm_parser_constant_id_raw(pm_parser_t *parser, const uint8_t *start, const uint8 */ static inline pm_constant_id_t pm_parser_constant_id_owned(pm_parser_t *parser, uint8_t *start, size_t length) { - return pm_constant_pool_insert_owned(&parser->constant_pool, start, length); + return pm_constant_pool_insert_owned(&parser->metadata_arena, &parser->constant_pool, start, length); } /** @@ -1044,7 +1051,7 @@ pm_parser_constant_id_owned(pm_parser_t *parser, uint8_t *start, size_t length) */ static inline pm_constant_id_t pm_parser_constant_id_constant(pm_parser_t *parser, const char *start, size_t length) { - return 
pm_constant_pool_insert_constant(&parser->constant_pool, (const uint8_t *) start, length); + return pm_constant_pool_insert_constant(&parser->metadata_arena, &parser->constant_pool, (const uint8_t *) start, length); } /** @@ -1777,6 +1784,184 @@ char_is_identifier_utf8(const uint8_t *b, ptrdiff_t n) { } } +/** + * Scan forward through ASCII identifier characters (a-z, A-Z, 0-9, _) using + * wide operations. Returns the number of leading ASCII identifier bytes. + * Callers must handle any remaining bytes (short tail or non-ASCII/UTF-8) + * with a byte-at-a-time loop. + * + * Up to three optimized implementations are selected at compile time, with a + * no-op fallback for unsupported platforms: + * 1. NEON — processes 16 bytes per iteration on aarch64. + * 2. SSSE3 — processes 16 bytes per iteration on x86-64. + * 3. SWAR — little-endian fallback, processes 8 bytes per iteration. + */ + +#if defined(PRISM_HAS_NEON) +#include <arm_neon.h> + +static inline size_t +scan_identifier_ascii(const uint8_t *start, const uint8_t *end) { + const uint8_t *cursor = start; + + // Nibble-based lookup tables for classifying [a-zA-Z0-9_]. + // Each high nibble is assigned a unique bit; the low nibble table + // contains the OR of bits for all high nibbles that have an + // identifier character at that low nibble position. A byte is an + // identifier character iff (low_lut[lo] & high_lut[hi]) != 0. 
+ static const uint8_t low_lut_data[16] = { + 0x15, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, + 0x1F, 0x1F, 0x1E, 0x0A, 0x0A, 0x0A, 0x0A, 0x0E + }; + static const uint8_t high_lut_data[16] = { + 0x00, 0x00, 0x00, 0x01, 0x02, 0x04, 0x08, 0x10, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 + }; + const uint8x16_t low_lut = vld1q_u8(low_lut_data); + const uint8x16_t high_lut = vld1q_u8(high_lut_data); + const uint8x16_t mask_0f = vdupq_n_u8(0x0F); + + while (cursor + 16 <= end) { + uint8x16_t v = vld1q_u8(cursor); + + uint8x16_t lo_class = vqtbl1q_u8(low_lut, vandq_u8(v, mask_0f)); + uint8x16_t hi_class = vqtbl1q_u8(high_lut, vshrq_n_u8(v, 4)); + uint8x16_t ident = vandq_u8(lo_class, hi_class); + + // Fast check: if the per-byte minimum is nonzero, every byte matched. + if (vminvq_u8(ident) != 0) { + cursor += 16; + continue; + } + + // Find the first non-identifier byte (zero in ident). + uint8x16_t is_zero = vceqq_u8(ident, vdupq_n_u8(0)); + uint64_t lo = vgetq_lane_u64(vreinterpretq_u64_u8(is_zero), 0); + + if (lo != 0) { + cursor += pm_ctzll(lo) / 8; + } else { + uint64_t hi = vgetq_lane_u64(vreinterpretq_u64_u8(is_zero), 1); + cursor += 8 + pm_ctzll(hi) / 8; + } + + return (size_t) (cursor - start); + } + + return (size_t) (cursor - start); +} + +#elif defined(PRISM_HAS_SSSE3) +#include <tmmintrin.h> + +static inline size_t +scan_identifier_ascii(const uint8_t *start, const uint8_t *end) { + const uint8_t *cursor = start; + + while (cursor + 16 <= end) { + __m128i v = _mm_loadu_si128((const __m128i *) cursor); + __m128i zero = _mm_setzero_si128(); + + // Unsigned range check via saturating subtraction: + // byte >= lo ⟺ saturate(lo - byte) == 0 + // byte <= hi ⟺ saturate(byte - hi) == 0 + + // Fold case: OR with 0x20 maps A-Z to a-z. 
+ __m128i lowered = _mm_or_si128(v, _mm_set1_epi8(0x20)); + __m128i letter = _mm_and_si128( + _mm_cmpeq_epi8(_mm_subs_epu8(_mm_set1_epi8(0x61), lowered), zero), + _mm_cmpeq_epi8(_mm_subs_epu8(lowered, _mm_set1_epi8(0x7A)), zero)); + + __m128i digit = _mm_and_si128( + _mm_cmpeq_epi8(_mm_subs_epu8(_mm_set1_epi8(0x30), v), zero), + _mm_cmpeq_epi8(_mm_subs_epu8(v, _mm_set1_epi8(0x39)), zero)); + + __m128i underscore = _mm_cmpeq_epi8(v, _mm_set1_epi8(0x5F)); + + __m128i ident = _mm_or_si128(_mm_or_si128(letter, digit), underscore); + int mask = _mm_movemask_epi8(ident); + + if (mask == 0xFFFF) { + cursor += 16; + continue; + } + + cursor += pm_ctzll((uint64_t) (~mask & 0xFFFF)); + return (size_t) (cursor - start); + } + + return (size_t) (cursor - start); +} + +// The SWAR path uses pm_ctzll to find the first non-matching byte within a +// word, which only yields the correct byte index on little-endian targets. +// We gate on a positive little-endian check so that unknown-endianness +// platforms safely fall through to the no-op fallback. +#elif defined(PRISM_HAS_SWAR) + +/** + * Portable SWAR fallback — processes 8 bytes per iteration. + * + * The byte-wise range checks avoid cross-byte borrows by pre-setting the high + * bit of each byte before subtraction: (byte | 0x80) - lo has a minimum value + * of 0x80 - 0x7F = 1, so underflow (and thus a borrow into the next byte) is + * impossible. The result has bit 7 set if and only if byte >= lo. The same + * reasoning applies to the upper-bound direction. + */ +static inline size_t +scan_identifier_ascii(const uint8_t *start, const uint8_t *end) { + static const uint64_t ones = 0x0101010101010101ULL; + static const uint64_t highs = 0x8080808080808080ULL; + const uint8_t *cursor = start; + + while (cursor + 8 <= end) { + uint64_t word; + memcpy(&word, cursor, 8); + + // Bail on any non-ASCII byte. 
+ if (word & highs) break; + + uint64_t digit = ((word | highs) - ones * 0x30) & ((ones * 0x39 | highs) - word) & highs; + + // Fold upper- and lowercase together by forcing bit 5 (OR 0x20), + // then check the lowercase range once. A-Z maps to a-z; the + // only non-letter byte that could alias into [0x61,0x7A] is one + // whose original value was in [0x41,0x5A] — which is exactly + // the uppercase letters we want to match. + uint64_t lowered = word | (ones * 0x20); + uint64_t letter = ((lowered | highs) - ones * 0x61) & ((ones * 0x7A | highs) - lowered) & highs; + + // Find bytes equal to underscore: XOR with 0x5F zeroes exactly those + // bytes, and since every byte is < 0x80 here, adding 0x7F sets bit 7 + // iff the byte is nonzero, with no carry into the neighboring byte. + uint64_t xor_us = word ^ (ones * 0x5F); + uint64_t underscore = ~(xor_us + ones * 0x7F) & highs; + + uint64_t ident = digit | letter | underscore; + + if (ident == highs) { + cursor += 8; + continue; + } + + // Find the first non-identifier byte. On little-endian the first + // byte sits in the least-significant position. + uint64_t not_ident = ~ident & highs; + cursor += pm_ctzll(not_ident) / 8; + return (size_t) (cursor - start); + } + + return (size_t) (cursor - start); +} + +#else + +// No-op fallback for big-endian or other unsupported platforms. +// The caller's byte-at-a-time loop handles everything. +#define scan_identifier_ascii(start, end) ((size_t) 0) + +#endif + /** * Like the above, this function is also used extremely frequently to lex all of * the identifiers in a source file once the first character has been found. 
So @@ -2908,10 +3093,10 @@ pm_call_write_read_name_init(pm_parser_t *parser, pm_constant_id_t *read_name, p if (write_constant->length > 0) { size_t length = write_constant->length - 1; - void *memory = xmalloc(length); + uint8_t *memory = (uint8_t *) pm_arena_alloc(parser->arena, length, 1); memcpy(memory, write_constant->start, length); - *read_name = pm_constant_pool_insert_owned(&parser->constant_pool, (uint8_t *) memory, length); + *read_name = pm_constant_pool_insert_owned(&parser->metadata_arena, &parser->constant_pool, memory, length); } else { // We can get here if the message was missing because of a syntax error. *read_name = pm_parser_constant_id_constant(parser, "", 0); @@ -3897,7 +4082,7 @@ pm_double_parse(pm_parser_t *parser, const pm_token_t *token) { ellipsis = ""; } - pm_diagnostic_list_append_format(&parser->warning_list, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), PM_WARN_FLOAT_OUT_OF_RANGE, warn_width, (const char *) token->start, ellipsis); + pm_diagnostic_list_append_format(&parser->metadata_arena, &parser->warning_list, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), PM_WARN_FLOAT_OUT_OF_RANGE, warn_width, (const char *) token->start, ellipsis); value = (value < 0.0) ? -HUGE_VAL : HUGE_VAL; } @@ -4489,17 +4674,24 @@ pm_integer_node_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token ((pm_integer_t) { 0 }) ); - pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL; - switch (base) { - case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break; - case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break; - case PM_INTEGER_BASE_FLAGS_DECIMAL: break; - case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break; - default: assert(false && "unreachable"); break; + if (parser->integer.lexed) { + // The value was already computed during lexing. 
+ node->value.value = parser->integer.value; + parser->integer.lexed = false; + } else { + pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL; + switch (base) { + case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break; + case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break; + case PM_INTEGER_BASE_FLAGS_DECIMAL: break; + case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break; + default: assert(false && "unreachable"); break; + } + + pm_integer_parse(&node->value, integer_base, token->start, token->end); + pm_integer_arena_move(parser->arena, &node->value); } - pm_integer_parse(&node->value, integer_base, token->start, token->end); - pm_integer_arena_move(parser->arena, &node->value); return node; } @@ -7316,11 +7508,13 @@ pm_char_is_magic_comment_key_delimiter(const uint8_t b) { */ static inline const uint8_t * parser_lex_magic_comment_emacs_marker(pm_parser_t *parser, const uint8_t *cursor, const uint8_t *end) { - while ((cursor + 3 <= end) && (cursor = pm_memchr(cursor, '-', (size_t) (end - cursor), parser->encoding_changed, parser->encoding)) != NULL) { - if (cursor + 3 <= end && cursor[1] == '*' && cursor[2] == '-') { - return cursor; + // Scan for '*' as the middle character, since it is rarer than '-' in + // typical comments and avoids repeated memchr calls for '-' that hit + // dashes in words like "foo-bar". + while ((cursor + 3 <= end) && (cursor = pm_memchr(cursor + 1, '*', (size_t) (end - cursor - 1), parser->encoding_changed, parser->encoding)) != NULL) { + if (cursor[-1] == '-' && cursor + 1 < end && cursor[1] == '-') { + return cursor - 1; } - cursor++; } return NULL; } @@ -7357,11 +7551,24 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) { // have a magic comment. return false; } + } else { + // Non-emacs magic comments must contain a colon for `key: value`. 
+ // Reject early if there is no colon to avoid scanning the entire + // comment character-by-character. + if (pm_memchr(start, ':', (size_t) (end - start), parser->encoding_changed, parser->encoding) == NULL) { + return false; + } + + // Advance start past leading whitespace so the main loop begins + // directly at the key, avoiding a redundant whitespace scan. + start += pm_strspn_whitespace(start, end - start); } cursor = start; while (cursor < end) { - while (cursor < end && (pm_char_is_magic_comment_key_delimiter(*cursor) || pm_char_is_whitespace(*cursor))) cursor++; + if (indicator) { + while (cursor < end && (pm_char_is_magic_comment_key_delimiter(*cursor) || pm_char_is_whitespace(*cursor))) cursor++; + } const uint8_t *key_start = cursor; while (cursor < end && (!pm_char_is_magic_comment_key_delimiter(*cursor) && !pm_char_is_whitespace(*cursor))) cursor++; @@ -7525,12 +7732,11 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) { pm_string_free(&key); // Allocate a new magic comment node to append to the parser's list. 
- pm_magic_comment_t *magic_comment; - if ((magic_comment = (pm_magic_comment_t *) xcalloc(1, sizeof(pm_magic_comment_t))) != NULL) { - magic_comment->key = (pm_location_t) { .start = U32(key_start - parser->start), .length = U32(key_length) }; - magic_comment->value = (pm_location_t) { .start = U32(value_start - parser->start), .length = value_length }; - pm_list_append(&parser->magic_comment_list, (pm_list_node_t *) magic_comment); - } + pm_magic_comment_t *magic_comment = (pm_magic_comment_t *) pm_arena_alloc(&parser->metadata_arena, sizeof(pm_magic_comment_t), PRISM_ALIGNOF(pm_magic_comment_t)); + magic_comment->node.next = NULL; + magic_comment->key = (pm_location_t) { .start = U32(key_start - parser->start), .length = U32(key_length) }; + magic_comment->value = (pm_location_t) { .start = U32(value_start - parser->start), .length = value_length }; + pm_list_append(&parser->magic_comment_list, (pm_list_node_t *) magic_comment); } return result; @@ -7877,7 +8083,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) { pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_BINARY); } - parser->integer_base = PM_INTEGER_BASE_FLAGS_BINARY; + parser->integer.base = PM_INTEGER_BASE_FLAGS_BINARY; break; // 0o1111 is an octal number @@ -7891,7 +8097,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) { pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_OCTAL); } - parser->integer_base = PM_INTEGER_BASE_FLAGS_OCTAL; + parser->integer.base = PM_INTEGER_BASE_FLAGS_OCTAL; break; // 01111 is an octal number @@ -7905,7 +8111,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) { case '6': case '7': parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end); - parser->integer_base = PM_INTEGER_BASE_FLAGS_OCTAL; + parser->integer.base = PM_INTEGER_BASE_FLAGS_OCTAL; break; // 0x1111 is a hexadecimal number @@ -7919,7 +8125,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) { pm_parser_err_current(parser, 
PM_ERR_INVALID_NUMBER_HEXADECIMAL); } - parser->integer_base = PM_INTEGER_BASE_FLAGS_HEXADECIMAL; + parser->integer.base = PM_INTEGER_BASE_FLAGS_HEXADECIMAL; break; // 0.xxx is a float @@ -7937,11 +8143,62 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) { } } else { // If it didn't start with a 0, then we'll lex as far as we can into a - // decimal number. - parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end); + // decimal number. We compute the integer value inline to avoid + // re-scanning the digits later in pm_integer_parse. + { + const uint8_t *cursor = parser->current.end; + const uint8_t *end = parser->end; + uint64_t value = (uint64_t) (cursor[-1] - '0'); + + bool has_underscore = false; + bool prev_underscore = false; + const uint8_t *invalid = NULL; + + while (cursor < end) { + uint8_t c = *cursor; + if (c >= '0' && c <= '9') { + if (value <= UINT32_MAX) value = value * 10 + (uint64_t) (c - '0'); + prev_underscore = false; + cursor++; + } else if (c == '_') { + has_underscore = true; + if (prev_underscore && invalid == NULL) invalid = cursor; + prev_underscore = true; + cursor++; + } else { + break; + } + } + + if (has_underscore) { + if (prev_underscore && invalid == NULL) invalid = cursor - 1; + pm_strspn_number_validate(parser, parser->current.end, (size_t) (cursor - parser->current.end), invalid); + } + + if (value <= UINT32_MAX) { + parser->integer.value = (uint32_t) value; + parser->integer.lexed = true; + } + + parser->current.end = cursor; + } // Afterward, we'll lex as far as we can into an optional float suffix. - type = lex_optional_float_suffix(parser, seen_e); + // Guard the function call: the vast majority of decimal numbers are + // plain integers, so avoid the call when the next byte cannot start a + // float suffix. + { + uint8_t next = peek(parser); + if (next == '.' 
|| next == 'e' || next == 'E') { + type = lex_optional_float_suffix(parser, seen_e); + + // If it turned out to be a float, the cached integer value is + // invalid. + if (type != PM_TOKEN_INTEGER) { + parser->integer.lexed = false; + } + } + } } // At this point we have a completed number, but we want to provide the user @@ -7960,7 +8217,8 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) { static pm_token_type_t lex_numeric(pm_parser_t *parser) { pm_token_type_t type = PM_TOKEN_INTEGER; - parser->integer_base = PM_INTEGER_BASE_FLAGS_DECIMAL; + parser->integer.base = PM_INTEGER_BASE_FLAGS_DECIMAL; + parser->integer.lexed = false; if (parser->current.end < parser->end) { bool seen_e = false; @@ -8148,6 +8406,10 @@ lex_identifier(pm_parser_t *parser, bool previous_command_start) { current_end += width; } } else { + // Fast path: scan ASCII identifier bytes using wide operations. + current_end += scan_identifier_ascii(current_end, end); + + // Byte-at-a-time fallback for the tail and any UTF-8 sequences. while ((width = char_is_identifier_utf8(current_end, end - current_end)) > 0) { current_end += width; } @@ -8594,7 +8856,7 @@ escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_ } if (width == 1) { - if (*parser->current.end == '\n') pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1); + if (*parser->current.end == '\n') pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1); escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(*parser->current.end++, flags)); } else if (width > 1) { // Valid multibyte character. Just ignore escape. 
@@ -8911,7 +9173,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre return; } - if (peeked == '\n') pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1); + if (peeked == '\n') pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1); parser->current.end++; escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL)); return; @@ -8970,7 +9232,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre return; } - if (peeked == '\n') pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1); + if (peeked == '\n') pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1); parser->current.end++; escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL)); return; @@ -9024,7 +9286,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre return; } - if (peeked == '\n') pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1); + if (peeked == '\n') pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1); parser->current.end++; escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META)); return; @@ -9032,7 +9294,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre } case '\r': { if (peek_offset(parser, 1) == '\n') { - pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 2); + pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 2); parser->current.end += 2; 
escape_write_byte_encoded(parser, buffer, flags, escape_byte('\n', flags)); return; @@ -9189,8 +9451,7 @@ parser_lex_callback(pm_parser_t *parser) { */ static inline pm_comment_t * parser_comment(pm_parser_t *parser, pm_comment_type_t type) { - pm_comment_t *comment = (pm_comment_t *) xcalloc(1, sizeof(pm_comment_t)); - if (comment == NULL) return NULL; + pm_comment_t *comment = (pm_comment_t *) pm_arena_alloc(&parser->metadata_arena, sizeof(pm_comment_t), PRISM_ALIGNOF(pm_comment_t)); *comment = (pm_comment_t) { .type = type, @@ -9213,7 +9474,7 @@ lex_embdoc(pm_parser_t *parser) { if (newline == NULL) { parser->current.end = parser->end; } else { - pm_line_offset_list_append(&parser->line_offsets, U32(newline - parser->start + 1)); + pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1)); parser->current.end = newline + 1; } @@ -9223,7 +9484,6 @@ lex_embdoc(pm_parser_t *parser) { // Now, create a comment that is going to be attached to the parser. const uint8_t *comment_start = parser->current.start; pm_comment_t *comment = parser_comment(parser, PM_COMMENT_EMBDOC); - if (comment == NULL) return PM_TOKEN_EOF; // Now, loop until we find the end of the embedded documentation or the end // of the file. 
@@ -9247,7 +9507,7 @@ lex_embdoc(pm_parser_t *parser) { if (newline == NULL) { parser->current.end = parser->end; } else { - pm_line_offset_list_append(&parser->line_offsets, U32(newline - parser->start + 1)); + pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1)); parser->current.end = newline + 1; } @@ -9267,7 +9527,7 @@ lex_embdoc(pm_parser_t *parser) { if (newline == NULL) { parser->current.end = parser->end; } else { - pm_line_offset_list_append(&parser->line_offsets, U32(newline - parser->start + 1)); + pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1)); parser->current.end = newline + 1; } @@ -9577,7 +9837,7 @@ pm_lex_percent_delimiter(pm_parser_t *parser) { parser_flush_heredoc_end(parser); } else { // Otherwise, we'll add the newline to the list of newlines. - pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + U32(eol_length)); + pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + U32(eol_length)); } uint8_t delimiter = *parser->current.end; @@ -9653,17 +9913,24 @@ parser_lex(pm_parser_t *parser) { bool space_seen = false; // First, we're going to skip past any whitespace at the front of the next - // token. + // token. Skip runs of inline whitespace in bulk to avoid per-character + // stores back to parser->current.end. 
bool chomping = true; while (parser->current.end < parser->end && chomping) { - switch (*parser->current.end) { - case ' ': - case '\t': - case '\f': - case '\v': - parser->current.end++; + { + static const uint8_t inline_whitespace[256] = { + [' '] = 1, ['\t'] = 1, ['\f'] = 1, ['\v'] = 1 + }; + const uint8_t *scan = parser->current.end; + while (scan < parser->end && inline_whitespace[*scan]) scan++; + if (scan > parser->current.end) { + parser->current.end = scan; space_seen = true; - break; + continue; + } + } + + switch (*parser->current.end) { case '\r': if (match_eol_offset(parser, 1)) { chomping = false; @@ -9681,7 +9948,7 @@ parser_lex(pm_parser_t *parser) { parser->heredoc_end = NULL; } else { parser->current.end += eol_length + 1; - pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current)); + pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current)); space_seen = true; } } else if (pm_char_is_inline_whitespace(*parser->current.end)) { @@ -9783,7 +10050,7 @@ parser_lex(pm_parser_t *parser) { } if (parser->heredoc_end == NULL) { - pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current)); + pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current)); } } @@ -10309,7 +10576,7 @@ parser_lex(pm_parser_t *parser) { } else { // Otherwise, we want to indicate that the body of the // heredoc starts on the character after the next newline. - pm_line_offset_list_append(&parser->line_offsets, U32(body_start - parser->start + 1)); + pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(body_start - parser->start + 1)); body_start++; } @@ -10950,7 +11217,7 @@ parser_lex(pm_parser_t *parser) { // correct column information for it. 
const uint8_t *cursor = parser->current.end; while ((cursor = next_newline(cursor, parser->end - cursor)) != NULL) { - pm_line_offset_list_append(&parser->line_offsets, U32(++cursor - parser->start)); + pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(++cursor - parser->start)); } parser->current.end = parser->end; @@ -11011,7 +11278,7 @@ parser_lex(pm_parser_t *parser) { whitespace += 1; } } else { - whitespace = pm_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current)); + whitespace = pm_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current)); } if (whitespace > 0) { @@ -11126,7 +11393,7 @@ parser_lex(pm_parser_t *parser) { LEX(PM_TOKEN_STRING_CONTENT); } else { // ... else track the newline. - pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1); + pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1); } parser->current.end++; @@ -11264,7 +11531,7 @@ parser_lex(pm_parser_t *parser) { // would have already have added the newline to the // list. if (parser->heredoc_end == NULL) { - pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current)); + pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current)); } } else { parser->current.end = breakpoint + 1; @@ -11311,7 +11578,7 @@ parser_lex(pm_parser_t *parser) { // If we've hit a newline, then we need to track that in // the list of newlines. 
if (parser->heredoc_end == NULL) { - pm_line_offset_list_append(&parser->line_offsets, U32(breakpoint - parser->start + 1)); + pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(breakpoint - parser->start + 1)); parser->current.end = breakpoint + 1; breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false); break; @@ -11359,7 +11626,7 @@ parser_lex(pm_parser_t *parser) { LEX(PM_TOKEN_STRING_CONTENT); } else { // ... else track the newline. - pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1); + pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1); } parser->current.end++; @@ -11524,7 +11791,7 @@ parser_lex(pm_parser_t *parser) { // would have already have added the newline to the // list. if (parser->heredoc_end == NULL) { - pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current)); + pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current)); } } else { parser->current.end = breakpoint + 1; @@ -11576,7 +11843,7 @@ parser_lex(pm_parser_t *parser) { // for the terminator in case the terminator is a // newline character. if (parser->heredoc_end == NULL) { - pm_line_offset_list_append(&parser->line_offsets, U32(breakpoint - parser->start + 1)); + pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(breakpoint - parser->start + 1)); parser->current.end = breakpoint + 1; breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true); break; @@ -11630,7 +11897,7 @@ parser_lex(pm_parser_t *parser) { LEX(PM_TOKEN_STRING_CONTENT); } else { // ... else track the newline. 
- pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1); + pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1); } parser->current.end++; @@ -11759,7 +12026,7 @@ parser_lex(pm_parser_t *parser) { (memcmp(terminator_start, ident_start, ident_length) == 0) ) { if (newline != NULL) { - pm_line_offset_list_append(&parser->line_offsets, U32(newline - parser->start + 1)); + pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1)); } parser->current.end = terminator_end; @@ -11790,7 +12057,7 @@ parser_lex(pm_parser_t *parser) { // Otherwise we'll be parsing string content. These are the places // where we need to split up the content of the heredoc. We'll use // strpbrk to find the first of these characters. - uint8_t breakpoints[] = "\r\n\\#"; + uint8_t breakpoints[PM_STRPBRK_CACHE_SIZE] = "\r\n\\#"; pm_heredoc_quote_t quote = heredoc_lex_mode->quote; if (quote == PM_HEREDOC_QUOTE_SINGLE) { @@ -11831,7 +12098,7 @@ parser_lex(pm_parser_t *parser) { LEX(PM_TOKEN_STRING_CONTENT); } - pm_line_offset_list_append(&parser->line_offsets, U32(breakpoint - parser->start + 1)); + pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(breakpoint - parser->start + 1)); // If we have a - or ~ heredoc, then we can match after // some leading whitespace. @@ -11951,7 +12218,7 @@ parser_lex(pm_parser_t *parser) { const uint8_t *end = parser->current.end; if (parser->heredoc_end == NULL) { - pm_line_offset_list_append(&parser->line_offsets, U32(end - parser->start + 1)); + pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(end - parser->start + 1)); } // Here we want the buffer to only @@ -12547,16 +12814,12 @@ parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) { // append an =. 
pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *name_field); size_t length = constant->length; - uint8_t *name = xcalloc(length + 1, sizeof(uint8_t)); - if (name == NULL) return; + uint8_t *name = (uint8_t *) pm_arena_alloc(parser->arena, length + 1, 1); memcpy(name, constant->start, length); name[length] = '='; - // Now switch the name to the new string. - // This silences clang analyzer warning about leak of memory pointed by `name`. - // NOLINTNEXTLINE(clang-analyzer-*) - *name_field = pm_constant_pool_insert_owned(&parser->constant_pool, name, length + 1); + *name_field = pm_constant_pool_insert_owned(&parser->metadata_arena, &parser->constant_pool, name, length + 1); } /** @@ -13177,6 +13440,7 @@ pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *liter pm_static_literal_inspect(&buffer, &parser->line_offsets, parser->start, parser->start_line, parser->encoding->name, duplicated); pm_diagnostic_list_append_format( + &parser->metadata_arena, &parser->warning_list, duplicated->location.start, duplicated->location.length, @@ -13200,6 +13464,7 @@ pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *li if ((previous = pm_static_literals_add(&parser->line_offsets, parser->start, parser->start_line, literals, node, false)) != NULL) { pm_diagnostic_list_append_format( + &parser->metadata_arena, &parser->warning_list, PM_NODE_START(node), PM_NODE_LENGTH(node), @@ -18065,22 +18330,22 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, u return node; } case PM_TOKEN_INTEGER: { - pm_node_flags_t base = parser->integer_base; + pm_node_flags_t base = parser->integer.base; parser_lex(parser); return UP(pm_integer_node_create(parser, base, &parser->previous)); } case PM_TOKEN_INTEGER_IMAGINARY: { - pm_node_flags_t base = parser->integer_base; + pm_node_flags_t base = parser->integer.base; parser_lex(parser); return UP(pm_integer_node_imaginary_create(parser, base, 
&parser->previous)); } case PM_TOKEN_INTEGER_RATIONAL: { - pm_node_flags_t base = parser->integer_base; + pm_node_flags_t base = parser->integer.base; parser_lex(parser); return UP(pm_integer_node_rational_create(parser, base, &parser->previous)); } case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: { - pm_node_flags_t base = parser->integer_base; + pm_node_flags_t base = parser->integer.base; parser_lex(parser); return UP(pm_integer_node_rational_imaginary_create(parser, base, &parser->previous)); } @@ -20457,11 +20722,9 @@ parse_regular_expression_named_capture(pm_parser_t *parser, const pm_string_t *c start = parser->start + PM_NODE_START(call->receiver); end = parser->start + PM_NODE_END(call->receiver); - void *memory = xmalloc(length); - if (memory == NULL) abort(); - + uint8_t *memory = (uint8_t *) pm_arena_alloc(parser->arena, length, 1); memcpy(memory, source, length); - name = pm_parser_constant_id_owned(parser, (uint8_t *) memory, length); + name = pm_parser_constant_id_owned(parser, memory, length); } // Add this name to the list of constants if it is valid, not duplicated, @@ -21884,6 +22147,7 @@ pm_parser_init(pm_arena_t *arena, pm_parser_t *parser, const uint8_t *source, si *parser = (pm_parser_t) { .arena = arena, + .metadata_arena = { 0 }, .node_id = 0, .lex_state = PM_LEX_STATE_BEG, .enclosure_nesting = 0, @@ -21916,7 +22180,7 @@ pm_parser_init(pm_arena_t *arena, pm_parser_t *parser, const uint8_t *source, si .filepath = { 0 }, .constant_pool = { 0 }, .line_offsets = { 0 }, - .integer_base = 0, + .integer = { 0 }, .current_string = PM_STRING_EMPTY, .start_line = 1, .explicit_encoding = NULL, @@ -21936,28 +22200,27 @@ pm_parser_init(pm_arena_t *arena, pm_parser_t *parser, const uint8_t *source, si .warn_mismatched_indentation = true }; - // Initialize the constant pool. We're going to completely guess as to the - // number of constants that we'll need based on the size of the input. 
The - // ratio we chose here is actually less arbitrary than you might think. - // - // We took ~50K Ruby files and measured the size of the file versus the - // number of constants that were found in those files. Then we found the - // average and standard deviation of the ratios of constants/bytesize. Then - // we added 1.34 standard deviations to the average to get a ratio that - // would fit 75% of the files (for a two-tailed distribution). This works - // because there was about a 0.77 correlation and the distribution was - // roughly normal. - // - // This ratio will need to change if we add more constants to the constant - // pool for another node type. - uint32_t constant_size = ((uint32_t) size) / 95; - pm_constant_pool_init(&parser->constant_pool, constant_size < 4 ? 4 : constant_size); - - // Initialize the newline list. Similar to the constant pool, we're going to - // guess at the number of newlines that we'll need based on the size of the - // input. + /* Pre-size the arenas based on input size to reduce the number of block + * allocations (and the kernel page zeroing they trigger). The ratios were + * measured empirically: AST arena ~3.3x input, metadata arena ~1.1x input. + * The reserve call is a no-op when the capacity is at or below the default + * arena block size, so small inputs don't waste an extra allocation. */ + if (size <= SIZE_MAX / 4) pm_arena_reserve(arena, size * 4); + if (size <= SIZE_MAX / 5 * 4) pm_arena_reserve(&parser->metadata_arena, size + size / 4); + + /* Initialize the constant pool. Measured across 1532 Ruby stdlib files, the + * bytes/constant ratio has a median of ~56 and a 90th percentile of ~135. + * We use 120 as a balance between over-allocation waste and resize + * frequency. Resizes are cheap with arena allocation, so we lean toward + * under-estimating. */ + uint32_t constant_size = ((uint32_t) size) / 120; + pm_constant_pool_init(&parser->metadata_arena, &parser->constant_pool, constant_size < 4 ? 
4 : constant_size); + + /* Initialize the line offset list. Similar to the constant pool, we are + * going to estimate the number of newlines that we will need based on the + * size of the input. */ size_t newline_size = size / 22; - pm_line_offset_list_init(&parser->line_offsets, newline_size < 4 ? 4 : newline_size); + pm_line_offset_list_init(&parser->metadata_arena, &parser->line_offsets, newline_size < 4 ? 4 : newline_size); // If options were provided to this parse, establish them here. if (options != NULL) { @@ -22007,11 +22270,9 @@ pm_parser_init(pm_arena_t *arena, pm_parser_t *parser, const uint8_t *source, si const uint8_t *source = pm_string_source(local); size_t length = pm_string_length(local); - void *allocated = xmalloc(length); - if (allocated == NULL) continue; - + uint8_t *allocated = (uint8_t *) pm_arena_alloc(&parser->metadata_arena, length, 1); memcpy(allocated, source, length); - pm_parser_local_add_owned(parser, (uint8_t *) allocated, length); + pm_parser_local_add_owned(parser, allocated, length); } } } @@ -22096,7 +22357,7 @@ pm_parser_init(pm_arena_t *arena, pm_parser_t *parser, const uint8_t *source, si const uint8_t *newline = next_newline(cursor, parser->end - cursor); while (newline != NULL) { - pm_line_offset_list_append(&parser->line_offsets, U32(newline - parser->start + 1)); + pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1)); cursor = newline + 1; newline = next_newline(cursor, parser->end - cursor); @@ -22145,48 +22406,13 @@ pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_ch parser->encoding_changed_callback = callback; } -/** - * Free all of the memory associated with the comment list. 
- */ -static inline void -pm_comment_list_free(pm_list_t *list) { - pm_list_node_t *node, *next; - - for (node = list->head; node != NULL; node = next) { - next = node->next; - - pm_comment_t *comment = (pm_comment_t *) node; - xfree_sized(comment, sizeof(pm_comment_t)); - } -} - -/** - * Free all of the memory associated with the magic comment list. - */ -static inline void -pm_magic_comment_list_free(pm_list_t *list) { - pm_list_node_t *node, *next; - - for (node = list->head; node != NULL; node = next) { - next = node->next; - - pm_magic_comment_t *magic_comment = (pm_magic_comment_t *) node; - xfree_sized(magic_comment, sizeof(pm_magic_comment_t)); - } -} - /** * Free any memory associated with the given parser. */ PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser) { pm_string_free(&parser->filepath); - pm_diagnostic_list_free(&parser->error_list); - pm_diagnostic_list_free(&parser->warning_list); - pm_comment_list_free(&parser->comment_list); - pm_magic_comment_list_free(&parser->magic_comment_list); - pm_constant_pool_free(&parser->constant_pool); - pm_line_offset_list_free(&parser->line_offsets); + pm_arena_free(&parser->metadata_arena); while (parser->current_scope != NULL) { // Normally, popping the scope doesn't free the locals since it is diff --git a/src/regexp.c b/src/regexp.c index f864e187c9..df8bb69b21 100644 --- a/src/regexp.c +++ b/src/regexp.c @@ -128,7 +128,7 @@ pm_regexp_parse_error(pm_regexp_parser_t *parser, const uint8_t *start, const ui loc_length = (uint32_t) (parser->node_end - parser->node_start); } - pm_diagnostic_list_append_format(&pm->error_list, loc_start, loc_length, PM_ERR_REGEXP_PARSE_ERROR, message); + pm_diagnostic_list_append_format(&pm->metadata_arena, &pm->error_list, loc_start, loc_length, PM_ERR_REGEXP_PARSE_ERROR, message); } /** @@ -146,7 +146,7 @@ pm_regexp_parse_error(pm_regexp_parser_t *parser, const uint8_t *start, const ui loc_start__ = (uint32_t) ((parser_)->node_start - pm__->start); \ loc_length__ = 
(uint32_t) ((parser_)->node_end - (parser_)->node_start); \ } \ - pm_diagnostic_list_append_format(&pm__->error_list, loc_start__, loc_length__, diag_id, __VA_ARGS__); \ + pm_diagnostic_list_append_format(&pm__->metadata_arena, &pm__->error_list, loc_start__, loc_length__, diag_id, __VA_ARGS__); \ } while (0) /** @@ -1397,6 +1397,7 @@ pm_regexp_format_for_error(pm_buffer_t *buffer, const pm_encoding_t *encoding, c */ #define PM_REGEXP_ENCODING_ERROR(parser, diag_id, ...) \ pm_diagnostic_list_append_format( \ + &(parser)->parser->metadata_arena, \ &(parser)->parser->error_list, \ (uint32_t) ((parser)->node_start - (parser)->parser->start), \ (uint32_t) ((parser)->node_end - (parser)->node_start), \ diff --git a/src/util/pm_arena.c b/src/util/pm_arena.c index a9b69b3c8d..6b07e25210 100644 --- a/src/util/pm_arena.c +++ b/src/util/pm_arena.c @@ -1,5 +1,7 @@ #include "prism/util/pm_arena.h" +#include + /** * Compute the block allocation size using offsetof so it is correct regardless * of PM_FLEX_ARY_LEN. @@ -22,7 +24,7 @@ static size_t pm_arena_next_block_size(const pm_arena_t *arena, size_t min_size) { size_t size = PM_ARENA_INITIAL_SIZE; - for (size_t i = PM_ARENA_GROWTH_INTERVAL; i <= arena->block_count; i += PM_ARENA_GROWTH_INTERVAL) { + for (size_t exp = PM_ARENA_GROWTH_INTERVAL; exp <= arena->block_count; exp += PM_ARENA_GROWTH_INTERVAL) { if (size < PM_ARENA_MAX_SIZE) size *= 2; } @@ -30,62 +32,49 @@ pm_arena_next_block_size(const pm_arena_t *arena, size_t min_size) { } /** - * Allocate memory from the arena. The returned memory is NOT zeroed. This - * function is infallible — it aborts on allocation failure. + * Allocate a new block with the given data capacity and initial usage, link it + * into the arena, and return it. Aborts on allocation failure. */ -void * -pm_arena_alloc(pm_arena_t *arena, size_t size, size_t alignment) { - // Try current block. 
- if (arena->current != NULL) { - size_t used_aligned = (arena->current->used + alignment - 1) & ~(alignment - 1); - size_t needed = used_aligned + size; - - // Guard against overflow in the alignment or size arithmetic. - if (used_aligned >= arena->current->used && needed >= used_aligned && needed <= arena->current->capacity) { - arena->current->used = needed; - return arena->current->data + used_aligned; - } - } - - // Allocate new block via xmalloc — memory is NOT zeroed. - // New blocks from xmalloc are max-aligned, so data[] starts aligned for - // any C type. No padding needed at the start. - size_t block_data_size = pm_arena_next_block_size(arena, size); - pm_arena_block_t *block = (pm_arena_block_t *) xmalloc(PM_ARENA_BLOCK_SIZE(block_data_size)); +static pm_arena_block_t * +pm_arena_block_new(pm_arena_t *arena, size_t data_size, size_t initial_used) { + assert(initial_used <= data_size); + pm_arena_block_t *block = (pm_arena_block_t *) xmalloc(PM_ARENA_BLOCK_SIZE(data_size)); if (block == NULL) { fprintf(stderr, "prism: out of memory; aborting\n"); abort(); } - block->capacity = block_data_size; - block->used = size; + block->capacity = data_size; + block->used = initial_used; block->prev = arena->current; arena->current = block; arena->block_count++; - return block->data; + return block; } /** - * Allocate zero-initialized memory from the arena. This function is infallible - * — it aborts on allocation failure. + * Ensure the arena has at least `capacity` bytes available in its current + * block, allocating a new block if necessary. This allows callers to + * pre-size the arena to avoid repeated small block allocations. 
*/ -void * -pm_arena_zalloc(pm_arena_t *arena, size_t size, size_t alignment) { - void *ptr = pm_arena_alloc(arena, size, alignment); - memset(ptr, 0, size); - return ptr; +void +pm_arena_reserve(pm_arena_t *arena, size_t capacity) { + if (capacity <= PM_ARENA_INITIAL_SIZE) return; + if (arena->current != NULL && (arena->current->capacity - arena->current->used) >= capacity) return; + pm_arena_block_new(arena, capacity, 0); } /** - * Allocate memory from the arena and copy the given data into it. + * Slow path for pm_arena_alloc: allocate a new block and return a pointer to + * the first `size` bytes. Called when the current block has insufficient space. */ void * -pm_arena_memdup(pm_arena_t *arena, const void *src, size_t size, size_t alignment) { - void *dst = pm_arena_alloc(arena, size, alignment); - memcpy(dst, src, size); - return dst; +pm_arena_alloc_slow(pm_arena_t *arena, size_t size) { + size_t block_data_size = pm_arena_next_block_size(arena, size); + pm_arena_block_t *block = pm_arena_block_new(arena, block_data_size, size); + return block->data; } /** diff --git a/src/util/pm_char.c b/src/util/pm_char.c index f0baf47784..ac283af356 100644 --- a/src/util/pm_char.c +++ b/src/util/pm_char.c @@ -1,7 +1,5 @@ #include "prism/util/pm_char.h" -#define PRISM_CHAR_BIT_WHITESPACE (1 << 0) -#define PRISM_CHAR_BIT_INLINE_WHITESPACE (1 << 1) #define PRISM_CHAR_BIT_REGEXP_OPTION (1 << 2) #define PRISM_NUMBER_BIT_BINARY_DIGIT (1 << 0) @@ -13,7 +11,7 @@ #define PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT (1 << 6) #define PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER (1 << 7) -static const uint8_t pm_byte_table[256] = { +const uint8_t pm_byte_table[256] = { // 0 1 2 3 4 5 6 7 8 9 A B C D E F 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 3, 3, 3, 0, 0, // 0x 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x @@ -83,7 +81,7 @@ pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length) { * searching past the given maximum number of characters. 
*/ size_t -pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_line_offset_list_t *line_offsets, uint32_t start_offset) { +pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_arena_t *arena, pm_line_offset_list_t *line_offsets, uint32_t start_offset) { if (length <= 0) return 0; uint32_t size = 0; @@ -91,7 +89,7 @@ pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_line_o while (size < maximum && (pm_byte_table[string[size]] & PRISM_CHAR_BIT_WHITESPACE)) { if (string[size] == '\n') { - pm_line_offset_list_append(line_offsets, start_offset + size + 1); + pm_line_offset_list_append(arena, line_offsets, start_offset + size + 1); } size++; @@ -100,15 +98,6 @@ pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_line_o return size; } -/** - * Returns the number of characters at the start of the string that are inline - * whitespace. Disallows searching past the given maximum number of characters. - */ -size_t -pm_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length) { - return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_INLINE_WHITESPACE); -} - /** * Returns the number of characters at the start of the string that are regexp * options. Disallows searching past the given maximum number of characters. @@ -118,29 +107,6 @@ pm_strspn_regexp_option(const uint8_t *string, ptrdiff_t length) { return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_REGEXP_OPTION); } -/** - * Returns true if the given character matches the given kind. - */ -static inline bool -pm_char_is_char_kind(const uint8_t b, uint8_t kind) { - return (pm_byte_table[b] & kind) != 0; -} - -/** - * Returns true if the given character is a whitespace character. - */ -bool -pm_char_is_whitespace(const uint8_t b) { - return pm_char_is_char_kind(b, PRISM_CHAR_BIT_WHITESPACE); -} - -/** - * Returns true if the given character is an inline whitespace character. 
- */ -bool -pm_char_is_inline_whitespace(const uint8_t b) { - return pm_char_is_char_kind(b, PRISM_CHAR_BIT_INLINE_WHITESPACE); -} /** * Scan through the string and return the number of characters at the start of diff --git a/src/util/pm_constant_pool.c b/src/util/pm_constant_pool.c index f7173dd062..74e2a12524 100644 --- a/src/util/pm_constant_pool.c +++ b/src/util/pm_constant_pool.c @@ -70,19 +70,66 @@ pm_constant_id_list_includes(pm_constant_id_list_t *list, pm_constant_id_t id) { } /** - * A relatively simple hash function (djb2) that is used to hash strings. We are - * optimizing here for simplicity and speed. + * A multiply-xorshift hash that processes input a word at a time. This is + * significantly faster than the byte-at-a-time djb2 hash for the short strings + * typical in Ruby source (~15 bytes average). Each word is mixed into the hash + * by XOR followed by multiplication by a large odd constant, which spreads + * entropy across all bits. A final xorshift fold produces the 32-bit result. */ static inline uint32_t pm_constant_pool_hash(const uint8_t *start, size_t length) { - // This is a prime number used as the initial value for the hash function. - uint32_t value = 5381; + // This constant is borrowed from wyhash. It is a 64-bit odd integer with + // roughly equal 0/1 bits, chosen for good avalanche behavior when used in + // multiply-xorshift sequences. + static const uint64_t secret = 0x517cc1b727220a95ULL; + uint64_t hash = (uint64_t) length; + + if (length <= 8) { + // Short strings: read first and last 4 bytes (overlapping for len < 8). + // This covers the majority of Ruby identifiers with a single multiply. 
+ if (length >= 4) { + uint32_t a, b; + memcpy(&a, start, 4); + memcpy(&b, start + length - 4, 4); + hash ^= (uint64_t) a | ((uint64_t) b << 32); + } else if (length > 0) { + hash ^= (uint64_t) start[0] | ((uint64_t) start[length >> 1] << 8) | ((uint64_t) start[length - 1] << 16); + } + hash *= secret; + } else if (length <= 16) { + // Medium strings: read first and last 8 bytes (overlapping). + // Two multiplies instead of the three the loop-based approach needs. + uint64_t word; + memcpy(&word, start, 8); + hash ^= word; + hash *= secret; + memcpy(&word, start + length - 8, 8); + hash ^= word; + hash *= secret; + } else { + const uint8_t *ptr = start; + size_t remaining = length; + + while (remaining >= 8) { + uint64_t word; + memcpy(&word, ptr, 8); + hash ^= word; + hash *= secret; + ptr += 8; + remaining -= 8; + } - for (size_t index = 0; index < length; index++) { - value = ((value << 5) + value) + start[index]; + if (remaining > 0) { + // Read the last 8 bytes (overlapping with already-processed data). + uint64_t word; + memcpy(&word, start + length - 8, 8); + hash ^= word; + hash *= secret; + } } - return value; + hash ^= hash >> 32; + return (uint32_t) hash; } /** @@ -115,21 +162,15 @@ is_power_of_two(uint32_t size) { /** * Resize a constant pool to a given capacity. 
*/ -static inline bool -pm_constant_pool_resize(pm_constant_pool_t *pool) { +static inline void +pm_constant_pool_resize(pm_arena_t *arena, pm_constant_pool_t *pool) { assert(is_power_of_two(pool->capacity)); uint32_t next_capacity = pool->capacity * 2; - if (next_capacity < pool->capacity) return false; - const uint32_t mask = next_capacity - 1; - const size_t element_size = sizeof(pm_constant_pool_bucket_t) + sizeof(pm_constant_t); - - void *next = xcalloc(next_capacity, element_size); - if (next == NULL) return false; - pm_constant_pool_bucket_t *next_buckets = next; - pm_constant_t *next_constants = (void *)(((char *) next) + next_capacity * sizeof(pm_constant_pool_bucket_t)); + pm_constant_pool_bucket_t *next_buckets = (pm_constant_pool_bucket_t *) pm_arena_zalloc(arena, next_capacity * sizeof(pm_constant_pool_bucket_t), PRISM_ALIGNOF(pm_constant_pool_bucket_t)); + pm_constant_t *next_constants = (pm_constant_t *) pm_arena_alloc(arena, next_capacity * sizeof(pm_constant_t), PRISM_ALIGNOF(pm_constant_t)); // For each bucket in the current constant pool, find the index in the // next constant pool, and insert it. @@ -157,33 +198,22 @@ pm_constant_pool_resize(pm_constant_pool_t *pool) { // The constants are stable with respect to hash table resizes. memcpy(next_constants, pool->constants, pool->size * sizeof(pm_constant_t)); - // pool->constants and pool->buckets are allocated out of the same chunk - // of memory, with the buckets coming first. - xfree_sized(pool->buckets, pool->capacity * element_size); pool->constants = next_constants; pool->buckets = next_buckets; pool->capacity = next_capacity; - return true; } /** * Initialize a new constant pool with a given capacity. 
*/ -bool -pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity) { - const uint32_t maximum = (~((uint32_t) 0)); - if (capacity >= ((maximum / 2) + 1)) return false; - +void +pm_constant_pool_init(pm_arena_t *arena, pm_constant_pool_t *pool, uint32_t capacity) { capacity = next_power_of_two(capacity); - const size_t element_size = sizeof(pm_constant_pool_bucket_t) + sizeof(pm_constant_t); - void *memory = xcalloc(capacity, element_size); - if (memory == NULL) return false; - pool->buckets = memory; - pool->constants = (void *)(((char *)memory) + capacity * sizeof(pm_constant_pool_bucket_t)); + pool->buckets = (pm_constant_pool_bucket_t *) pm_arena_zalloc(arena, capacity * sizeof(pm_constant_pool_bucket_t), PRISM_ALIGNOF(pm_constant_pool_bucket_t)); + pool->constants = (pm_constant_t *) pm_arena_alloc(arena, capacity * sizeof(pm_constant_t), PRISM_ALIGNOF(pm_constant_t)); pool->size = 0; pool->capacity = capacity; - return true; } /** @@ -209,8 +239,7 @@ pm_constant_pool_find(const pm_constant_pool_t *pool, const uint8_t *start, size pm_constant_pool_bucket_t *bucket; while (bucket = &pool->buckets[index], bucket->id != PM_CONSTANT_ID_UNSET) { - pm_constant_t *constant = &pool->constants[bucket->id - 1]; - if ((constant->length == length) && memcmp(constant->start, start, length) == 0) { + if ((bucket->length == length) && memcmp(bucket->start, start, length) == 0) { return bucket->id; } @@ -224,9 +253,9 @@ pm_constant_pool_find(const pm_constant_pool_t *pool, const uint8_t *start, size * Insert a constant into a constant pool and return its index in the pool. 
*/ static inline pm_constant_id_t -pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t length, pm_constant_pool_bucket_type_t type) { +pm_constant_pool_insert(pm_arena_t *arena, pm_constant_pool_t *pool, const uint8_t *start, size_t length, pm_constant_pool_bucket_type_t type) { if (pool->size >= (pool->capacity / 4 * 3)) { - if (!pm_constant_pool_resize(pool)) return PM_CONSTANT_ID_UNSET; + pm_constant_pool_resize(arena, pool); } assert(is_power_of_two(pool->capacity)); @@ -240,25 +269,17 @@ pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t l // If there is a collision, then we need to check if the content is the // same as the content we are trying to insert. If it is, then we can // return the id of the existing constant. - pm_constant_t *constant = &pool->constants[bucket->id - 1]; - - if ((constant->length == length) && memcmp(constant->start, start, length) == 0) { + if ((bucket->length == length) && memcmp(bucket->start, start, length) == 0) { // Since we have found a match, we need to check if this is // attempting to insert a shared or an owned constant. We want to // prefer shared constants since they don't require allocations. - if (type == PM_CONSTANT_POOL_BUCKET_OWNED) { - // If we're attempting to insert an owned constant and we have - // an existing constant, then either way we don't want the given - // memory. Either it's duplicated with the existing constant or - // it's not necessary because we have a shared version. - xfree_sized((void *) start, length); - } else if (bucket->type == PM_CONSTANT_POOL_BUCKET_OWNED) { + if (type != PM_CONSTANT_POOL_BUCKET_OWNED && bucket->type == PM_CONSTANT_POOL_BUCKET_OWNED) { // If we're attempting to insert a shared constant and the - // existing constant is owned, then we can free the owned - // constant and replace it with the shared constant. 
- xfree_sized((void *) constant->start, constant->length); - constant->start = start; + // existing constant is owned, then we can replace it with the + // shared constant to prefer non-owned references. + bucket->start = start; bucket->type = (unsigned int) (type & 0x3); + pool->constants[bucket->id - 1].start = start; } return bucket->id; @@ -275,7 +296,9 @@ pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t l *bucket = (pm_constant_pool_bucket_t) { .id = (unsigned int) (id & 0x3fffffff), .type = (unsigned int) (type & 0x3), - .hash = hash + .hash = hash, + .start = start, + .length = length }; pool->constants[id - 1] = (pm_constant_t) { @@ -291,8 +314,8 @@ pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t l * PM_CONSTANT_ID_UNSET if any potential calls to resize fail. */ pm_constant_id_t -pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, size_t length) { - return pm_constant_pool_insert(pool, start, length, PM_CONSTANT_POOL_BUCKET_DEFAULT); +pm_constant_pool_insert_shared(pm_arena_t *arena, pm_constant_pool_t *pool, const uint8_t *start, size_t length) { + return pm_constant_pool_insert(arena, pool, start, length, PM_CONSTANT_POOL_BUCKET_DEFAULT); } /** @@ -301,8 +324,8 @@ pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, s * potential calls to resize fail. */ pm_constant_id_t -pm_constant_pool_insert_owned(pm_constant_pool_t *pool, uint8_t *start, size_t length) { - return pm_constant_pool_insert(pool, start, length, PM_CONSTANT_POOL_BUCKET_OWNED); +pm_constant_pool_insert_owned(pm_arena_t *arena, pm_constant_pool_t *pool, uint8_t *start, size_t length) { + return pm_constant_pool_insert(arena, pool, start, length, PM_CONSTANT_POOL_BUCKET_OWNED); } /** @@ -311,26 +334,7 @@ pm_constant_pool_insert_owned(pm_constant_pool_t *pool, uint8_t *start, size_t l * resize fail. 
*/ pm_constant_id_t -pm_constant_pool_insert_constant(pm_constant_pool_t *pool, const uint8_t *start, size_t length) { - return pm_constant_pool_insert(pool, start, length, PM_CONSTANT_POOL_BUCKET_CONSTANT); +pm_constant_pool_insert_constant(pm_arena_t *arena, pm_constant_pool_t *pool, const uint8_t *start, size_t length) { + return pm_constant_pool_insert(arena, pool, start, length, PM_CONSTANT_POOL_BUCKET_CONSTANT); } -/** - * Free the memory associated with a constant pool. - */ -void -pm_constant_pool_free(pm_constant_pool_t *pool) { - // For each constant in the current constant pool, free the contents if the - // contents are owned. - for (uint32_t index = 0; index < pool->capacity; index++) { - pm_constant_pool_bucket_t *bucket = &pool->buckets[index]; - - // If an id is set on this constant, then we know we have content here. - if (bucket->id != PM_CONSTANT_ID_UNSET && bucket->type == PM_CONSTANT_POOL_BUCKET_OWNED) { - pm_constant_t *constant = &pool->constants[bucket->id - 1]; - xfree_sized((void *) constant->start, constant->length); - } - } - - xfree_sized(pool->buckets, pool->capacity * (sizeof(pm_constant_pool_bucket_t) + sizeof(pm_constant_t))); -} diff --git a/src/util/pm_line_offset_list.c b/src/util/pm_line_offset_list.c index d55b2f6874..0648901e29 100644 --- a/src/util/pm_line_offset_list.c +++ b/src/util/pm_line_offset_list.c @@ -1,20 +1,16 @@ #include "prism/util/pm_line_offset_list.h" /** - * Initialize a new newline list with the given capacity. Returns true if the - * allocation of the offsets succeeds, otherwise returns false. + * Initialize a new line offset list with the given capacity. 
*/ -bool -pm_line_offset_list_init(pm_line_offset_list_t *list, size_t capacity) { - list->offsets = (uint32_t *) xcalloc(capacity, sizeof(uint32_t)); - if (list->offsets == NULL) return false; +void +pm_line_offset_list_init(pm_arena_t *arena, pm_line_offset_list_t *list, size_t capacity) { + list->offsets = (uint32_t *) pm_arena_alloc(arena, capacity * sizeof(uint32_t), PRISM_ALIGNOF(uint32_t)); - // This is 1 instead of 0 because we want to include the first line of the - // file as having offset 0, which is set because of calloc. + // The first line always has offset 0. + list->offsets[0] = 0; list->size = 1; list->capacity = capacity; - - return true; } /** @@ -26,26 +22,20 @@ pm_line_offset_list_clear(pm_line_offset_list_t *list) { } /** - * Append a new offset to the newline list. Returns true if the reallocation of - * the offsets succeeds (if one was necessary), otherwise returns false. + * Append a new offset to the newline list (slow path: resize and store). */ -bool -pm_line_offset_list_append(pm_line_offset_list_t *list, uint32_t cursor) { - if (list->size == list->capacity) { - uint32_t *original_offsets = list->offsets; +void +pm_line_offset_list_append_slow(pm_arena_t *arena, pm_line_offset_list_t *list, uint32_t cursor) { + size_t new_capacity = (list->capacity * 3) / 2; + uint32_t *new_offsets = (uint32_t *) pm_arena_alloc(arena, new_capacity * sizeof(uint32_t), PRISM_ALIGNOF(uint32_t)); - list->capacity = (list->capacity * 3) / 2; - list->offsets = (uint32_t *) xcalloc(list->capacity, sizeof(uint32_t)); - if (list->offsets == NULL) return false; + memcpy(new_offsets, list->offsets, list->size * sizeof(uint32_t)); - memcpy(list->offsets, original_offsets, list->size * sizeof(uint32_t)); - xfree_sized(original_offsets, list->size * sizeof(uint32_t)); - } + list->offsets = new_offsets; + list->capacity = new_capacity; assert(list->size == 0 || cursor > list->offsets[list->size - 1]); list->offsets[list->size++] = cursor; - - return true; } /** @@ 
-103,11 +93,3 @@ pm_line_offset_list_line_column(const pm_line_offset_list_t *list, uint32_t curs .column = cursor - list->offsets[left - 1] }); } - -/** - * Free the internal memory allocated for the newline list. - */ -void -pm_line_offset_list_free(pm_line_offset_list_t *list) { - xfree_sized(list->offsets, list->capacity * sizeof(uint32_t)); -} diff --git a/src/util/pm_strpbrk.c b/src/util/pm_strpbrk.c index 60c67b2983..fdd2ab4567 100644 --- a/src/util/pm_strpbrk.c +++ b/src/util/pm_strpbrk.c @@ -5,7 +5,7 @@ */ static inline void pm_strpbrk_invalid_multibyte_character(pm_parser_t *parser, uint32_t start, uint32_t length) { - pm_diagnostic_list_append_format(&parser->error_list, start, length, PM_ERR_INVALID_MULTIBYTE_CHARACTER, parser->start[start]); + pm_diagnostic_list_append_format(&parser->metadata_arena, &parser->error_list, start, length, PM_ERR_INVALID_MULTIBYTE_CHARACTER, parser->start[start]); } /** @@ -19,7 +19,7 @@ pm_strpbrk_explicit_encoding_set(pm_parser_t *parser, uint32_t start, uint32_t l } else if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) { // Not okay, we already found a Unicode escape sequence and this // conflicts. - pm_diagnostic_list_append_format(&parser->error_list, start, length, PM_ERR_MIXED_ENCODING, parser->encoding->name); + pm_diagnostic_list_append_format(&parser->metadata_arena, &parser->error_list, start, length, PM_ERR_MIXED_ENCODING, parser->encoding->name); } else { // Should not be anything else. assert(false && "unreachable"); @@ -29,13 +29,233 @@ pm_strpbrk_explicit_encoding_set(pm_parser_t *parser, uint32_t start, uint32_t l parser->explicit_encoding = parser->encoding; } +/** + * Scan forward through ASCII bytes looking for a byte that is in the given + * charset. Returns true if a match was found, storing its offset in *index. + * Returns false if no match was found, storing the number of ASCII bytes + * consumed in *index (so the caller can skip past them). 
+ * + * All charset characters must be ASCII (< 0x80). The scanner stops at non-ASCII + * bytes, returning control to the caller's encoding-aware loop. + * + * Up to three optimized implementations are selected at compile time, with a + * no-op fallback for unsupported platforms: + * 1. NEON — processes 16 bytes per iteration on aarch64. + * 2. SSSE3 — processes 16 bytes per iteration on x86-64. + * 3. SWAR — little-endian fallback, processes 8 bytes per iteration. + */ + +#if defined(PRISM_HAS_NEON) || defined(PRISM_HAS_SSSE3) || defined(PRISM_HAS_SWAR) + +/** + * Update the cached strpbrk lookup tables if the charset has changed. The + * parser caches the last charset's precomputed tables so that repeated calls + * with the same breakpoints (the common case during string/regex/list lexing) + * skip table construction entirely. + * + * Builds three structures: + * - low_lut/high_lut: nibble-based lookup tables for SIMD matching (NEON/SSSE3) + * - table: 256-bit bitmap for scalar fallback matching (all platforms) + */ +static inline void +pm_strpbrk_cache_update(pm_parser_t *parser, const uint8_t *charset) { + // The cache key is the full charset buffer (PM_STRPBRK_CACHE_SIZE bytes). + // Since it is always NUL-padded, a fixed-size comparison covers both + // content and length. + if (memcmp(parser->strpbrk_cache.charset, charset, sizeof(parser->strpbrk_cache.charset)) == 0) return; + + memset(parser->strpbrk_cache.low_lut, 0, sizeof(parser->strpbrk_cache.low_lut)); + memset(parser->strpbrk_cache.high_lut, 0, sizeof(parser->strpbrk_cache.high_lut)); + memset(parser->strpbrk_cache.table, 0, sizeof(parser->strpbrk_cache.table)); + + // Always include NUL in the tables. The slow path uses strchr, which + // always matches NUL (it finds the C string terminator), so NUL is + // effectively always a breakpoint. Replicating that here lets the fast + // scanner handle NUL at full speed instead of bailing to the slow path. 
+ parser->strpbrk_cache.low_lut[0x00] |= (uint8_t) (1 << 0); + parser->strpbrk_cache.high_lut[0x00] = (uint8_t) (1 << 0); + parser->strpbrk_cache.table[0] |= (uint64_t) 1; + + size_t charset_len = 0; + for (const uint8_t *c = charset; *c != '\0'; c++) { + parser->strpbrk_cache.low_lut[*c & 0x0F] |= (uint8_t) (1 << (*c >> 4)); + parser->strpbrk_cache.high_lut[*c >> 4] = (uint8_t) (1 << (*c >> 4)); + parser->strpbrk_cache.table[*c >> 6] |= (uint64_t) 1 << (*c & 0x3F); + charset_len++; + } + + // Store the new charset key, NUL-padded to the full buffer size. + memcpy(parser->strpbrk_cache.charset, charset, charset_len + 1); + memset(parser->strpbrk_cache.charset + charset_len + 1, 0, sizeof(parser->strpbrk_cache.charset) - charset_len - 1); +} + +#endif + +#if defined(PRISM_HAS_NEON) +#include + +static inline bool +scan_strpbrk_ascii(pm_parser_t *parser, const uint8_t *source, size_t maximum, const uint8_t *charset, size_t *index) { + pm_strpbrk_cache_update(parser, charset); + + uint8x16_t low_lut = vld1q_u8(parser->strpbrk_cache.low_lut); + uint8x16_t high_lut = vld1q_u8(parser->strpbrk_cache.high_lut); + uint8x16_t mask_0f = vdupq_n_u8(0x0F); + uint8x16_t mask_80 = vdupq_n_u8(0x80); + + size_t idx = 0; + + while (idx + 16 <= maximum) { + uint8x16_t v = vld1q_u8(source + idx); + + // If any byte has the high bit set, we have non-ASCII data. + // Return to let the caller's encoding-aware loop handle it. + if (vmaxvq_u8(vandq_u8(v, mask_80)) != 0) break; + + uint8x16_t lo_class = vqtbl1q_u8(low_lut, vandq_u8(v, mask_0f)); + uint8x16_t hi_class = vqtbl1q_u8(high_lut, vshrq_n_u8(v, 4)); + uint8x16_t matched = vtstq_u8(lo_class, hi_class); + + if (vmaxvq_u8(matched) == 0) { + idx += 16; + continue; + } + + // Find the position of the first matching byte. 
+ uint64_t lo64 = vgetq_lane_u64(vreinterpretq_u64_u8(matched), 0); + if (lo64 != 0) { + *index = idx + pm_ctzll(lo64) / 8; + return true; + } + uint64_t hi64 = vgetq_lane_u64(vreinterpretq_u64_u8(matched), 1); + *index = idx + 8 + pm_ctzll(hi64) / 8; + return true; + } + + // Scalar tail for remaining < 16 ASCII bytes. + while (idx < maximum && source[idx] < 0x80) { + uint8_t byte = source[idx]; + if (parser->strpbrk_cache.table[byte >> 6] & ((uint64_t) 1 << (byte & 0x3F))) { + *index = idx; + return true; + } + idx++; + } + + *index = idx; + return false; +} + +#elif defined(PRISM_HAS_SSSE3) +#include + +static inline bool +scan_strpbrk_ascii(pm_parser_t *parser, const uint8_t *source, size_t maximum, const uint8_t *charset, size_t *index) { + pm_strpbrk_cache_update(parser, charset); + + __m128i low_lut = _mm_loadu_si128((const __m128i *) parser->strpbrk_cache.low_lut); + __m128i high_lut = _mm_loadu_si128((const __m128i *) parser->strpbrk_cache.high_lut); + __m128i mask_0f = _mm_set1_epi8(0x0F); + + size_t idx = 0; + + while (idx + 16 <= maximum) { + __m128i v = _mm_loadu_si128((const __m128i *) (source + idx)); + + // If any byte has the high bit set, stop. + if (_mm_movemask_epi8(v) != 0) break; + + // Nibble-based classification using pshufb (SSSE3), same as NEON + // vqtbl1q_u8. A byte matches iff (low_lut[lo_nib] & high_lut[hi_nib]) != 0. + __m128i lo_class = _mm_shuffle_epi8(low_lut, _mm_and_si128(v, mask_0f)); + __m128i hi_class = _mm_shuffle_epi8(high_lut, _mm_and_si128(_mm_srli_epi16(v, 4), mask_0f)); + __m128i matched = _mm_and_si128(lo_class, hi_class); + + // Check if any byte matched. + int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(matched, _mm_setzero_si128())); + + if (mask == 0xFFFF) { + // All bytes were zero — no match in this chunk. + idx += 16; + continue; + } + + // Find the first matching byte (first non-zero in matched). + *index = idx + pm_ctzll((uint64_t) (~mask & 0xFFFF)); + return true; + } + + // Scalar tail. 
+ while (idx < maximum && source[idx] < 0x80) { + uint8_t byte = source[idx]; + if (parser->strpbrk_cache.table[byte >> 6] & ((uint64_t) 1 << (byte & 0x3F))) { + *index = idx; + return true; + } + idx++; + } + + *index = idx; + return false; +} + +#elif defined(PRISM_HAS_SWAR) + +static inline bool +scan_strpbrk_ascii(pm_parser_t *parser, const uint8_t *source, size_t maximum, const uint8_t *charset, size_t *index) { + pm_strpbrk_cache_update(parser, charset); + + static const uint64_t highs = 0x8080808080808080ULL; + size_t idx = 0; + + while (idx + 8 <= maximum) { + uint64_t word; + memcpy(&word, source + idx, 8); + + // Bail on any non-ASCII byte. + if (word & highs) break; + + // Check each byte against the charset table. + for (size_t j = 0; j < 8; j++) { + uint8_t byte = source[idx + j]; + if (parser->strpbrk_cache.table[byte >> 6] & ((uint64_t) 1 << (byte & 0x3F))) { + *index = idx + j; + return true; + } + } + + idx += 8; + } + + // Scalar tail. + while (idx < maximum && source[idx] < 0x80) { + uint8_t byte = source[idx]; + if (parser->strpbrk_cache.table[byte >> 6] & ((uint64_t) 1 << (byte & 0x3F))) { + *index = idx; + return true; + } + idx++; + } + + *index = idx; + return false; +} + +#else + +static inline bool +scan_strpbrk_ascii(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, PRISM_ATTRIBUTE_UNUSED const uint8_t *source, PRISM_ATTRIBUTE_UNUSED size_t maximum, PRISM_ATTRIBUTE_UNUSED const uint8_t *charset, size_t *index) { + *index = 0; + return false; +} + +#endif + /** * This is the default path. 
*/ static inline const uint8_t * -pm_strpbrk_utf8(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) { - size_t index = 0; - +pm_strpbrk_utf8(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t index, size_t maximum, bool validate) { while (index < maximum) { if (strchr((const char *) charset, source[index]) != NULL) { return source + index; @@ -73,9 +293,7 @@ pm_strpbrk_utf8(pm_parser_t *parser, const uint8_t *source, const uint8_t *chars * This is the path when the encoding is ASCII-8BIT. */ static inline const uint8_t * -pm_strpbrk_ascii_8bit(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) { - size_t index = 0; - +pm_strpbrk_ascii_8bit(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t index, size_t maximum, bool validate) { while (index < maximum) { if (strchr((const char *) charset, source[index]) != NULL) { return source + index; @@ -92,8 +310,7 @@ pm_strpbrk_ascii_8bit(pm_parser_t *parser, const uint8_t *source, const uint8_t * This is the slow path that does care about the encoding. */ static inline const uint8_t * -pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) { - size_t index = 0; +pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t index, size_t maximum, bool validate) { const pm_encoding_t *encoding = parser->encoding; while (index < maximum) { @@ -135,8 +352,7 @@ pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t * the encoding only supports single-byte characters. 
*/ static inline const uint8_t * -pm_strpbrk_single_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) { - size_t index = 0; +pm_strpbrk_single_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t index, size_t maximum, bool validate) { const pm_encoding_t *encoding = parser->encoding; while (index < maximum) { @@ -192,15 +408,19 @@ pm_strpbrk_single_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t */ const uint8_t * pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length, bool validate) { - if (length <= 0) { - return NULL; - } else if (!parser->encoding_changed) { - return pm_strpbrk_utf8(parser, source, charset, (size_t) length, validate); + if (length <= 0) return NULL; + + size_t maximum = (size_t) length; + size_t index = 0; + if (scan_strpbrk_ascii(parser, source, maximum, charset, &index)) return source + index; + + if (!parser->encoding_changed) { + return pm_strpbrk_utf8(parser, source, charset, index, maximum, validate); } else if (parser->encoding == PM_ENCODING_ASCII_8BIT_ENTRY) { - return pm_strpbrk_ascii_8bit(parser, source, charset, (size_t) length, validate); + return pm_strpbrk_ascii_8bit(parser, source, charset, index, maximum, validate); } else if (parser->encoding->multibyte) { - return pm_strpbrk_multi_byte(parser, source, charset, (size_t) length, validate); + return pm_strpbrk_multi_byte(parser, source, charset, index, maximum, validate); } else { - return pm_strpbrk_single_byte(parser, source, charset, (size_t) length, validate); + return pm_strpbrk_single_byte(parser, source, charset, index, maximum, validate); } } diff --git a/templates/include/prism/diagnostic.h.erb b/templates/include/prism/diagnostic.h.erb index c1864e6021..935fb663ea 100644 --- a/templates/include/prism/diagnostic.h.erb +++ b/templates/include/prism/diagnostic.h.erb @@ -8,6 +8,7 @@ #include "prism/ast.h" #include "prism/defines.h" 
+#include "prism/util/pm_arena.h" #include "prism/util/pm_list.h" #include @@ -48,13 +49,6 @@ typedef struct { /** The message associated with the diagnostic. */ const char *message; - /** - * Whether or not the memory related to the message of this diagnostic is - * owned by this diagnostic. If it is, it needs to be freed when the - * diagnostic is freed. - */ - bool owned; - /** * The level of the diagnostic, see `pm_error_level_t` and * `pm_warning_level_t` for possible values. @@ -99,32 +93,25 @@ const char * pm_diagnostic_id_human(pm_diagnostic_id_t diag_id); * Append a diagnostic to the given list of diagnostics that is using shared * memory for its message. * + * @param arena The arena to allocate from. * @param list The list to append to. * @param start The source offset of the start of the diagnostic. * @param length The length of the diagnostic. * @param diag_id The diagnostic ID. - * @return Whether the diagnostic was successfully appended. */ -bool pm_diagnostic_list_append(pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id); +void pm_diagnostic_list_append(pm_arena_t *arena, pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id); /** * Append a diagnostic to the given list of diagnostics that is using a format * string for its message. * + * @param arena The arena to allocate from. * @param list The list to append to. * @param start The source offset of the start of the diagnostic. * @param length The length of the diagnostic. * @param diag_id The diagnostic ID. * @param ... The arguments to the format string for the message. - * @return Whether the diagnostic was successfully appended. - */ -bool pm_diagnostic_list_append_format(pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id, ...); - -/** - * Deallocate the internal state of the given diagnostic list. - * - * @param list The list to deallocate. 
*/ -void pm_diagnostic_list_free(pm_list_t *list); +void pm_diagnostic_list_append_format(pm_arena_t *arena, pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id, ...); #endif diff --git a/templates/src/diagnostic.c.erb b/templates/src/diagnostic.c.erb index 8fa47590c0..b02714637d 100644 --- a/templates/src/diagnostic.c.erb +++ b/templates/src/diagnostic.c.erb @@ -1,4 +1,5 @@ #include "prism/diagnostic.h" +#include "prism/util/pm_arena.h" #define PM_DIAGNOSTIC_ID_MAX <%= errors.length + warnings.length %> @@ -451,29 +452,26 @@ pm_diagnostic_level(pm_diagnostic_id_t diag_id) { /** * Append an error to the given list of diagnostic. */ -bool -pm_diagnostic_list_append(pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id) { - pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) xcalloc(1, sizeof(pm_diagnostic_t)); - if (diagnostic == NULL) return false; +void +pm_diagnostic_list_append(pm_arena_t *arena, pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id) { + pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) pm_arena_zalloc(arena, sizeof(pm_diagnostic_t), PRISM_ALIGNOF(pm_diagnostic_t)); *diagnostic = (pm_diagnostic_t) { .location = { .start = start, .length = length }, .diag_id = diag_id, .message = pm_diagnostic_message(diag_id), - .owned = false, .level = pm_diagnostic_level(diag_id) }; pm_list_append(list, (pm_list_node_t *) diagnostic); - return true; } /** * Append a diagnostic to the given list of diagnostics that is using a format * string for its message. */ -bool -pm_diagnostic_list_append_format(pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id, ...) { +void +pm_diagnostic_list_append_format(pm_arena_t *arena, pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id, ...) 
{ va_list arguments; va_start(arguments, diag_id); @@ -482,20 +480,13 @@ pm_diagnostic_list_append_format(pm_list_t *list, uint32_t start, uint32_t lengt va_end(arguments); if (result < 0) { - return false; + return; } - pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) xcalloc(1, sizeof(pm_diagnostic_t)); - if (diagnostic == NULL) { - return false; - } + pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) pm_arena_zalloc(arena, sizeof(pm_diagnostic_t), PRISM_ALIGNOF(pm_diagnostic_t)); size_t message_length = (size_t) (result + 1); - char *message = (char *) xmalloc(message_length); - if (message == NULL) { - xfree_sized(diagnostic, sizeof(pm_diagnostic_t)); - return false; - } + char *message = (char *) pm_arena_alloc(arena, message_length, 1); va_start(arguments, diag_id); vsnprintf(message, message_length, format, arguments); @@ -505,27 +496,9 @@ pm_diagnostic_list_append_format(pm_list_t *list, uint32_t start, uint32_t lengt .location = { .start = start, .length = length }, .diag_id = diag_id, .message = message, - .owned = true, .level = pm_diagnostic_level(diag_id) }; pm_list_append(list, (pm_list_node_t *) diagnostic); - return true; } -/** - * Deallocate the internal state of the given diagnostic list. - */ -void -pm_diagnostic_list_free(pm_list_t *list) { - pm_diagnostic_t *node = (pm_diagnostic_t *) list->head; - - while (node != NULL) { - pm_diagnostic_t *next = (pm_diagnostic_t *) node->node.next; - - if (node->owned) xfree((void *) node->message); - xfree_sized(node, sizeof(pm_diagnostic_t)); - - node = next; - } -} diff --git a/templates/src/node.c.erb b/templates/src/node.c.erb index df59545129..93ea275a54 100644 --- a/templates/src/node.c.erb +++ b/templates/src/node.c.erb @@ -39,10 +39,11 @@ pm_node_list_grow(pm_arena_t *arena, pm_node_list_t *list, size_t size) { } /** - * Append a new node onto the end of the node list. + * Slow path for pm_node_list_append: grow the list and append the node. 
+ * Do not call directly - use pm_node_list_append instead. */ void -pm_node_list_append(pm_arena_t *arena, pm_node_list_t *list, pm_node_t *node) { +pm_node_list_append_slow(pm_arena_t *arena, pm_node_list_t *list, pm_node_t *node) { pm_node_list_grow(arena, list, 1); list->nodes[list->size++] = node; } diff --git a/test/prism/magic_comment_test.rb b/test/prism/magic_comment_test.rb index ccfe5a5d0a..7985bae568 100644 --- a/test/prism/magic_comment_test.rb +++ b/test/prism/magic_comment_test.rb @@ -69,6 +69,10 @@ def test_emacs_multiple assert_magic_encoding(Encoding::US_ASCII, "# -*- foo: bar; encoding: ascii -*-") end + def test_emacs_missing_delimiter + assert_magic_encoding(Encoding::US_ASCII, '# -*- \1; encoding: ascii -*-') + end + def test_coding_whitespace assert_magic_encoding(Encoding::ASCII_8BIT, "# coding \t \r \v : \t \v \r ascii-8bit") end