Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
dadeb7e
Use an arena for parser metadata
kddnewton Mar 7, 2026
390bdaa
Use the parser arena for the constant pool
kddnewton Mar 7, 2026
1dd9853
Speed up the constant hash function
kddnewton Mar 8, 2026
e0708c4
Small optimization for parser_lex_magic_comment
kddnewton Mar 8, 2026
c1ad25e
Scan forward through inline whitespace to avoid writing to parser->cu…
kddnewton Mar 8, 2026
fb526a8
Fast-paths for ASCII-only identifiers
kddnewton Mar 8, 2026
bfa7692
Avoid unnecessary zero-ing of memory
kddnewton Mar 9, 2026
f94fe6b
Pre-size arena to avoid unnecessary growth
kddnewton Mar 9, 2026
dfdc930
Force the allocation to be inlined
kddnewton Mar 9, 2026
83f54c2
Inline pm_node_list_append, pm_char_is_whitespace, and pm_char_is_inl…
kddnewton Mar 9, 2026
a14431c
Avoid redundant whitespace scanning in magic comment lexing
kddnewton Mar 9, 2026
b5b88ba
Potentially skip whitespace scanning for speed
kddnewton Mar 9, 2026
fbcd3fc
Inline three more functions, and lower the hash threshold for locals
kddnewton Mar 9, 2026
20e626a
Lex simple integer values as we are lexing
kddnewton Mar 10, 2026
2a1dc79
Only dispatch to lex_optional_float_suffix when it is possible
kddnewton Mar 10, 2026
a52c2bd
Optimize constant pool hash for short strings
kddnewton Mar 10, 2026
dcb2e8c
Include string in constant pool entry to avoid chasing pointer
kddnewton Mar 10, 2026
c464b29
SIMD/SWAR for strpbrk
kddnewton Mar 10, 2026
559f24f
Fix a bug where we removed the \r warning
kddnewton Mar 10, 2026
fc0ec4c
Use a bloom filter to quickly reject local lookups
kddnewton Mar 11, 2026
46656b2
Cache strpbrk lookup tables
kddnewton Mar 11, 2026
b2658d2
Fix up rebase errors
kddnewton Mar 17, 2026
5fe0448
More correctly detect SIMD on MSVC
kddnewton Mar 17, 2026
f5ae7b7
Ensure allocations to the constant pool are through the arena
kddnewton Mar 17, 2026
1cabef7
Fix ASAN reading off end of strpbrk cache
Earlopain Mar 18, 2026
898241c
Do not use GCC-specific syntax for lookup tables
Earlopain Mar 18, 2026
ec2cf2e
Fix infinite loop in parser_lex_magic_comment
Earlopain Mar 18, 2026
eab6f33
Fix C coverage by moving stuff slightly around
Earlopain Mar 18, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 55 additions & 0 deletions include/prism/defines.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,18 @@
# define inline __inline
#endif

/**
 * Force a function to be inlined at every call site. Use sparingly — only for
 * small, hot functions where the compiler's heuristics fail to inline.
 *
 * Note that this is a request, not a guarantee: the unknown-compiler fallback
 * degrades to a plain inline hint.
 */
#if defined(_MSC_VER)
# define PRISM_FORCE_INLINE __forceinline
#elif defined(__GNUC__) || defined(__clang__)
// GCC/Clang: always_inline forces the inlining decision; the inline keyword is
// kept alongside it so the function retains standard C inline linkage.
# define PRISM_FORCE_INLINE inline __attribute__((always_inline))
#else
// Unknown compiler: fall back to a plain (non-binding) inline hint.
# define PRISM_FORCE_INLINE inline
#endif

/**
* Old Visual Studio versions before 2015 do not implement sprintf, but instead
implement _snprintf. We standardize that here.
Expand Down Expand Up @@ -264,6 +276,49 @@
#define PRISM_UNLIKELY(x) (x)
#endif

/**
 * Platform detection for SIMD / fast-path implementations. At most one of
 * these macros is defined, selecting the best available vectorization strategy.
 */
#if (defined(__aarch64__) && defined(__ARM_NEON)) || (defined(_MSC_VER) && defined(_M_ARM64))
// AArch64 NEON is architecturally mandatory, so _M_ARM64 alone is sufficient
// on MSVC (which does not define __ARM_NEON).
#define PRISM_HAS_NEON
#elif (defined(__x86_64__) && defined(__SSSE3__)) || (defined(_MSC_VER) && defined(_M_X64))
// NOTE(review): on MSVC x64 this assumes SSSE3 unconditionally (MSVC defines
// no __SSSE3__ macro, and the x86-64 baseline is only SSE2) — confirm that
// pre-SSSE3 CPUs are out of scope for MSVC builds.
#define PRISM_HAS_SSSE3
#elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
// SWAR ("SIMD within a register") fallback: 64-bit word tricks whose byte
// ordering assumptions only hold on little-endian targets.
#define PRISM_HAS_SWAR
#endif

/**
 * Count trailing zero bits in a 64-bit value. Used by SWAR identifier scanning
 * to find the first non-matching byte in a word.
 *
 * Precondition: v must be nonzero. The result is undefined when v == 0
 * (matching the behavior of __builtin_ctzll and _BitScanForward64).
 */
#if defined(__GNUC__) || defined(__clang__)
#define pm_ctzll(v) ((unsigned) __builtin_ctzll(v))
#elif defined(_MSC_VER)
// NOTE(review): _BitScanForward64 only exists on 64-bit MSVC targets
// (_M_X64/_M_ARM64) — confirm 32-bit MSVC builds never reach this branch.
#include <intrin.h>
static inline unsigned pm_ctzll(uint64_t v) {
    unsigned long index;
    _BitScanForward64(&index, v);
    return (unsigned) index;
}
#else
// Portable fallback: isolate the lowest set bit, then binary-search its
// position with six mask tests.
static inline unsigned
pm_ctzll(uint64_t v) {
    // Isolate the lowest set bit using unsigned arithmetic only. The previous
    // form, v &= (uint64_t) (-(int64_t) v), negated INT64_MIN when only the
    // top bit was set — signed-overflow undefined behavior. Unsigned
    // subtraction wraps and is well-defined.
    v &= (0 - v);

    // Each test checks whether the isolated bit lies in the low half of the
    // remaining range; if not, add that range's width to the count.
    unsigned c = 0;
    if (!(v & 0x00000000FFFFFFFFULL)) c += 32;
    if (!(v & 0x0000FFFF0000FFFFULL)) c += 16;
    if (!(v & 0x00FF00FF00FF00FFULL)) c += 8;
    if (!(v & 0x0F0F0F0F0F0F0F0FULL)) c += 4;
    if (!(v & 0x3333333333333333ULL)) c += 2;
    if (!(v & 0x5555555555555555ULL)) c += 1;
    return c;
}
#endif

/**
* We use -Wimplicit-fallthrough to guard potentially unintended fall-through between cases of a switch.
* Use PRISM_FALLTHROUGH to explicitly annotate cases where the fallthrough is intentional.
Expand Down
19 changes: 18 additions & 1 deletion include/prism/node.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,31 @@
/**
 * Iterate over the nodes in a node list, binding each to `node` in turn.
 * Subtlety: the assignment participates in the loop condition, so iteration
 * stops early at the first NULL entry — presumably lists never store NULL
 * nodes mid-list; confirm before relying on full traversal.
 */
#define PM_NODE_LIST_FOREACH(list, index, node) \
for (size_t index = 0; index < (list)->size && ((node) = (list)->nodes[index]); index++)

/**
* Slow path for pm_node_list_append: grow the list and append the node.
* Do not call directly — use pm_node_list_append instead.
*
* @param arena The arena to allocate from.
* @param list The list to append to.
* @param node The node to append.
*/
void pm_node_list_append_slow(pm_arena_t *arena, pm_node_list_t *list, pm_node_t *node);

/**
* Append a new node onto the end of the node list.
*
* @param arena The arena to allocate from.
* @param list The list to append to.
* @param node The node to append.
*/
void pm_node_list_append(pm_arena_t *arena, pm_node_list_t *list, pm_node_t *node);
static PRISM_FORCE_INLINE void
pm_node_list_append(pm_arena_t *arena, pm_node_list_t *list, pm_node_t *node) {
    // Full list: defer to the out-of-line grow-and-append path.
    if (list->size >= list->capacity) {
        pm_node_list_append_slow(arena, list, node);
        return;
    }

    // Fast path: spare capacity exists, so store and bump the size.
    list->nodes[list->size] = node;
    list->size++;
}

/**
* Prepend a new node onto the beginning of the node list.
Expand Down
68 changes: 60 additions & 8 deletions include/prism/parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,13 @@ typedef struct {
pm_heredoc_indent_t indent;
} pm_heredoc_lex_mode_t;

/**
* The size of the breakpoints and strpbrk cache charset buffers. All
* breakpoint arrays and the strpbrk cache charset must share this size so
* that memcmp can safely compare the full buffer without overreading.
*/
#define PM_STRPBRK_CACHE_SIZE 16

/**
* When lexing Ruby source, the lexer has a small amount of state to tell which
* kind of token it is currently lexing. For example, when we find the start of
Expand Down Expand Up @@ -169,7 +176,7 @@ typedef struct pm_lex_mode {
* This is the character set that should be used to delimit the
* tokens within the list.
*/
uint8_t breakpoints[11];
uint8_t breakpoints[PM_STRPBRK_CACHE_SIZE];
} list;

struct {
Expand All @@ -191,7 +198,7 @@ typedef struct pm_lex_mode {
* This is the character set that should be used to delimit the
* tokens within the regular expression.
*/
uint8_t breakpoints[7];
uint8_t breakpoints[PM_STRPBRK_CACHE_SIZE];
} regexp;

struct {
Expand Down Expand Up @@ -224,7 +231,7 @@ typedef struct pm_lex_mode {
* This is the character set that should be used to delimit the
* tokens within the string.
*/
uint8_t breakpoints[7];
uint8_t breakpoints[PM_STRPBRK_CACHE_SIZE];
} string;

struct {
Expand Down Expand Up @@ -556,6 +563,13 @@ typedef struct pm_locals {
/** The capacity of the local variables set. */
uint32_t capacity;

/**
* A bloom filter over constant IDs stored in this set. Used to quickly
* reject lookups for names that are definitely not present, avoiding the
* cost of a linear scan or hash probe.
*/
uint32_t bloom;

/** The nullable allocated memory for the local variables in the set. */
pm_local_t *locals;
} pm_locals_t;
Expand Down Expand Up @@ -639,6 +653,9 @@ struct pm_parser {
/** The arena used for all AST-lifetime allocations. Caller-owned. */
pm_arena_t *arena;

/** The arena used for parser metadata (comments, diagnostics, etc.). */
pm_arena_t metadata_arena;

/**
* The next node identifier that will be assigned. This is a unique
* identifier used to track nodes such that the syntax tree can be dropped
Expand Down Expand Up @@ -790,12 +807,26 @@ struct pm_parser {
pm_line_offset_list_t line_offsets;

/**
* We want to add a flag to integer nodes that indicates their base. We only
* want to parse these once, but we don't have space on the token itself to
* communicate this information. So we store it here and pass it through
* when we find tokens that we need it for.
* State communicated from the lexer to the parser for integer tokens.
*/
pm_node_flags_t integer_base;
struct {
/**
* A flag indicating the base of the integer (binary, octal, decimal,
* hexadecimal). Set during lexing and read during node creation.
*/
pm_node_flags_t base;

/**
* When lexing a decimal integer that fits in a uint32_t, we compute
* the value during lexing to avoid re-scanning the digits during
* parsing. If lexed is true, this holds the result and
* pm_integer_parse can be skipped.
*/
uint32_t value;

/** Whether value holds a valid pre-computed integer. */
bool lexed;
} integer;

/**
* This string is used to pass information from the lexer to the parser. It
Expand Down Expand Up @@ -938,6 +969,27 @@ struct pm_parser {
* toggled with a magic comment.
*/
bool warn_mismatched_indentation;

#if defined(PRISM_HAS_NEON) || defined(PRISM_HAS_SSSE3) || defined(PRISM_HAS_SWAR)
/**
* Cached lookup tables for pm_strpbrk's SIMD fast path. Avoids rebuilding
* the nibble-based tables on every call when the charset hasn't changed
* (which is the common case during string/regex/list lexing).
*/
struct {
/** The cached charset (null-terminated, NUL-padded). */
uint8_t charset[PM_STRPBRK_CACHE_SIZE];

/** Nibble-based low lookup table for SIMD matching. */
uint8_t low_lut[16];

/** Nibble-based high lookup table for SIMD matching. */
uint8_t high_lut[16];

/** Scalar fallback table (4 x 64-bit bitmasks covering all ASCII). */
uint64_t table[4];
} strpbrk_cache;
#endif
};

#endif
52 changes: 49 additions & 3 deletions include/prism/util/pm_arena.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,16 +44,52 @@ typedef struct {
size_t block_count;
} pm_arena_t;

/**
* Ensure the arena has at least `capacity` bytes available in its current
* block, allocating a new block if necessary. This allows callers to
* pre-size the arena to avoid repeated small block allocations.
*
* @param arena The arena to pre-size.
* @param capacity The minimum number of bytes to ensure are available.
*/
void pm_arena_reserve(pm_arena_t *arena, size_t capacity);

/**
* Slow path for pm_arena_alloc: allocate a new block and return a pointer to
* the first `size` bytes. Do not call directly — use pm_arena_alloc instead.
*
* @param arena The arena to allocate from.
* @param size The number of bytes to allocate.
* @returns A pointer to the allocated memory.
*/
void * pm_arena_alloc_slow(pm_arena_t *arena, size_t size);

/**
* Allocate memory from the arena. The returned memory is NOT zeroed. This
* function is infallible — it aborts on allocation failure.
*
* The fast path (bump pointer within the current block) is inlined at each
* call site. The slow path (new block allocation) is out-of-line.
*
* @param arena The arena to allocate from.
* @param size The number of bytes to allocate.
* @param alignment The required alignment (must be a power of 2).
* @returns A pointer to the allocated memory.
*/
void * pm_arena_alloc(pm_arena_t *arena, size_t size, size_t alignment);
static PRISM_FORCE_INLINE void *
pm_arena_alloc(pm_arena_t *arena, size_t size, size_t alignment) {
    // No block yet: the slow path will allocate one.
    if (arena->current == NULL) {
        return pm_arena_alloc_slow(arena, size);
    }

    // Bump-pointer fast path: round the current offset up to the requested
    // alignment (power of 2), then see where this allocation would end.
    size_t offset = (arena->current->used + (alignment - 1)) & ~(alignment - 1);
    size_t end = offset + size;

    // The first two comparisons reject size_t overflow in the round-up and in
    // the addition; the third rejects allocations past the block's capacity.
    if (offset < arena->current->used || end < offset || end > arena->current->capacity) {
        return pm_arena_alloc_slow(arena, size);
    }

    arena->current->used = end;
    return arena->current->data + offset;
}

/**
* Allocate zero-initialized memory from the arena. This function is infallible
Expand All @@ -64,7 +100,12 @@ void * pm_arena_alloc(pm_arena_t *arena, size_t size, size_t alignment);
* @param alignment The required alignment (must be a power of 2).
* @returns A pointer to the allocated, zero-initialized memory.
*/
void * pm_arena_zalloc(pm_arena_t *arena, size_t size, size_t alignment);
static inline void *
pm_arena_zalloc(pm_arena_t *arena, size_t size, size_t alignment) {
    // Zero-fill on top of the raw bump allocation; memset returns its first
    // argument, so the allocation pointer flows straight through.
    void *memory = pm_arena_alloc(arena, size, alignment);
    return memset(memory, 0, size);
}

/**
* Allocate memory from the arena and copy the given data into it. This is a
Expand All @@ -76,7 +117,12 @@ void * pm_arena_zalloc(pm_arena_t *arena, size_t size, size_t alignment);
* @param alignment The required alignment (must be a power of 2).
* @returns A pointer to the allocated copy.
*/
void * pm_arena_memdup(pm_arena_t *arena, const void *src, size_t size, size_t alignment);
static inline void *
pm_arena_memdup(pm_arena_t *arena, const void *src, size_t size, size_t alignment) {
    // Copy src into fresh arena storage; memcpy returns its destination, so
    // the allocated pointer is forwarded directly to the caller.
    return memcpy(pm_arena_alloc(arena, size, alignment), src, size);
}

/**
* Free all blocks in the arena. After this call, all pointers returned by
Expand Down
Loading