From 429672ea58cd53e152f29fecf79ad1437cb4edae Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 09:04:33 -0400 Subject: [PATCH 001/100] Move buffer out of utils --- include/prism.h | 2 +- include/prism/{util/pm_buffer.h => buffer.h} | 8 +------- include/prism/node.h | 2 +- include/prism/prettyprint.h | 2 +- include/prism/util/pm_integer.h | 2 +- lib/prism/ffi.rb | 2 +- prism.gemspec | 4 ++-- src/{util/pm_buffer.c => buffer.c} | 8 +++++++- src/regexp.c | 2 +- 9 files changed, 16 insertions(+), 16 deletions(-) rename include/prism/{util/pm_buffer.h => buffer.h} (98%) rename src/{util/pm_buffer.c => buffer.c} (98%) diff --git a/include/prism.h b/include/prism.h index 76733b8aaf..7b81dd6513 100644 --- a/include/prism.h +++ b/include/prism.h @@ -12,13 +12,13 @@ extern "C" { #include "prism/defines.h" #include "prism/util/pm_arena.h" -#include "prism/util/pm_buffer.h" #include "prism/util/pm_char.h" #include "prism/util/pm_integer.h" #include "prism/util/pm_memchr.h" #include "prism/util/pm_strncasecmp.h" #include "prism/util/pm_strpbrk.h" #include "prism/ast.h" +#include "prism/buffer.h" #include "prism/diagnostic.h" #include "prism/node.h" #include "prism/options.h" diff --git a/include/prism/util/pm_buffer.h b/include/prism/buffer.h similarity index 98% rename from include/prism/util/pm_buffer.h rename to include/prism/buffer.h index cb80f8b3ce..e6e21b6895 100644 --- a/include/prism/util/pm_buffer.h +++ b/include/prism/buffer.h @@ -1,5 +1,5 @@ /** - * @file pm_buffer.h + * @file buffer.h * * A wrapper around a contiguous block of allocated memory. */ @@ -9,12 +9,6 @@ #include "prism/defines.h" #include "prism/util/pm_char.h" -#include -#include -#include -#include -#include - /** * A pm_buffer_t is a simple memory buffer that stores data in a contiguous * block of memory. 
diff --git a/include/prism/node.h b/include/prism/node.h index f02f8ba892..a4ead730bc 100644 --- a/include/prism/node.h +++ b/include/prism/node.h @@ -7,8 +7,8 @@ #define PRISM_NODE_H #include "prism/defines.h" +#include "prism/buffer.h" #include "prism/parser.h" -#include "prism/util/pm_buffer.h" /** * Loop through each node in the node list, writing each node to the given diff --git a/include/prism/prettyprint.h b/include/prism/prettyprint.h index 5a52b2b6b8..55da182f53 100644 --- a/include/prism/prettyprint.h +++ b/include/prism/prettyprint.h @@ -17,8 +17,8 @@ void pm_prettyprint(void); #include #include "prism/ast.h" +#include "prism/buffer.h" #include "prism/parser.h" -#include "prism/util/pm_buffer.h" /** * Pretty-prints the AST represented by the given node to the given buffer. diff --git a/include/prism/util/pm_integer.h b/include/prism/util/pm_integer.h index 304665e620..b50446deff 100644 --- a/include/prism/util/pm_integer.h +++ b/include/prism/util/pm_integer.h @@ -7,7 +7,7 @@ #define PRISM_NUMBER_H #include "prism/defines.h" -#include "prism/util/pm_buffer.h" +#include "prism/buffer.h" #include #include diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb index 1bcbfc367c..72187fa490 100644 --- a/lib/prism/ffi.rb +++ b/lib/prism/ffi.rb @@ -106,7 +106,7 @@ def self.load_exported_functions_from(header, *functions, callbacks) ) load_exported_functions_from( - "prism/util/pm_buffer.h", + "prism/buffer.h", "pm_buffer_sizeof", "pm_buffer_init", "pm_buffer_value", diff --git a/prism.gemspec b/prism.gemspec index d8b86c6fba..2ad20a692d 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -47,6 +47,7 @@ Gem::Specification.new do |spec| "ext/prism/extension.h", "include/prism.h", "include/prism/ast.h", + "include/prism/buffer.h", "include/prism/debug_allocator.h", "include/prism/defines.h", "include/prism/diagnostic.h", @@ -59,7 +60,6 @@ Gem::Specification.new do |spec| "include/prism/regexp.h", "include/prism/static_literals.h", "include/prism/util/pm_arena.h", - 
"include/prism/util/pm_buffer.h", "include/prism/util/pm_char.h", "include/prism/util/pm_constant_pool.h", "include/prism/util/pm_integer.h", @@ -158,6 +158,7 @@ Gem::Specification.new do |spec| "sig/generated/prism/parse_result/comments.rbs", "sig/generated/prism/parse_result/errors.rbs", "sig/generated/prism/parse_result/newlines.rbs", + "src/buffer.c", "src/diagnostic.c", "src/encoding.c", "src/node.c", @@ -169,7 +170,6 @@ Gem::Specification.new do |spec| "src/static_literals.c", "src/token_type.c", "src/util/pm_arena.c", - "src/util/pm_buffer.c", "src/util/pm_char.c", "src/util/pm_constant_pool.c", "src/util/pm_integer.c", diff --git a/src/util/pm_buffer.c b/src/buffer.c similarity index 98% rename from src/util/pm_buffer.c rename to src/buffer.c index 9e392427c6..5d1f2a9118 100644 --- a/src/util/pm_buffer.c +++ b/src/buffer.c @@ -1,4 +1,10 @@ -#include "prism/util/pm_buffer.h" +#include "prism/buffer.h" + +#include +#include +#include +#include +#include /** * Return the size of the pm_buffer_t struct. diff --git a/src/regexp.c b/src/regexp.c index df8bb69b21..cc626290b7 100644 --- a/src/regexp.c +++ b/src/regexp.c @@ -1,6 +1,6 @@ +#include "prism/buffer.h" #include "prism/regexp.h" #include "prism/diagnostic.h" -#include "prism/util/pm_buffer.h" #include "prism/util/pm_strncasecmp.h" /** The maximum depth of nested groups allowed in a regular expression. 
*/ From bda0f889b57e50ef5a90c7c29d00ae2812960d51 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 09:08:16 -0400 Subject: [PATCH 002/100] Split up buffer headers between internal and external --- include/prism.h | 2 +- include/prism/buffer.h | 157 ----------------------------- include/prism/internal/buffer.h | 169 ++++++++++++++++++++++++++++++++ include/prism/node.h | 2 +- include/prism/util/pm_integer.h | 2 +- prism.gemspec | 1 + src/buffer.c | 2 +- src/regexp.c | 2 +- 8 files changed, 175 insertions(+), 162 deletions(-) create mode 100644 include/prism/internal/buffer.h diff --git a/include/prism.h b/include/prism.h index 7b81dd6513..73cc5bfc03 100644 --- a/include/prism.h +++ b/include/prism.h @@ -17,8 +17,8 @@ extern "C" { #include "prism/util/pm_memchr.h" #include "prism/util/pm_strncasecmp.h" #include "prism/util/pm_strpbrk.h" +#include "prism/internal/buffer.h" #include "prism/ast.h" -#include "prism/buffer.h" #include "prism/diagnostic.h" #include "prism/node.h" #include "prism/options.h" diff --git a/include/prism/buffer.h b/include/prism/buffer.h index e6e21b6895..335dae6811 100644 --- a/include/prism/buffer.h +++ b/include/prism/buffer.h @@ -31,15 +31,6 @@ typedef struct { */ PRISM_EXPORTED_FUNCTION size_t pm_buffer_sizeof(void); -/** - * Initialize a pm_buffer_t with the given capacity. - * - * @param buffer The buffer to initialize. - * @param capacity The capacity of the buffer. - * @returns True if the buffer was initialized successfully, false otherwise. - */ -bool pm_buffer_init_capacity(pm_buffer_t *buffer, size_t capacity); - /** * Initialize a pm_buffer_t with its default values. * @@ -70,154 +61,6 @@ PRISM_EXPORTED_FUNCTION char * pm_buffer_value(const pm_buffer_t *buffer); */ PRISM_EXPORTED_FUNCTION size_t pm_buffer_length(const pm_buffer_t *buffer); -/** - * Append the given amount of space as zeroes to the buffer. - * - * @param buffer The buffer to append to. - * @param length The amount of space to append and zero. 
- */ -void pm_buffer_append_zeroes(pm_buffer_t *buffer, size_t length); - -/** - * Append a formatted string to the buffer. - * - * @param buffer The buffer to append to. - * @param format The format string to append. - * @param ... The arguments to the format string. - */ -void pm_buffer_append_format(pm_buffer_t *buffer, const char *format, ...) PRISM_ATTRIBUTE_FORMAT(2, 3); - -/** - * Append a string to the buffer. - * - * @param buffer The buffer to append to. - * @param value The string to append. - * @param length The length of the string to append. - */ -void pm_buffer_append_string(pm_buffer_t *buffer, const char *value, size_t length); - -/** - * Append a list of bytes to the buffer. - * - * @param buffer The buffer to append to. - * @param value The bytes to append. - * @param length The length of the bytes to append. - */ -void pm_buffer_append_bytes(pm_buffer_t *buffer, const uint8_t *value, size_t length); - -/** - * Append a single byte to the buffer. - * - * @param buffer The buffer to append to. - * @param value The byte to append. - */ -void pm_buffer_append_byte(pm_buffer_t *buffer, uint8_t value); - -/** - * Append a 32-bit unsigned integer to the buffer as a variable-length integer. - * - * @param buffer The buffer to append to. - * @param value The integer to append. - */ -void pm_buffer_append_varuint(pm_buffer_t *buffer, uint32_t value); - -/** - * Append a 32-bit signed integer to the buffer as a variable-length integer. - * - * @param buffer The buffer to append to. - * @param value The integer to append. - */ -void pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value); - -/** - * Append a double to the buffer. - * - * @param buffer The buffer to append to. - * @param value The double to append. - */ -void pm_buffer_append_double(pm_buffer_t *buffer, double value); - -/** - * Append a unicode codepoint to the buffer. - * - * @param buffer The buffer to append to. - * @param value The character to append. 
- * @returns True if the codepoint was valid and appended successfully, false - * otherwise. - */ -bool pm_buffer_append_unicode_codepoint(pm_buffer_t *buffer, uint32_t value); - -/** - * The different types of escaping that can be performed by the buffer when - * appending a slice of Ruby source code. - */ -typedef enum { - PM_BUFFER_ESCAPING_RUBY, - PM_BUFFER_ESCAPING_JSON -} pm_buffer_escaping_t; - -/** - * Append a slice of source code to the buffer. - * - * @param buffer The buffer to append to. - * @param source The source code to append. - * @param length The length of the source code to append. - * @param escaping The type of escaping to perform. - */ -void pm_buffer_append_source(pm_buffer_t *buffer, const uint8_t *source, size_t length, pm_buffer_escaping_t escaping); - -/** - * Prepend the given string to the buffer. - * - * @param buffer The buffer to prepend to. - * @param value The string to prepend. - * @param length The length of the string to prepend. - */ -void pm_buffer_prepend_string(pm_buffer_t *buffer, const char *value, size_t length); - -/** - * Concatenate one buffer onto another. - * - * @param destination The buffer to concatenate onto. - * @param source The buffer to concatenate. - */ -void pm_buffer_concat(pm_buffer_t *destination, const pm_buffer_t *source); - -/** - * Clear the buffer by reducing its size to 0. This does not free the allocated - * memory, but it does allow the buffer to be reused. - * - * @param buffer The buffer to clear. - */ -void pm_buffer_clear(pm_buffer_t *buffer); - -/** - * Strip the whitespace from the end of the buffer. - * - * @param buffer The buffer to strip. - */ -void pm_buffer_rstrip(pm_buffer_t *buffer); - -/** - * Checks if the buffer includes the given value. - * - * @param buffer The buffer to check. - * @param value The value to check for. - * @returns The index of the first occurrence of the value in the buffer, or - * SIZE_MAX if the value is not found. 
- */ -size_t pm_buffer_index(const pm_buffer_t *buffer, char value); - -/** - * Insert the given string into the buffer at the given index. - * - * @param buffer The buffer to insert into. - * @param index The index to insert at. - * @param value The string to insert. - * @param length The length of the string to insert. - */ -void pm_buffer_insert(pm_buffer_t *buffer, size_t index, const char *value, size_t length); - /** * Free the memory associated with the buffer. * diff --git a/include/prism/internal/buffer.h b/include/prism/internal/buffer.h new file mode 100644 index 0000000000..b76cf54ca8 --- /dev/null +++ b/include/prism/internal/buffer.h @@ -0,0 +1,169 @@ +/** + * @file internal/buffer.h + * + * A wrapper around a contiguous block of allocated memory. + */ +#ifndef PRISM_INTERNAL_BUFFER_H +#define PRISM_INTERNAL_BUFFER_H + +#include "prism/defines.h" +#include "prism/buffer.h" + +/** + * Initialize a pm_buffer_t with the given capacity. + * + * @param buffer The buffer to initialize. + * @param capacity The capacity of the buffer. + * @returns True if the buffer was initialized successfully, false otherwise. + */ +bool pm_buffer_init_capacity(pm_buffer_t *buffer, size_t capacity); + +/** + * Append the given amount of space as zeroes to the buffer. + * + * @param buffer The buffer to append to. + * @param length The amount of space to append and zero. + */ +void pm_buffer_append_zeroes(pm_buffer_t *buffer, size_t length); + +/** + * Append a formatted string to the buffer. + * + * @param buffer The buffer to append to. + * @param format The format string to append. + * @param ... The arguments to the format string. + */ +void pm_buffer_append_format(pm_buffer_t *buffer, const char *format, ...) PRISM_ATTRIBUTE_FORMAT(2, 3); + +/** + * Append a string to the buffer. + * + * @param buffer The buffer to append to. + * @param value The string to append. + * @param length The length of the string to append. 
+ */ +void pm_buffer_append_string(pm_buffer_t *buffer, const char *value, size_t length); + +/** + * Append a list of bytes to the buffer. + * + * @param buffer The buffer to append to. + * @param value The bytes to append. + * @param length The length of the bytes to append. + */ +void pm_buffer_append_bytes(pm_buffer_t *buffer, const uint8_t *value, size_t length); + +/** + * Append a single byte to the buffer. + * + * @param buffer The buffer to append to. + * @param value The byte to append. + */ +void pm_buffer_append_byte(pm_buffer_t *buffer, uint8_t value); + +/** + * Append a 32-bit unsigned integer to the buffer as a variable-length integer. + * + * @param buffer The buffer to append to. + * @param value The integer to append. + */ +void pm_buffer_append_varuint(pm_buffer_t *buffer, uint32_t value); + +/** + * Append a 32-bit signed integer to the buffer as a variable-length integer. + * + * @param buffer The buffer to append to. + * @param value The integer to append. + */ +void pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value); + +/** + * Append a double to the buffer. + * + * @param buffer The buffer to append to. + * @param value The double to append. + */ +void pm_buffer_append_double(pm_buffer_t *buffer, double value); + +/** + * Append a unicode codepoint to the buffer. + * + * @param buffer The buffer to append to. + * @param value The character to append. + * @returns True if the codepoint was valid and appended successfully, false + * otherwise. + */ +bool pm_buffer_append_unicode_codepoint(pm_buffer_t *buffer, uint32_t value); + +/** + * The different types of escaping that can be performed by the buffer when + * appending a slice of Ruby source code. + */ +typedef enum { + PM_BUFFER_ESCAPING_RUBY, + PM_BUFFER_ESCAPING_JSON +} pm_buffer_escaping_t; + +/** + * Append a slice of source code to the buffer. + * + * @param buffer The buffer to append to. + * @param source The source code to append. 
+ * @param length The length of the source code to append. + * @param escaping The type of escaping to perform. + */ +void pm_buffer_append_source(pm_buffer_t *buffer, const uint8_t *source, size_t length, pm_buffer_escaping_t escaping); + +/** + * Prepend the given string to the buffer. + * + * @param buffer The buffer to prepend to. + * @param value The string to prepend. + * @param length The length of the string to prepend. + */ +void pm_buffer_prepend_string(pm_buffer_t *buffer, const char *value, size_t length); + +/** + * Concatenate one buffer onto another. + * + * @param destination The buffer to concatenate onto. + * @param source The buffer to concatenate. + */ +void pm_buffer_concat(pm_buffer_t *destination, const pm_buffer_t *source); + +/** + * Clear the buffer by reducing its size to 0. This does not free the allocated + * memory, but it does allow the buffer to be reused. + * + * @param buffer The buffer to clear. + */ +void pm_buffer_clear(pm_buffer_t *buffer); + +/** + * Strip the whitespace from the end of the buffer. + * + * @param buffer The buffer to strip. + */ +void pm_buffer_rstrip(pm_buffer_t *buffer); + +/** + * Checks if the buffer includes the given value. + * + * @param buffer The buffer to check. + * @param value The value to check for. + * @returns The index of the first occurrence of the value in the buffer, or + * SIZE_MAX if the value is not found. + */ +size_t pm_buffer_index(const pm_buffer_t *buffer, char value); + +/** + * Insert the given string into the buffer at the given index. + * + * @param buffer The buffer to insert into. + * @param index The index to insert at. + * @param value The string to insert. + * @param length The length of the string to insert. 
+ */ +void pm_buffer_insert(pm_buffer_t *buffer, size_t index, const char *value, size_t length); + +#endif diff --git a/include/prism/node.h b/include/prism/node.h index a4ead730bc..560936dc08 100644 --- a/include/prism/node.h +++ b/include/prism/node.h @@ -7,7 +7,7 @@ #define PRISM_NODE_H #include "prism/defines.h" -#include "prism/buffer.h" +#include "prism/internal/buffer.h" #include "prism/parser.h" /** diff --git a/include/prism/util/pm_integer.h b/include/prism/util/pm_integer.h index b50446deff..673db20419 100644 --- a/include/prism/util/pm_integer.h +++ b/include/prism/util/pm_integer.h @@ -7,7 +7,7 @@ #define PRISM_NUMBER_H #include "prism/defines.h" -#include "prism/buffer.h" +#include "prism/internal/buffer.h" #include #include diff --git a/prism.gemspec b/prism.gemspec index 2ad20a692d..7d6ea6ea08 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -59,6 +59,7 @@ Gem::Specification.new do |spec| "include/prism/prettyprint.h", "include/prism/regexp.h", "include/prism/static_literals.h", + "include/prism/internal/buffer.h", "include/prism/util/pm_arena.h", "include/prism/util/pm_char.h", "include/prism/util/pm_constant_pool.h", diff --git a/src/buffer.c b/src/buffer.c index 5d1f2a9118..111e02ea06 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -1,4 +1,4 @@ -#include "prism/buffer.h" +#include "prism/internal/buffer.h" #include #include diff --git a/src/regexp.c b/src/regexp.c index cc626290b7..5fc5b983d4 100644 --- a/src/regexp.c +++ b/src/regexp.c @@ -1,4 +1,4 @@ -#include "prism/buffer.h" +#include "prism/internal/buffer.h" #include "prism/regexp.h" #include "prism/diagnostic.h" #include "prism/util/pm_strncasecmp.h" From 1039dff6ca5747b67e7ef97bbf22a5f08d9efac3 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 09:21:05 -0400 Subject: [PATCH 003/100] Only include what is necessary for buffer --- include/prism/allocator.h | 85 +++++++++++++++++++++++++++++++++ include/prism/buffer.h | 6 ++- include/prism/defines.h | 39 ++------------- 
include/prism/exported.h | 26 ++++++++++ include/prism/format.h | 28 +++++++++++ include/prism/internal/buffer.h | 4 +- src/buffer.c | 7 +-- 7 files changed, 154 insertions(+), 41 deletions(-) create mode 100644 include/prism/allocator.h create mode 100644 include/prism/exported.h create mode 100644 include/prism/format.h diff --git a/include/prism/allocator.h b/include/prism/allocator.h new file mode 100644 index 0000000000..6b1c26e84e --- /dev/null +++ b/include/prism/allocator.h @@ -0,0 +1,85 @@ +/** + * @file allocator.h + * + * Macro definitions for defining the main and a custom allocator for Prism. + */ +#ifndef PRISM_ALLOCATOR_H +#define PRISM_ALLOCATOR_H + +/** + * If you build Prism with a custom allocator, configure it with + * "-D PRISM_XALLOCATOR" to use your own allocator that defines xmalloc, + * xrealloc, xcalloc, and xfree. + * + * For example, your `prism_xallocator.h` file could look like this: + * + * ``` + * #ifndef PRISM_XALLOCATOR_H + * #define PRISM_XALLOCATOR_H + * #define xmalloc my_malloc + * #define xrealloc my_realloc + * #define xcalloc my_calloc + * #define xfree my_free + * #define xrealloc_sized my_realloc_sized // (optional) + * #define xfree_sized my_free_sized // (optional) + * #endif + * ``` + */ +#ifdef PRISM_XALLOCATOR + #include "prism_xallocator.h" +#else + #ifndef xmalloc + /** + * The malloc function that should be used. This can be overridden with + * the PRISM_XALLOCATOR define. + */ + #define xmalloc malloc + #endif + + #ifndef xrealloc + /** + * The realloc function that should be used. This can be overridden with + * the PRISM_XALLOCATOR define. + */ + #define xrealloc realloc + #endif + + #ifndef xcalloc + /** + * The calloc function that should be used. This can be overridden with + * the PRISM_XALLOCATOR define. + */ + #define xcalloc calloc + #endif + + #ifndef xfree + /** + * The free function that should be used. This can be overridden with + * the PRISM_XALLOCATOR define. 
+ */ + #define xfree free + #endif +#endif + +#ifndef xfree_sized + /** + * The free_sized function that should be used. This can be overridden with + * the PRISM_XALLOCATOR define. If not defined, defaults to calling xfree. + */ + #define xfree_sized(p, s) xfree(((void)(s), (p))) +#endif + +#ifndef xrealloc_sized + /** + * The xrealloc_sized function that should be used. This can be overridden + * with the PRISM_XALLOCATOR define. If not defined, defaults to calling + * xrealloc. + */ + #define xrealloc_sized(p, ns, os) xrealloc((p), ((void)(os), (ns))) +#endif + +#ifdef PRISM_BUILD_DEBUG + #include "prism/debug_allocator.h" +#endif + +#endif diff --git a/include/prism/buffer.h b/include/prism/buffer.h index 335dae6811..ffc07461fc 100644 --- a/include/prism/buffer.h +++ b/include/prism/buffer.h @@ -6,8 +6,10 @@ #ifndef PRISM_BUFFER_H #define PRISM_BUFFER_H -#include "prism/defines.h" -#include "prism/util/pm_char.h" +#include "prism/exported.h" + +#include +#include /** * A pm_buffer_t is a simple memory buffer that stores data in a contiguous diff --git a/include/prism/defines.h b/include/prism/defines.h index d666582b17..c533ba4ffe 100644 --- a/include/prism/defines.h +++ b/include/prism/defines.h @@ -9,6 +9,10 @@ #ifndef PRISM_DEFINES_H #define PRISM_DEFINES_H +#include "prism/allocator.h" +#include "prism/exported.h" +#include "prism/format.h" + #include #include #include @@ -37,41 +41,6 @@ #define PRISM_DEPTH_MAXIMUM 10000 #endif -/** - * By default, we compile with -fvisibility=hidden. When this is enabled, we - * need to mark certain functions as being publically-visible. This macro does - * that in a compiler-agnostic way. 
- */ -#ifndef PRISM_EXPORTED_FUNCTION -# ifdef PRISM_EXPORT_SYMBOLS -# ifdef _WIN32 -# define PRISM_EXPORTED_FUNCTION __declspec(dllexport) extern -# else -# define PRISM_EXPORTED_FUNCTION __attribute__((__visibility__("default"))) extern -# endif -# else -# define PRISM_EXPORTED_FUNCTION -# endif -#endif - -/** - * Certain compilers support specifying that a function accepts variadic - * parameters that look like printf format strings to provide a better developer - * experience when someone is using the function. This macro does that in a - * compiler-agnostic way. - */ -#if defined(__GNUC__) -# if defined(__MINGW_PRINTF_FORMAT) -# define PRISM_ATTRIBUTE_FORMAT(string_index, argument_index) __attribute__((format(__MINGW_PRINTF_FORMAT, string_index, argument_index))) -# else -# define PRISM_ATTRIBUTE_FORMAT(string_index, argument_index) __attribute__((format(printf, string_index, argument_index))) -# endif -#elif defined(__clang__) -# define PRISM_ATTRIBUTE_FORMAT(string_index, argument_index) __attribute__((__format__(__printf__, string_index, argument_index))) -#else -# define PRISM_ATTRIBUTE_FORMAT(string_index, argument_index) -#endif - /** * GCC will warn if you specify a function or parameter that is unused at * runtime. This macro allows you to mark a function or parameter as unused in a diff --git a/include/prism/exported.h b/include/prism/exported.h new file mode 100644 index 0000000000..74476b0efb --- /dev/null +++ b/include/prism/exported.h @@ -0,0 +1,26 @@ +/** + * @file exported.h + * + * Macro definitions for make functions publically visible. + */ +#ifndef PRISM_EXPORTED_H +#define PRISM_EXPORTED_H + +/** + * By default, we compile with -fvisibility=hidden. When this is enabled, we + * need to mark certain functions as being publically-visible. This macro does + * that in a compiler-agnostic way. 
+ */ +#ifndef PRISM_EXPORTED_FUNCTION +# ifdef PRISM_EXPORT_SYMBOLS +# ifdef _WIN32 +# define PRISM_EXPORTED_FUNCTION __declspec(dllexport) extern +# else +# define PRISM_EXPORTED_FUNCTION __attribute__((__visibility__("default"))) extern +# endif +# else +# define PRISM_EXPORTED_FUNCTION +# endif +#endif + +#endif diff --git a/include/prism/format.h b/include/prism/format.h new file mode 100644 index 0000000000..973fc3fe4e --- /dev/null +++ b/include/prism/format.h @@ -0,0 +1,28 @@ +/** + * @file format.h + * + * Macro definition for specifying that a function accepts variadic parameters + * that look like printf format strings. + */ +#ifndef PRISM_FORMAT_H +#define PRISM_FORMAT_H + +/** + * Certain compilers support specifying that a function accepts variadic + * parameters that look like printf format strings to provide a better developer + * experience when someone is using the function. This macro does that in a + * compiler-agnostic way. + */ +#if defined(__GNUC__) +# if defined(__MINGW_PRINTF_FORMAT) +# define PRISM_ATTRIBUTE_FORMAT(string_index_, argument_index_) __attribute__((format(__MINGW_PRINTF_FORMAT, string_index_, argument_index_))) +# else +# define PRISM_ATTRIBUTE_FORMAT(string_index_, argument_index_) __attribute__((format(printf, string_index_, argument_index_))) +# endif +#elif defined(__clang__) +# define PRISM_ATTRIBUTE_FORMAT(string_index_, argument_index_) __attribute__((__format__(__printf__, string_index_, argument_index_))) +#else +# define PRISM_ATTRIBUTE_FORMAT(string_index_, argument_index_) +#endif + +#endif diff --git a/include/prism/internal/buffer.h b/include/prism/internal/buffer.h index b76cf54ca8..a5c46ed530 100644 --- a/include/prism/internal/buffer.h +++ b/include/prism/internal/buffer.h @@ -6,8 +6,10 @@ #ifndef PRISM_INTERNAL_BUFFER_H #define PRISM_INTERNAL_BUFFER_H -#include "prism/defines.h" #include "prism/buffer.h" +#include "prism/format.h" + +#include /** * Initialize a pm_buffer_t with the given capacity. 
diff --git a/src/buffer.c b/src/buffer.c index 111e02ea06..dbbb05c7ef 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -1,8 +1,9 @@ #include "prism/internal/buffer.h" +#include "prism/allocator.h" +#include "prism/util/pm_char.h" -#include -#include -#include +#include +#include #include #include From e4df04157a1e28bfd02d2c5f492ba6d0907c7ce9 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 09:26:07 -0400 Subject: [PATCH 004/100] Move strncasecmp out of utils and make internal only --- include/prism.h | 2 +- include/prism/encoding.h | 2 +- .../{util/pm_strncasecmp.h => internal/strncasecmp.h} | 9 +++------ prism.gemspec | 4 ++-- src/regexp.c | 2 +- src/{util/pm_strncasecmp.c => strncasecmp.c} | 5 ++--- 6 files changed, 10 insertions(+), 14 deletions(-) rename include/prism/{util/pm_strncasecmp.h => internal/strncasecmp.h} (87%) rename src/{util/pm_strncasecmp.c => strncasecmp.c} (94%) diff --git a/include/prism.h b/include/prism.h index 73cc5bfc03..50451f45ed 100644 --- a/include/prism.h +++ b/include/prism.h @@ -15,9 +15,9 @@ extern "C" { #include "prism/util/pm_char.h" #include "prism/util/pm_integer.h" #include "prism/util/pm_memchr.h" -#include "prism/util/pm_strncasecmp.h" #include "prism/util/pm_strpbrk.h" #include "prism/internal/buffer.h" +#include "prism/internal/strncasecmp.h" #include "prism/ast.h" #include "prism/diagnostic.h" #include "prism/node.h" diff --git a/include/prism/encoding.h b/include/prism/encoding.h index 5f7724821f..99c2397bdc 100644 --- a/include/prism/encoding.h +++ b/include/prism/encoding.h @@ -7,7 +7,7 @@ #define PRISM_ENCODING_H #include "prism/defines.h" -#include "prism/util/pm_strncasecmp.h" +#include "prism/internal/strncasecmp.h" #include #include diff --git a/include/prism/util/pm_strncasecmp.h b/include/prism/internal/strncasecmp.h similarity index 87% rename from include/prism/util/pm_strncasecmp.h rename to include/prism/internal/strncasecmp.h index 5cb88cb5eb..c6cabe9c23 100644 --- 
a/include/prism/util/pm_strncasecmp.h +++ b/include/prism/internal/strncasecmp.h @@ -1,14 +1,11 @@ /** - * @file pm_strncasecmp.h + * @file internal/strncasecmp.h * * A custom strncasecmp implementation. */ -#ifndef PRISM_STRNCASECMP_H -#define PRISM_STRNCASECMP_H +#ifndef PRISM_INTERNAL_STRNCASECMP_H +#define PRISM_INTERNAL_STRNCASECMP_H -#include "prism/defines.h" - -#include #include #include diff --git a/prism.gemspec b/prism.gemspec index 7d6ea6ea08..10a0a6bffe 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -60,6 +60,7 @@ Gem::Specification.new do |spec| "include/prism/regexp.h", "include/prism/static_literals.h", "include/prism/internal/buffer.h", + "include/prism/internal/strncasecmp.h", "include/prism/util/pm_arena.h", "include/prism/util/pm_char.h", "include/prism/util/pm_constant_pool.h", @@ -67,7 +68,6 @@ Gem::Specification.new do |spec| "include/prism/util/pm_list.h", "include/prism/util/pm_memchr.h", "include/prism/util/pm_line_offset_list.h", - "include/prism/util/pm_strncasecmp.h", "include/prism/util/pm_string.h", "include/prism/util/pm_strpbrk.h", "include/prism/version.h", @@ -169,6 +169,7 @@ Gem::Specification.new do |spec| "src/regexp.c", "src/serialize.c", "src/static_literals.c", + "src/strncasecmp.c", "src/token_type.c", "src/util/pm_arena.c", "src/util/pm_char.c", @@ -178,7 +179,6 @@ Gem::Specification.new do |spec| "src/util/pm_memchr.c", "src/util/pm_line_offset_list.c", "src/util/pm_string.c", - "src/util/pm_strncasecmp.c", "src/util/pm_strpbrk.c" ] diff --git a/src/regexp.c b/src/regexp.c index 5fc5b983d4..115b3ed7e1 100644 --- a/src/regexp.c +++ b/src/regexp.c @@ -1,7 +1,7 @@ #include "prism/internal/buffer.h" +#include "prism/internal/strncasecmp.h" #include "prism/regexp.h" #include "prism/diagnostic.h" -#include "prism/util/pm_strncasecmp.h" /** The maximum depth of nested groups allowed in a regular expression. 
*/ #define PM_REGEXP_PARSE_DEPTH_MAX 4096 diff --git a/src/util/pm_strncasecmp.c b/src/strncasecmp.c similarity index 94% rename from src/util/pm_strncasecmp.c rename to src/strncasecmp.c index 3f58421554..c4e2214ee1 100644 --- a/src/util/pm_strncasecmp.c +++ b/src/strncasecmp.c @@ -1,11 +1,10 @@ -#include "prism/util/pm_strncasecmp.h" +#include "prism/internal/strncasecmp.h" /** * A locale-insensitive version of `tolower(3)` */ static inline int -pm_tolower(int c) -{ +pm_tolower(int c) { if ('A' <= c && c <= 'Z') { return c | 0x20; } From cdf8bed9ba36b8d1bea693e8155961a4f8a3118e Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 09:30:12 -0400 Subject: [PATCH 005/100] Make memchr implementation internal --- include/prism.h | 2 +- .../prism/{util/pm_memchr.h => internal/memchr.h} | 7 +++---- include/prism/regexp.h | 2 +- prism.gemspec | 4 ++-- src/{util/pm_memchr.c => memchr.c} | 12 +++++++----- 5 files changed, 14 insertions(+), 13 deletions(-) rename include/prism/{util/pm_memchr.h => internal/memchr.h} (89%) rename src/{util/pm_memchr.c => memchr.c} (85%) diff --git a/include/prism.h b/include/prism.h index 50451f45ed..9d18c8ccb2 100644 --- a/include/prism.h +++ b/include/prism.h @@ -14,9 +14,9 @@ extern "C" { #include "prism/util/pm_arena.h" #include "prism/util/pm_char.h" #include "prism/util/pm_integer.h" -#include "prism/util/pm_memchr.h" #include "prism/util/pm_strpbrk.h" #include "prism/internal/buffer.h" +#include "prism/internal/memchr.h" #include "prism/internal/strncasecmp.h" #include "prism/ast.h" #include "prism/diagnostic.h" diff --git a/include/prism/util/pm_memchr.h b/include/prism/internal/memchr.h similarity index 89% rename from include/prism/util/pm_memchr.h rename to include/prism/internal/memchr.h index e0671eaed3..7277971681 100644 --- a/include/prism/util/pm_memchr.h +++ b/include/prism/internal/memchr.h @@ -1,12 +1,11 @@ /** - * @file pm_memchr.h + * @file internal/memchr.h * * A custom memchr implementation. 
*/ -#ifndef PRISM_MEMCHR_H -#define PRISM_MEMCHR_H +#ifndef PRISM_INTERNAL_MEMCHR_H +#define PRISM_INTERNAL_MEMCHR_H -#include "prism/defines.h" #include "prism/encoding.h" #include diff --git a/include/prism/regexp.h b/include/prism/regexp.h index b3e739b457..60a84b9838 100644 --- a/include/prism/regexp.h +++ b/include/prism/regexp.h @@ -9,7 +9,7 @@ #include "prism/defines.h" #include "prism/parser.h" #include "prism/encoding.h" -#include "prism/util/pm_memchr.h" +#include "prism/internal/memchr.h" #include "prism/util/pm_string.h" #include diff --git a/prism.gemspec b/prism.gemspec index 10a0a6bffe..43bb030c98 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -60,13 +60,13 @@ Gem::Specification.new do |spec| "include/prism/regexp.h", "include/prism/static_literals.h", "include/prism/internal/buffer.h", + "include/prism/internal/memchr.h", "include/prism/internal/strncasecmp.h", "include/prism/util/pm_arena.h", "include/prism/util/pm_char.h", "include/prism/util/pm_constant_pool.h", "include/prism/util/pm_integer.h", "include/prism/util/pm_list.h", - "include/prism/util/pm_memchr.h", "include/prism/util/pm_line_offset_list.h", "include/prism/util/pm_string.h", "include/prism/util/pm_strpbrk.h", @@ -162,6 +162,7 @@ Gem::Specification.new do |spec| "src/buffer.c", "src/diagnostic.c", "src/encoding.c", + "src/memchr.c", "src/node.c", "src/options.c", "src/prettyprint.c", @@ -176,7 +177,6 @@ Gem::Specification.new do |spec| "src/util/pm_constant_pool.c", "src/util/pm_integer.c", "src/util/pm_list.c", - "src/util/pm_memchr.c", "src/util/pm_line_offset_list.c", "src/util/pm_string.c", "src/util/pm_strpbrk.c" diff --git a/src/util/pm_memchr.c b/src/memchr.c similarity index 85% rename from src/util/pm_memchr.c rename to src/memchr.c index 7ea20ace6d..6266d4ca7a 100644 --- a/src/util/pm_memchr.c +++ b/src/memchr.c @@ -1,6 +1,10 @@ -#include "prism/util/pm_memchr.h" +#include "prism/internal/memchr.h" -#define PRISM_MEMCHR_TRAILING_BYTE_MINIMUM 0x40 +#include +#include 
+#include + +#define TRAILING_BYTE_MINIMUM 0x40 /** * We need to roll our own memchr to handle cases where the encoding changes and @@ -9,7 +13,7 @@ */ void * pm_memchr(const void *memory, int character, size_t number, bool encoding_changed, const pm_encoding_t *encoding) { - if (encoding_changed && encoding->multibyte && character >= PRISM_MEMCHR_TRAILING_BYTE_MINIMUM) { + if (encoding_changed && encoding->multibyte && character >= TRAILING_BYTE_MINIMUM) { const uint8_t *source = (const uint8_t *) memory; size_t index = 0; @@ -31,5 +35,3 @@ pm_memchr(const void *memory, int character, size_t number, bool encoding_change return memchr(memory, character, number); } } - -#undef PRISM_MEMCHR_TRAILING_BYTE_MINIMUM From 0862b48f0c1fb81fa353f8e4b06996cb9fbbe764 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 09:32:29 -0400 Subject: [PATCH 006/100] Remove unnecessary PRISM_EXPORTED_FUNCTION from source files --- src/options.c | 34 ++++++++++++++++----------------- src/prism.c | 26 ++++++++++++------------- src/regexp.c | 2 +- src/util/pm_integer.c | 4 ++-- src/util/pm_list.c | 6 +++--- src/util/pm_string.c | 12 ++++++------ templates/src/node.c.erb | 8 ++++---- templates/src/prettyprint.c.erb | 2 +- templates/src/serialize.c.erb | 4 ++-- templates/src/token_type.c.erb | 2 +- 10 files changed, 50 insertions(+), 50 deletions(-) diff --git a/src/options.c b/src/options.c index 961d52330f..ecd88a8021 100644 --- a/src/options.c +++ b/src/options.c @@ -3,7 +3,7 @@ /** * Set the shebang callback option on the given options struct. */ -PRISM_EXPORTED_FUNCTION void +void pm_options_shebang_callback_set(pm_options_t *options, pm_options_shebang_callback_t shebang_callback, void *shebang_callback_data) { options->shebang_callback = shebang_callback; options->shebang_callback_data = shebang_callback_data; @@ -12,7 +12,7 @@ pm_options_shebang_callback_set(pm_options_t *options, pm_options_shebang_callba /** * Set the filepath option on the given options struct. 
*/ -PRISM_EXPORTED_FUNCTION void +void pm_options_filepath_set(pm_options_t *options, const char *filepath) { pm_string_constant_init(&options->filepath, filepath, strlen(filepath)); } @@ -20,7 +20,7 @@ pm_options_filepath_set(pm_options_t *options, const char *filepath) { /** * Set the encoding option on the given options struct. */ -PRISM_EXPORTED_FUNCTION void +void pm_options_encoding_set(pm_options_t *options, const char *encoding) { pm_string_constant_init(&options->encoding, encoding, strlen(encoding)); } @@ -28,7 +28,7 @@ pm_options_encoding_set(pm_options_t *options, const char *encoding) { /** * Set the encoding_locked option on the given options struct. */ -PRISM_EXPORTED_FUNCTION void +void pm_options_encoding_locked_set(pm_options_t *options, bool encoding_locked) { options->encoding_locked = encoding_locked; } @@ -36,7 +36,7 @@ pm_options_encoding_locked_set(pm_options_t *options, bool encoding_locked) { /** * Set the line option on the given options struct. */ -PRISM_EXPORTED_FUNCTION void +void pm_options_line_set(pm_options_t *options, int32_t line) { options->line = line; } @@ -44,7 +44,7 @@ pm_options_line_set(pm_options_t *options, int32_t line) { /** * Set the frozen string literal option on the given options struct. */ -PRISM_EXPORTED_FUNCTION void +void pm_options_frozen_string_literal_set(pm_options_t *options, bool frozen_string_literal) { options->frozen_string_literal = frozen_string_literal ? PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED : PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED; } @@ -52,7 +52,7 @@ pm_options_frozen_string_literal_set(pm_options_t *options, bool frozen_string_l /** * Sets the command line option on the given options struct. */ -PRISM_EXPORTED_FUNCTION void +void pm_options_command_line_set(pm_options_t *options, uint8_t command_line) { options->command_line = command_line; } @@ -70,7 +70,7 @@ is_number(const char *string, size_t length) { * string. If the string contains an invalid option, this returns false. 
* Otherwise, it returns true. */ -PRISM_EXPORTED_FUNCTION bool +bool pm_options_version_set(pm_options_t *options, const char *version, size_t length) { if (version == NULL) { options->version = PM_OPTIONS_VERSION_LATEST; @@ -136,7 +136,7 @@ pm_options_version_set(pm_options_t *options, const char *version, size_t length /** * Set the main script option on the given options struct. */ -PRISM_EXPORTED_FUNCTION void +void pm_options_main_script_set(pm_options_t *options, bool main_script) { options->main_script = main_script; } @@ -144,7 +144,7 @@ pm_options_main_script_set(pm_options_t *options, bool main_script) { /** * Set the partial script option on the given options struct. */ -PRISM_EXPORTED_FUNCTION void +void pm_options_partial_script_set(pm_options_t *options, bool partial_script) { options->partial_script = partial_script; } @@ -152,7 +152,7 @@ pm_options_partial_script_set(pm_options_t *options, bool partial_script) { /** * Set the freeze option on the given options struct. */ -PRISM_EXPORTED_FUNCTION void +void pm_options_freeze_set(pm_options_t *options, bool freeze) { options->freeze = freeze; } @@ -168,7 +168,7 @@ pm_options_freeze_set(pm_options_t *options, bool freeze) { /** * Allocate and zero out the scopes array on the given options struct. */ -PRISM_EXPORTED_FUNCTION bool +bool pm_options_scopes_init(pm_options_t *options, size_t scopes_count) { options->scopes_count = scopes_count; options->scopes = xcalloc(scopes_count, sizeof(pm_options_scope_t)); @@ -178,7 +178,7 @@ pm_options_scopes_init(pm_options_t *options, size_t scopes_count) { /** * Return a pointer to the scope at the given index within the given options. */ -PRISM_EXPORTED_FUNCTION const pm_options_scope_t * +const pm_options_scope_t * pm_options_scope_get(const pm_options_t *options, size_t index) { return &options->scopes[index]; } @@ -187,7 +187,7 @@ pm_options_scope_get(const pm_options_t *options, size_t index) { * Create a new options scope struct. 
This will hold a set of locals that are in * scope surrounding the code that is being parsed. */ -PRISM_EXPORTED_FUNCTION bool +bool pm_options_scope_init(pm_options_scope_t *scope, size_t locals_count) { scope->locals_count = locals_count; scope->locals = xcalloc(locals_count, sizeof(pm_string_t)); @@ -198,7 +198,7 @@ pm_options_scope_init(pm_options_scope_t *scope, size_t locals_count) { /** * Return a pointer to the local at the given index within the given scope. */ -PRISM_EXPORTED_FUNCTION const pm_string_t * +const pm_string_t * pm_options_scope_local_get(const pm_options_scope_t *scope, size_t index) { return &scope->locals[index]; } @@ -206,7 +206,7 @@ pm_options_scope_local_get(const pm_options_scope_t *scope, size_t index) { /** * Set the forwarding option on the given scope struct. */ -PRISM_EXPORTED_FUNCTION void +void pm_options_scope_forwarding_set(pm_options_scope_t *scope, uint8_t forwarding) { scope->forwarding = forwarding; } @@ -214,7 +214,7 @@ pm_options_scope_forwarding_set(pm_options_scope_t *scope, uint8_t forwarding) { /** * Free the internal memory associated with the options. */ -PRISM_EXPORTED_FUNCTION void +void pm_options_free(pm_options_t *options) { pm_string_free(&options->filepath); pm_string_free(&options->encoding); diff --git a/src/prism.c b/src/prism.c index dc7cbef2d4..8dfaf085eb 100644 --- a/src/prism.c +++ b/src/prism.c @@ -22140,7 +22140,7 @@ pm_parser_init_shebang(pm_parser_t *parser, const pm_options_t *options, const c /** * Initialize a parser with the given start and end pointers. */ -PRISM_EXPORTED_FUNCTION void +void pm_parser_init(pm_arena_t *arena, pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options) { assert(arena != NULL); assert(source != NULL); @@ -22401,7 +22401,7 @@ pm_parser_init(pm_arena_t *arena, pm_parser_t *parser, const uint8_t *source, si * Register a callback that will be called whenever prism changes the encoding * it is using to parse based on the magic comment. 
*/ -PRISM_EXPORTED_FUNCTION void +void pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback) { parser->encoding_changed_callback = callback; } @@ -22409,7 +22409,7 @@ pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_ch /** * Free any memory associated with the given parser. */ -PRISM_EXPORTED_FUNCTION void +void pm_parser_free(pm_parser_t *parser) { pm_string_free(&parser->filepath); pm_arena_free(&parser->metadata_arena); @@ -22592,7 +22592,7 @@ pm_parse_continuable(pm_parser_t *parser) { /** * Parse the Ruby source associated with the given parser and return the tree. */ -PRISM_EXPORTED_FUNCTION pm_node_t * +pm_node_t * pm_parse(pm_parser_t *parser) { pm_node_t *node = parse_program(parser); pm_parse_continuable(parser); @@ -22659,7 +22659,7 @@ pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t * Prism is designed around having the entire source in memory at once, but you * can stream stdin in to Ruby so we need to support a streaming API. */ -PRISM_EXPORTED_FUNCTION pm_node_t * +pm_node_t * pm_parse_stream(pm_arena_t *arena, pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options) { pm_buffer_init(buffer); @@ -22683,7 +22683,7 @@ pm_parse_stream(pm_arena_t *arena, pm_parser_t *parser, pm_buffer_t *buffer, voi /** * Parse the source and return true if it parses without errors or warnings. */ -PRISM_EXPORTED_FUNCTION bool +bool pm_parse_success_p(const uint8_t *source, size_t size, const char *data) { pm_options_t options = { 0 }; pm_options_read(&options, data); @@ -22724,7 +22724,7 @@ pm_serialize_header(pm_buffer_t *buffer) { /** * Serialize the AST represented by the given node to the given buffer. 
*/ -PRISM_EXPORTED_FUNCTION void +void pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) { pm_serialize_header(buffer); pm_serialize_content(parser, node, buffer); @@ -22735,7 +22735,7 @@ pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) { * Parse and serialize the AST represented by the given source to the given * buffer. */ -PRISM_EXPORTED_FUNCTION void +void pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) { pm_options_t options = { 0 }; pm_options_read(&options, data); @@ -22759,7 +22759,7 @@ pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, cons * Parse and serialize the AST represented by the source that is read out of the * given stream into to the given buffer. */ -PRISM_EXPORTED_FUNCTION void +void pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const char *data) { pm_arena_t arena = { 0 }; pm_parser_t parser; @@ -22781,7 +22781,7 @@ pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fge /** * Parse and serialize the comments in the given source to the given buffer. */ -PRISM_EXPORTED_FUNCTION void +void pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) { pm_options_t options = { 0 }; pm_options_read(&options, data); @@ -22887,7 +22887,7 @@ pm_slice_type(const uint8_t *source, size_t length, const char *encoding_name) { /** * Check that the slice is a valid local variable name. */ -PRISM_EXPORTED_FUNCTION pm_string_query_t +pm_string_query_t pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name) { switch (pm_slice_type(source, length, encoding_name)) { case PM_SLICE_TYPE_ERROR: @@ -22907,7 +22907,7 @@ pm_string_query_local(const uint8_t *source, size_t length, const char *encoding /** * Check that the slice is a valid constant name. 
*/ -PRISM_EXPORTED_FUNCTION pm_string_query_t +pm_string_query_t pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name) { switch (pm_slice_type(source, length, encoding_name)) { case PM_SLICE_TYPE_ERROR: @@ -22927,7 +22927,7 @@ pm_string_query_constant(const uint8_t *source, size_t length, const char *encod /** * Check that the slice is a valid method name. */ -PRISM_EXPORTED_FUNCTION pm_string_query_t +pm_string_query_t pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name) { #define B(p) ((p) ? PM_STRING_QUERY_TRUE : PM_STRING_QUERY_FALSE) #define C1(c) (*source == c) diff --git a/src/regexp.c b/src/regexp.c index 115b3ed7e1..33a546dde8 100644 --- a/src/regexp.c +++ b/src/regexp.c @@ -1595,7 +1595,7 @@ pm_regexp_validate_encoding(pm_regexp_parser_t *parser, bool ascii_only, pm_node * extraction walks the unescaped content since escape sequences in group names * (e.g., line continuations) have already been processed by the lexer. */ -PRISM_EXPORTED_FUNCTION pm_node_flags_t +pm_node_flags_t pm_regexp_parse(pm_parser_t *parser, pm_regular_expression_node_t *node, pm_regexp_name_callback_t name_callback, pm_regexp_name_data_t *name_data) { const uint8_t *source = parser->start + node->content_loc.start; size_t size = node->content_loc.length; diff --git a/src/util/pm_integer.c b/src/util/pm_integer.c index 2b77a4b5d2..8e3404d03b 100644 --- a/src/util/pm_integer.c +++ b/src/util/pm_integer.c @@ -604,7 +604,7 @@ void pm_integers_reduce(pm_integer_t *numerator, pm_integer_t *denominator) { /** * Convert an integer to a decimal string. */ -PRISM_EXPORTED_FUNCTION void +void pm_integer_string(pm_buffer_t *buffer, const pm_integer_t *integer) { if (integer->negative) { pm_buffer_append_byte(buffer, '-'); @@ -663,7 +663,7 @@ pm_integer_string(pm_buffer_t *buffer, const pm_integer_t *integer) { * Free the internal memory of an integer. 
This memory will only be allocated if * the integer exceeds the size of a single uint32_t. */ -PRISM_EXPORTED_FUNCTION void +void pm_integer_free(pm_integer_t *integer) { if (integer->values) { xfree(integer->values); diff --git a/src/util/pm_list.c b/src/util/pm_list.c index 940baffb64..48486d6cf9 100644 --- a/src/util/pm_list.c +++ b/src/util/pm_list.c @@ -3,7 +3,7 @@ /** * Returns true if the given list is empty. */ -PRISM_EXPORTED_FUNCTION bool +bool pm_list_empty_p(pm_list_t *list) { return list->head == NULL; } @@ -11,7 +11,7 @@ pm_list_empty_p(pm_list_t *list) { /** * Returns the size of the list. */ -PRISM_EXPORTED_FUNCTION size_t +size_t pm_list_size(pm_list_t *list) { return list->size; } @@ -34,7 +34,7 @@ pm_list_append(pm_list_t *list, pm_list_node_t *node) { /** * Deallocate the internal state of the given list. */ -PRISM_EXPORTED_FUNCTION void +void pm_list_free(pm_list_t *list) { pm_list_node_t *node = list->head; pm_list_node_t *next; diff --git a/src/util/pm_string.c b/src/util/pm_string.c index 5ba8c78ec1..c2c85e1614 100644 --- a/src/util/pm_string.c +++ b/src/util/pm_string.c @@ -6,7 +6,7 @@ static const uint8_t empty_source[] = ""; * Returns the size of the pm_string_t struct. This is necessary to allocate the * correct amount of memory in the FFI backend. */ -PRISM_EXPORTED_FUNCTION size_t +size_t pm_string_sizeof(void) { return sizeof(pm_string_t); } @@ -117,7 +117,7 @@ pm_string_file_handle_close(pm_string_file_handle_t *handle) { * `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use * `mmap`, and on other POSIX systems we'll use `read`. */ -PRISM_EXPORTED_FUNCTION pm_string_init_result_t +pm_string_init_result_t pm_string_mapped_init(pm_string_t *string, const char *filepath) { #ifdef _WIN32 // Open the file for reading. @@ -207,7 +207,7 @@ pm_string_mapped_init(pm_string_t *string, const char *filepath) { * contents and size into the given `pm_string_t`. 
The given `pm_string_t` * should be freed using `pm_string_free` when it is no longer used. */ -PRISM_EXPORTED_FUNCTION pm_string_init_result_t +pm_string_init_result_t pm_string_file_init(pm_string_t *string, const char *filepath) { #ifdef _WIN32 // Open the file for reading. @@ -348,7 +348,7 @@ pm_string_compare(const pm_string_t *left, const pm_string_t *right) { /** * Returns the length associated with the string. */ -PRISM_EXPORTED_FUNCTION size_t +size_t pm_string_length(const pm_string_t *string) { return string->length; } @@ -356,7 +356,7 @@ pm_string_length(const pm_string_t *string) { /** * Returns the start pointer associated with the string. */ -PRISM_EXPORTED_FUNCTION const uint8_t * +const uint8_t * pm_string_source(const pm_string_t *string) { return string->source; } @@ -364,7 +364,7 @@ pm_string_source(const pm_string_t *string) { /** * Free the associated memory of the given string. */ -PRISM_EXPORTED_FUNCTION void +void pm_string_free(pm_string_t *string) { void *memory = (void *) string->source; diff --git a/templates/src/node.c.erb b/templates/src/node.c.erb index 93ea275a54..f7a2089d30 100644 --- a/templates/src/node.c.erb +++ b/templates/src/node.c.erb @@ -74,7 +74,7 @@ pm_node_list_concat(pm_arena_t *arena, pm_node_list_t *list, pm_node_list_t *oth /** * Returns a string representation of the given node type. */ -PRISM_EXPORTED_FUNCTION const char * +const char * pm_node_type_to_str(pm_node_type_t node_type) { switch (node_type) { @@ -94,7 +94,7 @@ pm_node_type_to_str(pm_node_type_t node_type) * pointer and is passed to the visitor callback for consumers to use as they * see fit. 
*/ -PRISM_EXPORTED_FUNCTION void +void pm_visit_node(const pm_node_t *node, bool (*visitor)(const pm_node_t *node, void *data), void *data) { if (visitor(node, data)) pm_visit_child_nodes(node, visitor, data); } @@ -104,7 +104,7 @@ pm_visit_node(const pm_node_t *node, bool (*visitor)(const pm_node_t *node, void * default behavior for walking the tree that is called from pm_visit_node if * the callback returns true. */ -PRISM_EXPORTED_FUNCTION void +void pm_visit_child_nodes(const pm_node_t *node, bool (*visitor)(const pm_node_t *node, void *data), void *data) { switch (PM_NODE_TYPE(node)) { <%- nodes.each do |node| -%> @@ -161,7 +161,7 @@ pm_dump_json_location(pm_buffer_t *buffer, const pm_location_t *location) { /** * Dump JSON to the given buffer. */ -PRISM_EXPORTED_FUNCTION void +void pm_dump_json(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *node) { switch (PM_NODE_TYPE(node)) { <%- nodes.each do |node| -%> diff --git a/templates/src/prettyprint.c.erb b/templates/src/prettyprint.c.erb index 44423ca42b..05386336ab 100644 --- a/templates/src/prettyprint.c.erb +++ b/templates/src/prettyprint.c.erb @@ -156,7 +156,7 @@ prettyprint_node(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm /** * Pretty-prints the AST represented by the given node to the given buffer. */ -PRISM_EXPORTED_FUNCTION void +void pm_prettyprint(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm_node_t *node) { pm_buffer_t prefix_buffer = { 0 }; prettyprint_node(output_buffer, parser, node, &prefix_buffer); diff --git a/templates/src/serialize.c.erb b/templates/src/serialize.c.erb index 78e4f34893..f6da95f031 100644 --- a/templates/src/serialize.c.erb +++ b/templates/src/serialize.c.erb @@ -310,7 +310,7 @@ serialize_token(void *data, pm_parser_t *parser, pm_token_t *token) { /** * Lex the given source and serialize to the given buffer. 
*/ -PRISM_EXPORTED_FUNCTION void +void pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) { pm_options_t options = { 0 }; pm_options_read(&options, data); @@ -341,7 +341,7 @@ pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const * Parse and serialize both the AST and the tokens represented by the given * source to the given buffer. */ -PRISM_EXPORTED_FUNCTION void +void pm_serialize_parse_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) { pm_options_t options = { 0 }; pm_options_read(&options, data); diff --git a/templates/src/token_type.c.erb b/templates/src/token_type.c.erb index 94e41ec4ba..c08a0e7921 100644 --- a/templates/src/token_type.c.erb +++ b/templates/src/token_type.c.erb @@ -5,7 +5,7 @@ /** * Returns a string representation of the given token type. */ -PRISM_EXPORTED_FUNCTION const char * +const char * pm_token_type_name(pm_token_type_t token_type) { switch (token_type) { <%- tokens.each do |token| -%> From cdc9b97f4d420146a3c78ad0eeac3e9a7fcdda48 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 09:37:48 -0400 Subject: [PATCH 007/100] Move integer out of utils --- include/prism.h | 2 +- include/prism/{util/pm_integer.h => integer.h} | 6 +++--- prism.gemspec | 4 ++-- src/{util/pm_integer.c => integer.c} | 2 +- templates/include/prism/ast.h.erb | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) rename include/prism/{util/pm_integer.h => integer.h} (98%) rename src/{util/pm_integer.c => integer.c} (99%) diff --git a/include/prism.h b/include/prism.h index 9d18c8ccb2..f9e410bab7 100644 --- a/include/prism.h +++ b/include/prism.h @@ -13,13 +13,13 @@ extern "C" { #include "prism/defines.h" #include "prism/util/pm_arena.h" #include "prism/util/pm_char.h" -#include "prism/util/pm_integer.h" #include "prism/util/pm_strpbrk.h" #include "prism/internal/buffer.h" #include "prism/internal/memchr.h" #include "prism/internal/strncasecmp.h" #include 
"prism/ast.h" #include "prism/diagnostic.h" +#include "prism/integer.h" #include "prism/node.h" #include "prism/options.h" #include "prism/parser.h" diff --git a/include/prism/util/pm_integer.h b/include/prism/integer.h similarity index 98% rename from include/prism/util/pm_integer.h rename to include/prism/integer.h index 673db20419..dddf29ce10 100644 --- a/include/prism/util/pm_integer.h +++ b/include/prism/integer.h @@ -1,10 +1,10 @@ /** - * @file pm_integer.h + * @file integer.h * * This module provides functions for working with arbitrary-sized integers. */ -#ifndef PRISM_NUMBER_H -#define PRISM_NUMBER_H +#ifndef PRISM_INTEGER_H +#define PRISM_INTEGER_H #include "prism/defines.h" #include "prism/internal/buffer.h" diff --git a/prism.gemspec b/prism.gemspec index 43bb030c98..ac0811a180 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -52,6 +52,7 @@ Gem::Specification.new do |spec| "include/prism/defines.h", "include/prism/diagnostic.h", "include/prism/encoding.h", + "include/prism/integer.h", "include/prism/node.h", "include/prism/node_new.h", "include/prism/options.h", @@ -65,7 +66,6 @@ Gem::Specification.new do |spec| "include/prism/util/pm_arena.h", "include/prism/util/pm_char.h", "include/prism/util/pm_constant_pool.h", - "include/prism/util/pm_integer.h", "include/prism/util/pm_list.h", "include/prism/util/pm_line_offset_list.h", "include/prism/util/pm_string.h", @@ -162,6 +162,7 @@ Gem::Specification.new do |spec| "src/buffer.c", "src/diagnostic.c", "src/encoding.c", + "src/integer.c", "src/memchr.c", "src/node.c", "src/options.c", @@ -175,7 +176,6 @@ Gem::Specification.new do |spec| "src/util/pm_arena.c", "src/util/pm_char.c", "src/util/pm_constant_pool.c", - "src/util/pm_integer.c", "src/util/pm_list.c", "src/util/pm_line_offset_list.c", "src/util/pm_string.c", diff --git a/src/util/pm_integer.c b/src/integer.c similarity index 99% rename from src/util/pm_integer.c rename to src/integer.c index 8e3404d03b..fd27188109 100644 --- a/src/util/pm_integer.c 
+++ b/src/integer.c @@ -1,4 +1,4 @@ -#include "prism/util/pm_integer.h" +#include "prism/integer.h" /** * Pull out the length and values from the integer, regardless of the form in diff --git a/templates/include/prism/ast.h.erb b/templates/include/prism/ast.h.erb index 0612341772..2026d3a6c6 100644 --- a/templates/include/prism/ast.h.erb +++ b/templates/include/prism/ast.h.erb @@ -10,8 +10,8 @@ #include "prism/defines.h" #include "prism/util/pm_constant_pool.h" -#include "prism/util/pm_integer.h" #include "prism/util/pm_string.h" +#include "prism/integer.h" #include #include From 16fd2bf08adb3ce85bbba09e005db0ab93221d03 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 09:58:18 -0400 Subject: [PATCH 008/100] Split up integer between public and private headers --- include/prism.h | 1 - include/prism/integer.h | 91 +------------------------------- include/prism/internal/integer.h | 87 ++++++++++++++++++++++++++++++ include/prism/static_literals.h | 1 + prism.gemspec | 4 ++ src/integer.c | 33 +++++++----- src/prism.c | 2 + src/static_literals.c | 2 + templates/src/node.c.erb | 1 + templates/src/prettyprint.c.erb | 2 + 10 files changed, 121 insertions(+), 103 deletions(-) create mode 100644 include/prism/internal/integer.h diff --git a/include/prism.h b/include/prism.h index f9e410bab7..cb4439e77a 100644 --- a/include/prism.h +++ b/include/prism.h @@ -19,7 +19,6 @@ extern "C" { #include "prism/internal/strncasecmp.h" #include "prism/ast.h" #include "prism/diagnostic.h" -#include "prism/integer.h" #include "prism/node.h" #include "prism/options.h" #include "prism/parser.h" diff --git a/include/prism/integer.h b/include/prism/integer.h index dddf29ce10..9285986885 100644 --- a/include/prism/integer.h +++ b/include/prism/integer.h @@ -6,13 +6,9 @@ #ifndef PRISM_INTEGER_H #define PRISM_INTEGER_H -#include "prism/defines.h" -#include "prism/internal/buffer.h" - -#include #include +#include #include -#include /** * A structure represents an arbitrary-sized 
integer. @@ -42,89 +38,4 @@ typedef struct { bool negative; } pm_integer_t; -/** - * An enum controlling the base of an integer. It is expected that the base is - * already known before parsing the integer, even though it could be derived - * from the string itself. - */ -typedef enum { - /** The default decimal base, with no prefix. Leading 0s will be ignored. */ - PM_INTEGER_BASE_DEFAULT, - - /** The binary base, indicated by a 0b or 0B prefix. */ - PM_INTEGER_BASE_BINARY, - - /** The octal base, indicated by a 0, 0o, or 0O prefix. */ - PM_INTEGER_BASE_OCTAL, - - /** The decimal base, indicated by a 0d, 0D, or empty prefix. */ - PM_INTEGER_BASE_DECIMAL, - - /** The hexadecimal base, indicated by a 0x or 0X prefix. */ - PM_INTEGER_BASE_HEXADECIMAL, - - /** - * An unknown base, in which case pm_integer_parse will derive it based on - * the content of the string. This is less efficient and does more - * comparisons, so if callers know the base ahead of time, they should use - * that instead. - */ - PM_INTEGER_BASE_UNKNOWN -} pm_integer_base_t; - -/** - * Parse an integer from a string. This assumes that the format of the integer - * has already been validated, as internal validation checks are not performed - * here. - * - * @param integer The integer to parse into. - * @param base The base of the integer. - * @param start The start of the string. - * @param end The end of the string. - */ -void pm_integer_parse(pm_integer_t *integer, pm_integer_base_t base, const uint8_t *start, const uint8_t *end); - -/** - * Compare two integers. This function returns -1 if the left integer is less - * than the right integer, 0 if they are equal, and 1 if the left integer is - * greater than the right integer. - * - * @param left The left integer to compare. - * @param right The right integer to compare. - * @return The result of the comparison. 
- */ -int pm_integer_compare(const pm_integer_t *left, const pm_integer_t *right); - -/** - * Reduce a ratio of integers to its simplest form. - * - * If either the numerator or denominator do not fit into a 32-bit integer, then - * this function is a no-op. In the future, we may consider reducing even the - * larger numbers, but for now we're going to keep it simple. - * - * @param numerator The numerator of the ratio. - * @param denominator The denominator of the ratio. - */ -void pm_integers_reduce(pm_integer_t *numerator, pm_integer_t *denominator); - -/** - * Convert an integer to a decimal string. - * - * @param buffer The buffer to append the string to. - * @param integer The integer to convert to a string. - * - * \public \memberof pm_integer_t - */ -PRISM_EXPORTED_FUNCTION void pm_integer_string(pm_buffer_t *buffer, const pm_integer_t *integer); - -/** - * Free the internal memory of an integer. This memory will only be allocated if - * the integer exceeds the size of a single node in the linked list. - * - * @param integer The integer to free. - * - * \public \memberof pm_integer_t - */ -PRISM_EXPORTED_FUNCTION void pm_integer_free(pm_integer_t *integer); - #endif diff --git a/include/prism/internal/integer.h b/include/prism/internal/integer.h new file mode 100644 index 0000000000..8bf21ae69d --- /dev/null +++ b/include/prism/internal/integer.h @@ -0,0 +1,87 @@ +/** + * @file internal/integer.h + * + * This module provides functions for working with arbitrary-sized integers. + */ +#ifndef PRISM_INTERNAL_INTEGER_H +#define PRISM_INTERNAL_INTEGER_H + +#include "prism/buffer.h" +#include "prism/integer.h" + +#include + +/** + * An enum controlling the base of an integer. It is expected that the base is + * already known before parsing the integer, even though it could be derived + * from the string itself. + */ +typedef enum { + /** The default decimal base, with no prefix. Leading 0s will be ignored. 
*/ + PM_INTEGER_BASE_DEFAULT, + + /** The binary base, indicated by a 0b or 0B prefix. */ + PM_INTEGER_BASE_BINARY, + + /** The octal base, indicated by a 0, 0o, or 0O prefix. */ + PM_INTEGER_BASE_OCTAL, + + /** The decimal base, indicated by a 0d, 0D, or empty prefix. */ + PM_INTEGER_BASE_DECIMAL, + + /** The hexadecimal base, indicated by a 0x or 0X prefix. */ + PM_INTEGER_BASE_HEXADECIMAL, + + /** + * An unknown base, in which case pm_integer_parse will derive it based on + * the content of the string. This is less efficient and does more + * comparisons, so if callers know the base ahead of time, they should use + * that instead. + */ + PM_INTEGER_BASE_UNKNOWN +} pm_integer_base_t; + +/** + * Parse an integer from a string. This assumes that the format of the integer + * has already been validated, as internal validation checks are not performed + * here. + * + * @param integer The integer to parse into. + * @param base The base of the integer. + * @param start The start of the string. + * @param end The end of the string. + */ +void pm_integer_parse(pm_integer_t *integer, pm_integer_base_t base, const uint8_t *start, const uint8_t *end); + +/** + * Compare two integers. This function returns -1 if the left integer is less + * than the right integer, 0 if they are equal, and 1 if the left integer is + * greater than the right integer. + * + * @param left The left integer to compare. + * @param right The right integer to compare. + * @return The result of the comparison. + */ +int pm_integer_compare(const pm_integer_t *left, const pm_integer_t *right); + +/** + * Reduce a ratio of integers to its simplest form. + * + * If either the numerator or denominator do not fit into a 32-bit integer, then + * this function is a no-op. In the future, we may consider reducing even the + * larger numbers, but for now we're going to keep it simple. + * + * @param numerator The numerator of the ratio. + * @param denominator The denominator of the ratio. 
+ */ +void pm_integers_reduce(pm_integer_t *numerator, pm_integer_t *denominator); + +/** + * Convert an integer to a decimal string. + * + * @param buffer The buffer to append the string to. + * @param integer The integer to convert to a string. + */ +void pm_integer_string(pm_buffer_t *buffer, const pm_integer_t *integer); + +#endif diff --git a/include/prism/static_literals.h b/include/prism/static_literals.h index 6d73e5d04f..0e8ddbcfc6 100644 --- a/include/prism/static_literals.h +++ b/include/prism/static_literals.h @@ -8,6 +8,7 @@ #include "prism/defines.h" #include "prism/ast.h" +#include "prism/buffer.h" #include "prism/util/pm_line_offset_list.h" #include diff --git a/prism.gemspec b/prism.gemspec index ac0811a180..f3923eeda4 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -46,12 +46,15 @@ Gem::Specification.new do |spec| "ext/prism/extension.c", "ext/prism/extension.h", "include/prism.h", + "include/prism/allocator.h", "include/prism/ast.h", "include/prism/buffer.h", "include/prism/debug_allocator.h", "include/prism/defines.h", "include/prism/diagnostic.h", "include/prism/encoding.h", + "include/prism/exported.h", + "include/prism/format.h", "include/prism/integer.h", "include/prism/node.h", "include/prism/node_new.h", @@ -61,6 +64,7 @@ Gem::Specification.new do |spec| "include/prism/regexp.h", "include/prism/static_literals.h", "include/prism/internal/buffer.h", + "include/prism/internal/integer.h", "include/prism/internal/memchr.h", "include/prism/internal/strncasecmp.h", "include/prism/util/pm_arena.h", diff --git a/src/integer.c b/src/integer.c index fd27188109..35dd5e0eea 100644 --- a/src/integer.c +++ b/src/integer.c @@ -1,4 +1,24 @@ -#include "prism/integer.h" +#include "prism/allocator.h" +#include "prism/internal/buffer.h" +#include "prism/internal/integer.h" + +#include +#include +#include +#include +#include +#include + +/** + * Free the internal memory of an integer. 
This memory will only be allocated if + * the integer exceeds the size of a single uint32_t. + */ +static void +pm_integer_free(pm_integer_t *integer) { + if (integer->values) { + xfree(integer->values); + } +} /** * Pull out the length and values from the integer, regardless of the form in @@ -658,14 +678,3 @@ pm_integer_string(pm_buffer_t *buffer, const pm_integer_t *integer) { xfree_sized(digits, sizeof(char) * digits_length); pm_integer_free(&converted); } - -/** - * Free the internal memory of an integer. This memory will only be allocated if - * the integer exceeds the size of a single uint32_t. - */ -void -pm_integer_free(pm_integer_t *integer) { - if (integer->values) { - xfree(integer->values); - } -} diff --git a/src/prism.c b/src/prism.c index 8dfaf085eb..fdf3bacb42 100644 --- a/src/prism.c +++ b/src/prism.c @@ -1,3 +1,5 @@ +#include "prism/internal/integer.h" + #include "prism.h" #include "prism/node_new.h" diff --git a/src/static_literals.c b/src/static_literals.c index f3a5650d31..e6d66cd691 100644 --- a/src/static_literals.c +++ b/src/static_literals.c @@ -1,3 +1,5 @@ +#include "prism/internal/buffer.h" +#include "prism/internal/integer.h" #include "prism/static_literals.h" /** diff --git a/templates/src/node.c.erb b/templates/src/node.c.erb index f7a2089d30..30e2d6fc58 100644 --- a/templates/src/node.c.erb +++ b/templates/src/node.c.erb @@ -1,4 +1,5 @@ #line <%= __LINE__ + 1 %> "prism/templates/src/<%= File.basename(__FILE__) %>" +#include "prism/internal/integer.h" #include "prism/node.h" /** diff --git a/templates/src/prettyprint.c.erb b/templates/src/prettyprint.c.erb index 05386336ab..aefb092df2 100644 --- a/templates/src/prettyprint.c.erb +++ b/templates/src/prettyprint.c.erb @@ -1,4 +1,6 @@ <%# encoding: ASCII -%> +#include "prism/internal/buffer.h" +#include "prism/internal/integer.h" #include "prism/prettyprint.h" // We optionally support pretty printing nodes. 
For systems that don't want or From 5966ab67f3057f9ffd3c4133845a154d9ceed7a7 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 10:04:11 -0400 Subject: [PATCH 009/100] Move line offset list out of util --- .../prism/{util/pm_line_offset_list.h => line_offset_list.h} | 2 +- include/prism/parser.h | 2 +- include/prism/static_literals.h | 2 +- include/prism/util/pm_char.h | 2 +- prism.gemspec | 4 ++-- src/{util/pm_line_offset_list.c => line_offset_list.c} | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) rename include/prism/{util/pm_line_offset_list.h => line_offset_list.h} (99%) rename src/{util/pm_line_offset_list.c => line_offset_list.c} (98%) diff --git a/include/prism/util/pm_line_offset_list.h b/include/prism/line_offset_list.h similarity index 99% rename from include/prism/util/pm_line_offset_list.h rename to include/prism/line_offset_list.h index 62a52da4ec..53e697ed86 100644 --- a/include/prism/util/pm_line_offset_list.h +++ b/include/prism/line_offset_list.h @@ -1,5 +1,5 @@ /** - * @file pm_line_offset_list.h + * @file line_offset_list.h * * A list of byte offsets of newlines in a string. 
* diff --git a/include/prism/parser.h b/include/prism/parser.h index 66df791244..1d8f28fff6 100644 --- a/include/prism/parser.h +++ b/include/prism/parser.h @@ -9,12 +9,12 @@ #include "prism/defines.h" #include "prism/ast.h" #include "prism/encoding.h" +#include "prism/line_offset_list.h" #include "prism/options.h" #include "prism/static_literals.h" #include "prism/util/pm_arena.h" #include "prism/util/pm_constant_pool.h" #include "prism/util/pm_list.h" -#include "prism/util/pm_line_offset_list.h" #include "prism/util/pm_string.h" #include diff --git a/include/prism/static_literals.h b/include/prism/static_literals.h index 0e8ddbcfc6..cdb00e6a83 100644 --- a/include/prism/static_literals.h +++ b/include/prism/static_literals.h @@ -9,7 +9,7 @@ #include "prism/defines.h" #include "prism/ast.h" #include "prism/buffer.h" -#include "prism/util/pm_line_offset_list.h" +#include "prism/line_offset_list.h" #include #include diff --git a/include/prism/util/pm_char.h b/include/prism/util/pm_char.h index 516390b21c..0be57d5a52 100644 --- a/include/prism/util/pm_char.h +++ b/include/prism/util/pm_char.h @@ -7,7 +7,7 @@ #define PRISM_CHAR_H #include "prism/defines.h" -#include "prism/util/pm_line_offset_list.h" +#include "prism/line_offset_list.h" #include #include diff --git a/prism.gemspec b/prism.gemspec index f3923eeda4..302fe17aca 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -56,6 +56,7 @@ Gem::Specification.new do |spec| "include/prism/exported.h", "include/prism/format.h", "include/prism/integer.h", + "include/prism/line_offset_list.h", "include/prism/node.h", "include/prism/node_new.h", "include/prism/options.h", @@ -71,7 +72,6 @@ Gem::Specification.new do |spec| "include/prism/util/pm_char.h", "include/prism/util/pm_constant_pool.h", "include/prism/util/pm_list.h", - "include/prism/util/pm_line_offset_list.h", "include/prism/util/pm_string.h", "include/prism/util/pm_strpbrk.h", "include/prism/version.h", @@ -167,6 +167,7 @@ Gem::Specification.new do |spec| 
"src/diagnostic.c", "src/encoding.c", "src/integer.c", + "src/line_offset_list.c", "src/memchr.c", "src/node.c", "src/options.c", @@ -181,7 +182,6 @@ Gem::Specification.new do |spec| "src/util/pm_char.c", "src/util/pm_constant_pool.c", "src/util/pm_list.c", - "src/util/pm_line_offset_list.c", "src/util/pm_string.c", "src/util/pm_strpbrk.c" ] diff --git a/src/util/pm_line_offset_list.c b/src/line_offset_list.c similarity index 98% rename from src/util/pm_line_offset_list.c rename to src/line_offset_list.c index 0648901e29..8513fdb46e 100644 --- a/src/util/pm_line_offset_list.c +++ b/src/line_offset_list.c @@ -1,4 +1,4 @@ -#include "prism/util/pm_line_offset_list.h" +#include "prism/line_offset_list.h" /** * Initialize a new line offset list with the given capacity. From 58586902b19d358ee104cc76c753738beaa29bae Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 10:14:08 -0400 Subject: [PATCH 010/100] Split up line offset list into public and internal --- include/prism/align.h | 38 ++++++++++++ include/prism/defines.h | 45 +------------- include/prism/force_inline.h | 21 +++++++ include/prism/internal/line_offset_list.h | 73 +++++++++++++++++++++++ include/prism/line_offset_list.h | 59 +----------------- include/prism/util/pm_char.h | 1 + prism.gemspec | 3 + src/buffer.c | 1 + src/line_offset_list.c | 6 +- src/prism.c | 1 + src/util/pm_char.c | 1 + 11 files changed, 148 insertions(+), 101 deletions(-) create mode 100644 include/prism/align.h create mode 100644 include/prism/force_inline.h create mode 100644 include/prism/internal/line_offset_list.h diff --git a/include/prism/align.h b/include/prism/align.h new file mode 100644 index 0000000000..bb120d488e --- /dev/null +++ b/include/prism/align.h @@ -0,0 +1,38 @@ +/** + * @file align.h + * + * Alignment macros used throughout the prism library. + */ +#ifndef PRISM_ALIGN_H +#define PRISM_ALIGN_H + +/** + * Compiler-agnostic macros for specifying alignment of types and variables. 
+ */ +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L /* C11 or later */ + /** Specify alignment for a type or variable. */ + #define PRISM_ALIGNAS _Alignas + + /** Get the alignment requirement of a type. */ + #define PRISM_ALIGNOF _Alignof +#elif defined(__GNUC__) || defined(__clang__) + /** Specify alignment for a type or variable. */ + #define PRISM_ALIGNAS(size) __attribute__((aligned(size))) + + /** Get the alignment requirement of a type. */ + #define PRISM_ALIGNOF(type) __alignof__(type) +#elif defined(_MSC_VER) + /** Specify alignment for a type or variable. */ + #define PRISM_ALIGNAS(size) __declspec(align(size)) + + /** Get the alignment requirement of a type. */ + #define PRISM_ALIGNOF(type) __alignof(type) +#else + /** Void because this platform does not support specifying alignment. */ + #define PRISM_ALIGNAS(size) + + /** Fallback to sizeof as alignment requirement of a type. */ + #define PRISM_ALIGNOF(type) sizeof(type) +#endif + +#endif diff --git a/include/prism/defines.h b/include/prism/defines.h index c533ba4ffe..fcfc8b84a1 100644 --- a/include/prism/defines.h +++ b/include/prism/defines.h @@ -9,8 +9,10 @@ #ifndef PRISM_DEFINES_H #define PRISM_DEFINES_H +#include "prism/align.h" #include "prism/allocator.h" #include "prism/exported.h" +#include "prism/force_inline.h" #include "prism/format.h" #include @@ -60,18 +62,6 @@ # define inline __inline #endif -/** - * Force a function to be inlined at every call site. Use sparingly — only for - * small, hot functions where the compiler's heuristics fail to inline. - */ -#if defined(_MSC_VER) -# define PRISM_FORCE_INLINE __forceinline -#elif defined(__GNUC__) || defined(__clang__) -# define PRISM_FORCE_INLINE inline __attribute__((always_inline)) -#else -# define PRISM_FORCE_INLINE inline -#endif - /** * Old Visual Studio versions before 2015 do not implement sprintf, but instead * implement _snprintf. We standard that here. 
@@ -314,35 +304,4 @@ #define PM_FLEX_ARY_LEN 1 /* data[1] */ #endif -/** - * We need to align nodes in the AST to a pointer boundary so that it can be - * safely cast to different node types. Use PRISM_ALIGNAS/PRISM_ALIGNOF to - * specify alignment in a compiler-agnostic way. - */ -#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L /* C11 or later */ - /** Specify alignment for a type or variable. */ - #define PRISM_ALIGNAS _Alignas - - /** Get the alignment requirement of a type. */ - #define PRISM_ALIGNOF _Alignof -#elif defined(__GNUC__) || defined(__clang__) - /** Specify alignment for a type or variable. */ - #define PRISM_ALIGNAS(size) __attribute__((aligned(size))) - - /** Get the alignment requirement of a type. */ - #define PRISM_ALIGNOF(type) __alignof__(type) -#elif defined(_MSC_VER) - /** Specify alignment for a type or variable. */ - #define PRISM_ALIGNAS(size) __declspec(align(size)) - - /** Get the alignment requirement of a type. */ - #define PRISM_ALIGNOF(type) __alignof(type) -#else - /** Void because this platform does not support specifying alignment. */ - #define PRISM_ALIGNAS(size) - - /** Fallback to sizeof as alignment requirement of a type. */ - #define PRISM_ALIGNOF(type) sizeof(type) -#endif - #endif diff --git a/include/prism/force_inline.h b/include/prism/force_inline.h new file mode 100644 index 0000000000..c205e5ddf6 --- /dev/null +++ b/include/prism/force_inline.h @@ -0,0 +1,21 @@ +/** + * @file force_inline.h + * + * Macro definitions for forcing a function to be inlined at every call site. + */ +#ifndef PRISM_FORCE_INLINE_H +#define PRISM_FORCE_INLINE_H + +/** + * Force a function to be inlined at every call site. Use sparingly — only for + * small, hot functions where the compiler's heuristics fail to inline. 
+ */ +#if defined(_MSC_VER) +# define PRISM_FORCE_INLINE __forceinline +#elif defined(__GNUC__) || defined(__clang__) +# define PRISM_FORCE_INLINE inline __attribute__((always_inline)) +#else +# define PRISM_FORCE_INLINE inline +#endif + +#endif diff --git a/include/prism/internal/line_offset_list.h b/include/prism/internal/line_offset_list.h new file mode 100644 index 0000000000..4b7d9fc1a6 --- /dev/null +++ b/include/prism/internal/line_offset_list.h @@ -0,0 +1,73 @@ +/** + * @file internal/line_offset_list.h + * + * A list of byte offsets of newlines in a string. + * + * When compiling the syntax tree, it's necessary to know the line and column + * of many nodes. This is necessary to support things like error messages, + * tracepoints, etc. + * + * It's possible that we could store the start line, start column, end line, and + * end column on every node in addition to the offsets that we already store, + * but that would be quite a lot of memory overhead. + */ +#ifndef PRISM_INTERNAL_LINE_OFFSET_LIST_H +#define PRISM_INTERNAL_LINE_OFFSET_LIST_H + +#include "prism/line_offset_list.h" +#include "prism/force_inline.h" +#include "prism/util/pm_arena.h" + +/** + * Initialize a new line offset list with the given capacity. + * + * @param arena The arena to allocate from. + * @param list The list to initialize. + * @param capacity The initial capacity of the list. + */ +void pm_line_offset_list_init(pm_arena_t *arena, pm_line_offset_list_t *list, size_t capacity); + +/** + * Clear out the offsets that have been appended to the list. + * + * @param list The list to clear. + */ +void pm_line_offset_list_clear(pm_line_offset_list_t *list); + +/** + * Append a new offset to the list (slow path with resize). + * + * @param arena The arena to allocate from. + * @param list The list to append to. + * @param cursor The offset to append. 
+ */ +void pm_line_offset_list_append_slow(pm_arena_t *arena, pm_line_offset_list_t *list, uint32_t cursor); + +/** + * Append a new offset to the list. + * + * @param arena The arena to allocate from. + * @param list The list to append to. + * @param cursor The offset to append. + */ +static PRISM_FORCE_INLINE void +pm_line_offset_list_append(pm_arena_t *arena, pm_line_offset_list_t *list, uint32_t cursor) { + if (list->size < list->capacity) { + list->offsets[list->size++] = cursor; + } else { + pm_line_offset_list_append_slow(arena, list, cursor); + } +} + +/** + * Returns the line of the given offset. If the offset is not in the list, the + * line of the closest offset less than the given offset is returned. + * + * @param list The list to search. + * @param cursor The offset to search for. + * @param start_line The line to start counting from. + * @return The line of the given offset. + */ +int32_t pm_line_offset_list_line(const pm_line_offset_list_t *list, uint32_t cursor, int32_t start_line); + +#endif diff --git a/include/prism/line_offset_list.h b/include/prism/line_offset_list.h index 53e697ed86..33bdd03aaa 100644 --- a/include/prism/line_offset_list.h +++ b/include/prism/line_offset_list.h @@ -14,13 +14,10 @@ #ifndef PRISM_LINE_OFFSET_LIST_H #define PRISM_LINE_OFFSET_LIST_H -#include "prism/defines.h" -#include "prism/util/pm_arena.h" +#include "prism/exported.h" -#include -#include #include -#include +#include /** * A list of offsets of the start of lines in a string. The offsets are assumed @@ -48,58 +45,6 @@ typedef struct { uint32_t column; } pm_line_column_t; -/** - * Initialize a new line offset list with the given capacity. - * - * @param arena The arena to allocate from. - * @param list The list to initialize. - * @param capacity The initial capacity of the list. - */ -void pm_line_offset_list_init(pm_arena_t *arena, pm_line_offset_list_t *list, size_t capacity); - -/** - * Clear out the offsets that have been appended to the list. 
- * - * @param list The list to clear. - */ -void pm_line_offset_list_clear(pm_line_offset_list_t *list); - -/** - * Append a new offset to the list (slow path with resize). - * - * @param arena The arena to allocate from. - * @param list The list to append to. - * @param cursor The offset to append. - */ -void pm_line_offset_list_append_slow(pm_arena_t *arena, pm_line_offset_list_t *list, uint32_t cursor); - -/** - * Append a new offset to the list. - * - * @param arena The arena to allocate from. - * @param list The list to append to. - * @param cursor The offset to append. - */ -static PRISM_FORCE_INLINE void -pm_line_offset_list_append(pm_arena_t *arena, pm_line_offset_list_t *list, uint32_t cursor) { - if (list->size < list->capacity) { - list->offsets[list->size++] = cursor; - } else { - pm_line_offset_list_append_slow(arena, list, cursor); - } -} - -/** - * Returns the line of the given offset. If the offset is not in the list, the - * line of the closest offset less than the given offset is returned. - * - * @param list The list to search. - * @param cursor The offset to search for. - * @param start_line The line to start counting from. - * @return The line of the given offset. - */ -int32_t pm_line_offset_list_line(const pm_line_offset_list_t *list, uint32_t cursor, int32_t start_line); - /** * Returns the line and column of the given offset. 
If the offset is not in the * list, the line and column of the closest offset less than the given offset diff --git a/include/prism/util/pm_char.h b/include/prism/util/pm_char.h index 0be57d5a52..a6ffd1b107 100644 --- a/include/prism/util/pm_char.h +++ b/include/prism/util/pm_char.h @@ -7,6 +7,7 @@ #define PRISM_CHAR_H #include "prism/defines.h" +#include "prism/util/pm_arena.h" #include "prism/line_offset_list.h" #include diff --git a/prism.gemspec b/prism.gemspec index 302fe17aca..e3c0b34d20 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -46,6 +46,7 @@ Gem::Specification.new do |spec| "ext/prism/extension.c", "ext/prism/extension.h", "include/prism.h", + "include/prism/align.h", "include/prism/allocator.h", "include/prism/ast.h", "include/prism/buffer.h", @@ -54,6 +55,7 @@ Gem::Specification.new do |spec| "include/prism/diagnostic.h", "include/prism/encoding.h", "include/prism/exported.h", + "include/prism/force_inline.h", "include/prism/format.h", "include/prism/integer.h", "include/prism/line_offset_list.h", @@ -66,6 +68,7 @@ Gem::Specification.new do |spec| "include/prism/static_literals.h", "include/prism/internal/buffer.h", "include/prism/internal/integer.h", + "include/prism/internal/line_offset_list.h", "include/prism/internal/memchr.h", "include/prism/internal/strncasecmp.h", "include/prism/util/pm_arena.h", diff --git a/src/buffer.c b/src/buffer.c index dbbb05c7ef..865212bb80 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -2,6 +2,7 @@ #include "prism/allocator.h" #include "prism/util/pm_char.h" +#include #include #include #include diff --git a/src/line_offset_list.c b/src/line_offset_list.c index 8513fdb46e..81976aacb3 100644 --- a/src/line_offset_list.c +++ b/src/line_offset_list.c @@ -1,4 +1,8 @@ -#include "prism/line_offset_list.h" +#include "prism/internal/line_offset_list.h" +#include "prism/align.h" + +#include +#include /** * Initialize a new line offset list with the given capacity. 
diff --git a/src/prism.c b/src/prism.c index fdf3bacb42..a4a7faafbf 100644 --- a/src/prism.c +++ b/src/prism.c @@ -1,4 +1,5 @@ #include "prism/internal/integer.h" +#include "prism/internal/line_offset_list.h" #include "prism.h" #include "prism/node_new.h" diff --git a/src/util/pm_char.c b/src/util/pm_char.c index ac283af356..346f15feb8 100644 --- a/src/util/pm_char.c +++ b/src/util/pm_char.c @@ -1,4 +1,5 @@ #include "prism/util/pm_char.h" +#include "prism/internal/line_offset_list.h" #define PRISM_CHAR_BIT_REGEXP_OPTION (1 << 2) From 895b395b99bdace27f2cdcf7883963ca21a3b6d2 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 10:19:54 -0400 Subject: [PATCH 011/100] Move char to internal headers --- include/prism.h | 1 - .../prism/{util/pm_char.h => internal/char.h} | 20 +++++++++++++------ include/prism/options.h | 1 - prism.gemspec | 4 ++-- rakelib/char.rake | 2 +- src/buffer.c | 2 +- src/{util/pm_char.c => char.c} | 15 +------------- src/options.c | 1 + src/prism.c | 4 +++- src/regexp.c | 3 ++- 10 files changed, 25 insertions(+), 28 deletions(-) rename include/prism/{util/pm_char.h => internal/char.h} (96%) rename src/{util/pm_char.c => char.c} (95%) diff --git a/include/prism.h b/include/prism.h index cb4439e77a..46a6a6fa6c 100644 --- a/include/prism.h +++ b/include/prism.h @@ -12,7 +12,6 @@ extern "C" { #include "prism/defines.h" #include "prism/util/pm_arena.h" -#include "prism/util/pm_char.h" #include "prism/util/pm_strpbrk.h" #include "prism/internal/buffer.h" #include "prism/internal/memchr.h" diff --git a/include/prism/util/pm_char.h b/include/prism/internal/char.h similarity index 96% rename from include/prism/util/pm_char.h rename to include/prism/internal/char.h index a6ffd1b107..17e696142d 100644 --- a/include/prism/util/pm_char.h +++ b/include/prism/internal/char.h @@ -1,17 +1,25 @@ /** - * @file pm_char.h + * @file internal/char.h * * Functions for working with characters and strings. 
*/ -#ifndef PRISM_CHAR_H -#define PRISM_CHAR_H +#ifndef PRISM_INTERNAL_CHAR_H +#define PRISM_INTERNAL_CHAR_H -#include "prism/defines.h" -#include "prism/util/pm_arena.h" +// #include "prism/defines.h" +// #include "prism/util/pm_arena.h" +// #include "prism/line_offset_list.h" + +// #include +// #include + +#include "prism/force_inline.h" #include "prism/line_offset_list.h" +#include "prism/util/pm_arena.h" #include #include +#include /** Bit flag for whitespace characters in pm_byte_table. */ #define PRISM_CHAR_BIT_WHITESPACE (1 << 0) @@ -21,7 +29,7 @@ /** * A lookup table for classifying bytes. Each entry is a bitfield of - * PRISM_CHAR_BIT_* flags. Defined in pm_char.c. + * PRISM_CHAR_BIT_* flags. Defined in char.c. */ extern const uint8_t pm_byte_table[256]; diff --git a/include/prism/options.h b/include/prism/options.h index 9a19a2aead..41f3bc7f42 100644 --- a/include/prism/options.h +++ b/include/prism/options.h @@ -7,7 +7,6 @@ #define PRISM_OPTIONS_H #include "prism/defines.h" -#include "prism/util/pm_char.h" #include "prism/util/pm_string.h" #include diff --git a/prism.gemspec b/prism.gemspec index e3c0b34d20..4666d975f5 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -67,12 +67,12 @@ Gem::Specification.new do |spec| "include/prism/regexp.h", "include/prism/static_literals.h", "include/prism/internal/buffer.h", + "include/prism/internal/char.h", "include/prism/internal/integer.h", "include/prism/internal/line_offset_list.h", "include/prism/internal/memchr.h", "include/prism/internal/strncasecmp.h", "include/prism/util/pm_arena.h", - "include/prism/util/pm_char.h", "include/prism/util/pm_constant_pool.h", "include/prism/util/pm_list.h", "include/prism/util/pm_string.h", @@ -167,6 +167,7 @@ Gem::Specification.new do |spec| "sig/generated/prism/parse_result/errors.rbs", "sig/generated/prism/parse_result/newlines.rbs", "src/buffer.c", + "src/char.c", "src/diagnostic.c", "src/encoding.c", "src/integer.c", @@ -182,7 +183,6 @@ Gem::Specification.new do |spec| 
"src/strncasecmp.c", "src/token_type.c", "src/util/pm_arena.c", - "src/util/pm_char.c", "src/util/pm_constant_pool.c", "src/util/pm_list.c", "src/util/pm_string.c", diff --git a/rakelib/char.rake b/rakelib/char.rake index 112e20b50d..d1486a440e 100644 --- a/rakelib/char.rake +++ b/rakelib/char.rake @@ -1,6 +1,6 @@ # frozen_string_literal: true -desc "Generate the lookup tables for pm_char.c" +desc "Generate the lookup tables for char.c" namespace :generate do task :char do puts "static const uint8_t pm_char_table[256] = {" diff --git a/src/buffer.c b/src/buffer.c index 865212bb80..b416519795 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -1,6 +1,6 @@ #include "prism/internal/buffer.h" +#include "prism/internal/char.h" #include "prism/allocator.h" -#include "prism/util/pm_char.h" #include #include diff --git a/src/util/pm_char.c b/src/char.c similarity index 95% rename from src/util/pm_char.c rename to src/char.c index 346f15feb8..c119ee8a19 100644 --- a/src/util/pm_char.c +++ b/src/char.c @@ -1,4 +1,4 @@ -#include "prism/util/pm_char.h" +#include "prism/internal/char.h" #include "prism/internal/line_offset_list.h" #define PRISM_CHAR_BIT_REGEXP_OPTION (1 << 2) @@ -270,16 +270,3 @@ bool pm_char_is_hexadecimal_digit(const uint8_t b) { return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT); } - -#undef PRISM_CHAR_BIT_WHITESPACE -#undef PRISM_CHAR_BIT_INLINE_WHITESPACE -#undef PRISM_CHAR_BIT_REGEXP_OPTION - -#undef PRISM_NUMBER_BIT_BINARY_DIGIT -#undef PRISM_NUMBER_BIT_BINARY_NUMBER -#undef PRISM_NUMBER_BIT_OCTAL_DIGIT -#undef PRISM_NUMBER_BIT_OCTAL_NUMBER -#undef PRISM_NUMBER_BIT_DECIMAL_DIGIT -#undef PRISM_NUMBER_BIT_DECIMAL_NUMBER -#undef PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER -#undef PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT diff --git a/src/options.c b/src/options.c index ecd88a8021..c89515964c 100644 --- a/src/options.c +++ b/src/options.c @@ -1,4 +1,5 @@ #include "prism/options.h" +#include "prism/internal/char.h" /** * Set the shebang callback option on the 
given options struct. diff --git a/src/prism.c b/src/prism.c index a4a7faafbf..64c2680ee3 100644 --- a/src/prism.c +++ b/src/prism.c @@ -1,7 +1,9 @@ +#include "prism.h" + +#include "prism/internal/char.h" #include "prism/internal/integer.h" #include "prism/internal/line_offset_list.h" -#include "prism.h" #include "prism/node_new.h" /** diff --git a/src/regexp.c b/src/regexp.c index 33a546dde8..60ebcd86ee 100644 --- a/src/regexp.c +++ b/src/regexp.c @@ -1,6 +1,7 @@ +#include "prism/regexp.h" #include "prism/internal/buffer.h" +#include "prism/internal/char.h" #include "prism/internal/strncasecmp.h" -#include "prism/regexp.h" #include "prism/diagnostic.h" /** The maximum depth of nested groups allowed in a regular expression. */ From c74b7c3f5287aadeb136497ddf575cc350449bb8 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 10:23:11 -0400 Subject: [PATCH 012/100] Move arena out of utils --- include/prism.h | 2 +- include/prism/{util/pm_arena.h => arena.h} | 2 +- include/prism/internal/char.h | 9 +-------- include/prism/internal/line_offset_list.h | 2 +- include/prism/parser.h | 2 +- include/prism/util/pm_constant_pool.h | 2 +- prism.gemspec | 4 ++-- src/{util/pm_arena.c => arena.c} | 2 +- src/util/pm_constant_pool.c | 2 +- templates/include/prism/diagnostic.h.erb | 4 ++-- templates/src/diagnostic.c.erb | 2 +- 11 files changed, 13 insertions(+), 20 deletions(-) rename include/prism/{util/pm_arena.h => arena.h} (99%) rename src/{util/pm_arena.c => arena.c} (98%) diff --git a/include/prism.h b/include/prism.h index 46a6a6fa6c..e4310ff395 100644 --- a/include/prism.h +++ b/include/prism.h @@ -11,11 +11,11 @@ extern "C" { #endif #include "prism/defines.h" -#include "prism/util/pm_arena.h" #include "prism/util/pm_strpbrk.h" #include "prism/internal/buffer.h" #include "prism/internal/memchr.h" #include "prism/internal/strncasecmp.h" +#include "prism/arena.h" #include "prism/ast.h" #include "prism/diagnostic.h" #include "prism/node.h" diff --git 
a/include/prism/util/pm_arena.h b/include/prism/arena.h similarity index 99% rename from include/prism/util/pm_arena.h rename to include/prism/arena.h index 175b39c6df..deeaba19a9 100644 --- a/include/prism/util/pm_arena.h +++ b/include/prism/arena.h @@ -1,5 +1,5 @@ /** - * @file pm_arena.h + * @file arena.h * * A bump allocator for the prism parser. */ diff --git a/include/prism/internal/char.h b/include/prism/internal/char.h index 17e696142d..7b394e43dc 100644 --- a/include/prism/internal/char.h +++ b/include/prism/internal/char.h @@ -6,16 +6,9 @@ #ifndef PRISM_INTERNAL_CHAR_H #define PRISM_INTERNAL_CHAR_H -// #include "prism/defines.h" -// #include "prism/util/pm_arena.h" -// #include "prism/line_offset_list.h" - -// #include -// #include - +#include "prism/arena.h" #include "prism/force_inline.h" #include "prism/line_offset_list.h" -#include "prism/util/pm_arena.h" #include #include diff --git a/include/prism/internal/line_offset_list.h b/include/prism/internal/line_offset_list.h index 4b7d9fc1a6..112603cdb9 100644 --- a/include/prism/internal/line_offset_list.h +++ b/include/prism/internal/line_offset_list.h @@ -15,8 +15,8 @@ #define PRISM_INTERNAL_LINE_OFFSET_LIST_H #include "prism/line_offset_list.h" +#include "prism/arena.h" #include "prism/force_inline.h" -#include "prism/util/pm_arena.h" /** * Initialize a new line offset list with the given capacity. 
diff --git a/include/prism/parser.h b/include/prism/parser.h index 1d8f28fff6..0922a2069a 100644 --- a/include/prism/parser.h +++ b/include/prism/parser.h @@ -7,12 +7,12 @@ #define PRISM_PARSER_H #include "prism/defines.h" +#include "prism/arena.h" #include "prism/ast.h" #include "prism/encoding.h" #include "prism/line_offset_list.h" #include "prism/options.h" #include "prism/static_literals.h" -#include "prism/util/pm_arena.h" #include "prism/util/pm_constant_pool.h" #include "prism/util/pm_list.h" #include "prism/util/pm_string.h" diff --git a/include/prism/util/pm_constant_pool.h b/include/prism/util/pm_constant_pool.h index c527343273..22599cdec1 100644 --- a/include/prism/util/pm_constant_pool.h +++ b/include/prism/util/pm_constant_pool.h @@ -11,7 +11,7 @@ #define PRISM_CONSTANT_POOL_H #include "prism/defines.h" -#include "prism/util/pm_arena.h" +#include "prism/arena.h" #include #include diff --git a/prism.gemspec b/prism.gemspec index 4666d975f5..3cbd0dc958 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -48,6 +48,7 @@ Gem::Specification.new do |spec| "include/prism.h", "include/prism/align.h", "include/prism/allocator.h", + "include/prism/arena.h", "include/prism/ast.h", "include/prism/buffer.h", "include/prism/debug_allocator.h", @@ -72,7 +73,6 @@ Gem::Specification.new do |spec| "include/prism/internal/line_offset_list.h", "include/prism/internal/memchr.h", "include/prism/internal/strncasecmp.h", - "include/prism/util/pm_arena.h", "include/prism/util/pm_constant_pool.h", "include/prism/util/pm_list.h", "include/prism/util/pm_string.h", @@ -166,6 +166,7 @@ Gem::Specification.new do |spec| "sig/generated/prism/parse_result/comments.rbs", "sig/generated/prism/parse_result/errors.rbs", "sig/generated/prism/parse_result/newlines.rbs", + "src/arena.c", "src/buffer.c", "src/char.c", "src/diagnostic.c", @@ -182,7 +183,6 @@ Gem::Specification.new do |spec| "src/static_literals.c", "src/strncasecmp.c", "src/token_type.c", - "src/util/pm_arena.c", 
"src/util/pm_constant_pool.c", "src/util/pm_list.c", "src/util/pm_string.c", diff --git a/src/util/pm_arena.c b/src/arena.c similarity index 98% rename from src/util/pm_arena.c rename to src/arena.c index 6b07e25210..aa12b1b836 100644 --- a/src/util/pm_arena.c +++ b/src/arena.c @@ -1,4 +1,4 @@ -#include "prism/util/pm_arena.h" +#include "prism/arena.h" #include diff --git a/src/util/pm_constant_pool.c b/src/util/pm_constant_pool.c index 74e2a12524..3694b95c82 100644 --- a/src/util/pm_constant_pool.c +++ b/src/util/pm_constant_pool.c @@ -1,5 +1,5 @@ #include "prism/util/pm_constant_pool.h" -#include "prism/util/pm_arena.h" +#include "prism/arena.h" /** * Initialize a list of constant ids. diff --git a/templates/include/prism/diagnostic.h.erb b/templates/include/prism/diagnostic.h.erb index 935fb663ea..2982a46587 100644 --- a/templates/include/prism/diagnostic.h.erb +++ b/templates/include/prism/diagnostic.h.erb @@ -6,9 +6,9 @@ #ifndef PRISM_DIAGNOSTIC_H #define PRISM_DIAGNOSTIC_H -#include "prism/ast.h" #include "prism/defines.h" -#include "prism/util/pm_arena.h" +#include "prism/arena.h" +#include "prism/ast.h" #include "prism/util/pm_list.h" #include diff --git a/templates/src/diagnostic.c.erb b/templates/src/diagnostic.c.erb index b02714637d..8cb3eb7302 100644 --- a/templates/src/diagnostic.c.erb +++ b/templates/src/diagnostic.c.erb @@ -1,5 +1,5 @@ #include "prism/diagnostic.h" -#include "prism/util/pm_arena.h" +#include "prism/arena.h" #define PM_DIAGNOSTIC_ID_MAX <%= errors.length + warnings.length %> From c03b69b16e583d178a0c2ecb7bc1e0702439a288 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 10:35:09 -0400 Subject: [PATCH 013/100] Split up arena headers into public and internal --- include/prism/arena.h | 52 +++--------------------------- include/prism/defines.h | 13 +------- include/prism/flex_array.h | 21 ++++++++++++ include/prism/internal/arena.h | 58 ++++++++++++++++++++++++++++++++++ prism.gemspec | 2 ++ src/arena.c | 5 ++- 
src/line_offset_list.c | 1 + src/prism.c | 1 + src/util/pm_constant_pool.c | 2 +- templates/src/diagnostic.c.erb | 2 +- 10 files changed, 94 insertions(+), 63 deletions(-) create mode 100644 include/prism/flex_array.h create mode 100644 include/prism/internal/arena.h diff --git a/include/prism/arena.h b/include/prism/arena.h index deeaba19a9..752ca766ae 100644 --- a/include/prism/arena.h +++ b/include/prism/arena.h @@ -6,12 +6,11 @@ #ifndef PRISM_ARENA_H #define PRISM_ARENA_H -#include "prism/defines.h" +#include "prism/exported.h" +#include "prism/flex_array.h" +#include "prism/force_inline.h" #include -#include -#include -#include /** * A single block of memory in the arena. Blocks are linked via prev pointers so @@ -28,7 +27,7 @@ typedef struct pm_arena_block { size_t used; /** The block's data. */ - char data[PM_FLEX_ARY_LEN]; + char data[PM_FLEX_ARRAY_LENGTH]; } pm_arena_block_t; /** @@ -44,16 +43,6 @@ typedef struct { size_t block_count; } pm_arena_t; -/** - * Ensure the arena has at least `capacity` bytes available in its current - * block, allocating a new block if necessary. This allows callers to - * pre-size the arena to avoid repeated small block allocations. - * - * @param arena The arena to pre-size. - * @param capacity The minimum number of bytes to ensure are available. - */ -void pm_arena_reserve(pm_arena_t *arena, size_t capacity); - /** * Slow path for pm_arena_alloc: allocate a new block and return a pointer to * the first `size` bytes. Do not call directly — use pm_arena_alloc instead. @@ -91,39 +80,6 @@ pm_arena_alloc(pm_arena_t *arena, size_t size, size_t alignment) { return pm_arena_alloc_slow(arena, size); } -/** - * Allocate zero-initialized memory from the arena. This function is infallible - * — it aborts on allocation failure. - * - * @param arena The arena to allocate from. - * @param size The number of bytes to allocate. - * @param alignment The required alignment (must be a power of 2). 
- * @returns A pointer to the allocated, zero-initialized memory. - */ -static inline void * -pm_arena_zalloc(pm_arena_t *arena, size_t size, size_t alignment) { - void *ptr = pm_arena_alloc(arena, size, alignment); - memset(ptr, 0, size); - return ptr; -} - -/** - * Allocate memory from the arena and copy the given data into it. This is a - * convenience wrapper around pm_arena_alloc + memcpy. - * - * @param arena The arena to allocate from. - * @param src The source data to copy. - * @param size The number of bytes to allocate and copy. - * @param alignment The required alignment (must be a power of 2). - * @returns A pointer to the allocated copy. - */ -static inline void * -pm_arena_memdup(pm_arena_t *arena, const void *src, size_t size, size_t alignment) { - void *dst = pm_arena_alloc(arena, size, alignment); - memcpy(dst, src, size); - return dst; -} - /** * Free all blocks in the arena. After this call, all pointers returned by * pm_arena_alloc and pm_arena_zalloc are invalid. diff --git a/include/prism/defines.h b/include/prism/defines.h index fcfc8b84a1..048e967b62 100644 --- a/include/prism/defines.h +++ b/include/prism/defines.h @@ -12,6 +12,7 @@ #include "prism/align.h" #include "prism/allocator.h" #include "prism/exported.h" +#include "prism/flex_array.h" #include "prism/force_inline.h" #include "prism/format.h" @@ -292,16 +293,4 @@ #define PRISM_FALLTHROUGH #endif -/** - * A macro for defining a flexible array member. C99 supports `data[]`, GCC - * supports `data[0]` as an extension, and older compilers require `data[1]`. 
- */ -#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) - #define PM_FLEX_ARY_LEN /* data[] */ -#elif defined(__GNUC__) && !defined(__STRICT_ANSI__) - #define PM_FLEX_ARY_LEN 0 /* data[0] */ -#else - #define PM_FLEX_ARY_LEN 1 /* data[1] */ -#endif - #endif diff --git a/include/prism/flex_array.h b/include/prism/flex_array.h new file mode 100644 index 0000000000..d21e336311 --- /dev/null +++ b/include/prism/flex_array.h @@ -0,0 +1,21 @@ +/** + * @file flex_array.h + * + * Macro definitions for working with flexible array members. + */ +#ifndef PRISM_FLEX_ARRAY_H +#define PRISM_FLEX_ARRAY_H + +/** + * A macro for defining a flexible array member. C99 supports `data[]`, GCC + * supports `data[0]` as an extension, and older compilers require `data[1]`. + */ +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) + #define PM_FLEX_ARRAY_LENGTH /* data[] */ +#elif defined(__GNUC__) && !defined(__STRICT_ANSI__) + #define PM_FLEX_ARRAY_LENGTH 0 /* data[0] */ +#else + #define PM_FLEX_ARRAY_LENGTH 1 /* data[1] */ +#endif + +#endif diff --git a/include/prism/internal/arena.h b/include/prism/internal/arena.h new file mode 100644 index 0000000000..32a62735ab --- /dev/null +++ b/include/prism/internal/arena.h @@ -0,0 +1,58 @@ +/** + * @file internal/arena.h + * + * A bump allocator for the prism parser. + */ +#ifndef PRISM_INTERNAL_ARENA_H +#define PRISM_INTERNAL_ARENA_H + +#include "prism/arena.h" +#include "prism/exported.h" + +#include +#include + +/** + * Ensure the arena has at least `capacity` bytes available in its current + * block, allocating a new block if necessary. This allows callers to + * pre-size the arena to avoid repeated small block allocations. + * + * @param arena The arena to pre-size. + * @param capacity The minimum number of bytes to ensure are available. + */ +void pm_arena_reserve(pm_arena_t *arena, size_t capacity); + +/** + * Allocate zero-initialized memory from the arena. 
This function is infallible + * — it aborts on allocation failure. + * + * @param arena The arena to allocate from. + * @param size The number of bytes to allocate. + * @param alignment The required alignment (must be a power of 2). + * @returns A pointer to the allocated, zero-initialized memory. + */ +static inline void * +pm_arena_zalloc(pm_arena_t *arena, size_t size, size_t alignment) { + void *ptr = pm_arena_alloc(arena, size, alignment); + memset(ptr, 0, size); + return ptr; +} + +/** + * Allocate memory from the arena and copy the given data into it. This is a + * convenience wrapper around pm_arena_alloc + memcpy. + * + * @param arena The arena to allocate from. + * @param src The source data to copy. + * @param size The number of bytes to allocate and copy. + * @param alignment The required alignment (must be a power of 2). + * @returns A pointer to the allocated copy. + */ +static inline void * +pm_arena_memdup(pm_arena_t *arena, const void *src, size_t size, size_t alignment) { + void *dst = pm_arena_alloc(arena, size, alignment); + memcpy(dst, src, size); + return dst; +} + +#endif diff --git a/prism.gemspec b/prism.gemspec index 3cbd0dc958..ef64750809 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -56,6 +56,7 @@ Gem::Specification.new do |spec| "include/prism/diagnostic.h", "include/prism/encoding.h", "include/prism/exported.h", + "include/prism/flex_array.h", "include/prism/force_inline.h", "include/prism/format.h", "include/prism/integer.h", @@ -67,6 +68,7 @@ Gem::Specification.new do |spec| "include/prism/prettyprint.h", "include/prism/regexp.h", "include/prism/static_literals.h", + "include/prism/internal/arena.h", "include/prism/internal/buffer.h", "include/prism/internal/char.h", "include/prism/internal/integer.h", diff --git a/src/arena.c b/src/arena.c index aa12b1b836..d7ce9c043c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1,10 +1,13 @@ #include "prism/arena.h" +#include "prism/allocator.h" #include +#include +#include /** * Compute the 
block allocation size using offsetof so it is correct regardless - * of PM_FLEX_ARY_LEN. + * of PM_FLEX_ARRAY_LENGTH. */ #define PM_ARENA_BLOCK_SIZE(data_size) (offsetof(pm_arena_block_t, data) + (data_size)) diff --git a/src/line_offset_list.c b/src/line_offset_list.c index 81976aacb3..752ea934f5 100644 --- a/src/line_offset_list.c +++ b/src/line_offset_list.c @@ -1,4 +1,5 @@ #include "prism/internal/line_offset_list.h" +#include "prism/internal/arena.h" #include "prism/align.h" #include diff --git a/src/prism.c b/src/prism.c index 64c2680ee3..4633ec6fb4 100644 --- a/src/prism.c +++ b/src/prism.c @@ -1,5 +1,6 @@ #include "prism.h" +#include "prism/internal/arena.h" #include "prism/internal/char.h" #include "prism/internal/integer.h" #include "prism/internal/line_offset_list.h" diff --git a/src/util/pm_constant_pool.c b/src/util/pm_constant_pool.c index 3694b95c82..679d3a6b86 100644 --- a/src/util/pm_constant_pool.c +++ b/src/util/pm_constant_pool.c @@ -1,5 +1,5 @@ #include "prism/util/pm_constant_pool.h" -#include "prism/arena.h" +#include "prism/internal/arena.h" /** * Initialize a list of constant ids. 
diff --git a/templates/src/diagnostic.c.erb b/templates/src/diagnostic.c.erb index 8cb3eb7302..0d696772d5 100644 --- a/templates/src/diagnostic.c.erb +++ b/templates/src/diagnostic.c.erb @@ -1,5 +1,5 @@ #include "prism/diagnostic.h" -#include "prism/arena.h" +#include "prism/internal/arena.h" #define PM_DIAGNOSTIC_ID_MAX <%= errors.length + warnings.length %> From a18e3ca1789985fdd9bcb9224cb0ed04a783a893 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 10:39:31 -0400 Subject: [PATCH 014/100] Move compiler macro definitions into include/prism/attribute --- include/prism/arena.h | 6 +++--- include/prism/{ => attribute}/align.h | 2 +- include/prism/{ => attribute}/exported.h | 2 +- include/prism/{ => attribute}/flex_array.h | 2 +- include/prism/{ => attribute}/force_inline.h | 2 +- include/prism/{ => attribute}/format.h | 2 +- include/prism/buffer.h | 2 +- include/prism/defines.h | 10 +++++----- include/prism/internal/arena.h | 2 +- include/prism/internal/buffer.h | 2 +- include/prism/internal/char.h | 2 +- include/prism/internal/line_offset_list.h | 2 +- include/prism/line_offset_list.h | 2 +- prism.gemspec | 10 +++++----- src/line_offset_list.c | 2 +- 15 files changed, 25 insertions(+), 25 deletions(-) rename include/prism/{ => attribute}/align.h (97%) rename include/prism/{ => attribute}/exported.h (95%) rename include/prism/{ => attribute}/flex_array.h (94%) rename include/prism/{ => attribute}/force_inline.h (94%) rename include/prism/{ => attribute}/format.h (97%) diff --git a/include/prism/arena.h b/include/prism/arena.h index 752ca766ae..dd8ee09ceb 100644 --- a/include/prism/arena.h +++ b/include/prism/arena.h @@ -6,9 +6,9 @@ #ifndef PRISM_ARENA_H #define PRISM_ARENA_H -#include "prism/exported.h" -#include "prism/flex_array.h" -#include "prism/force_inline.h" +#include "prism/attribute/exported.h" +#include "prism/attribute/flex_array.h" +#include "prism/attribute/force_inline.h" #include diff --git a/include/prism/align.h 
b/include/prism/attribute/align.h similarity index 97% rename from include/prism/align.h rename to include/prism/attribute/align.h index bb120d488e..9a21b8d6ec 100644 --- a/include/prism/align.h +++ b/include/prism/attribute/align.h @@ -1,5 +1,5 @@ /** - * @file align.h + * @file attribute/align.h * * Alignment macros used throughout the prism library. */ diff --git a/include/prism/exported.h b/include/prism/attribute/exported.h similarity index 95% rename from include/prism/exported.h rename to include/prism/attribute/exported.h index 74476b0efb..8cb24848d1 100644 --- a/include/prism/exported.h +++ b/include/prism/attribute/exported.h @@ -1,5 +1,5 @@ /** - * @file exported.h + * @file attribute/exported.h * * Macro definitions for make functions publically visible. */ diff --git a/include/prism/flex_array.h b/include/prism/attribute/flex_array.h similarity index 94% rename from include/prism/flex_array.h rename to include/prism/attribute/flex_array.h index d21e336311..8daefba32b 100644 --- a/include/prism/flex_array.h +++ b/include/prism/attribute/flex_array.h @@ -1,5 +1,5 @@ /** - * @file flex_array.h + * @file attribute/flex_array.h * * Macro definitions for working with flexible array members. */ diff --git a/include/prism/force_inline.h b/include/prism/attribute/force_inline.h similarity index 94% rename from include/prism/force_inline.h rename to include/prism/attribute/force_inline.h index c205e5ddf6..1d2c494d6e 100644 --- a/include/prism/force_inline.h +++ b/include/prism/attribute/force_inline.h @@ -1,5 +1,5 @@ /** - * @file force_inline.h + * @file attribute/force_inline.h * * Macro definitions for forcing a function to be inlined at every call site. 
*/ diff --git a/include/prism/format.h b/include/prism/attribute/format.h similarity index 97% rename from include/prism/format.h rename to include/prism/attribute/format.h index 973fc3fe4e..4ad99fe125 100644 --- a/include/prism/format.h +++ b/include/prism/attribute/format.h @@ -1,5 +1,5 @@ /** - * @file format.h + * @file attribute/format.h * * Macro definition for specifying that a function accepts variadic parameters * that look like printf format strings. diff --git a/include/prism/buffer.h b/include/prism/buffer.h index ffc07461fc..90f7c9198e 100644 --- a/include/prism/buffer.h +++ b/include/prism/buffer.h @@ -6,7 +6,7 @@ #ifndef PRISM_BUFFER_H #define PRISM_BUFFER_H -#include "prism/exported.h" +#include "prism/attribute/exported.h" #include #include diff --git a/include/prism/defines.h b/include/prism/defines.h index 048e967b62..1c271a80c8 100644 --- a/include/prism/defines.h +++ b/include/prism/defines.h @@ -9,12 +9,12 @@ #ifndef PRISM_DEFINES_H #define PRISM_DEFINES_H -#include "prism/align.h" +#include "prism/attribute/align.h" +#include "prism/attribute/exported.h" +#include "prism/attribute/flex_array.h" +#include "prism/attribute/force_inline.h" +#include "prism/attribute/format.h" #include "prism/allocator.h" -#include "prism/exported.h" -#include "prism/flex_array.h" -#include "prism/force_inline.h" -#include "prism/format.h" #include #include diff --git a/include/prism/internal/arena.h b/include/prism/internal/arena.h index 32a62735ab..e86b89903e 100644 --- a/include/prism/internal/arena.h +++ b/include/prism/internal/arena.h @@ -6,8 +6,8 @@ #ifndef PRISM_INTERNAL_ARENA_H #define PRISM_INTERNAL_ARENA_H +#include "prism/attribute/exported.h" #include "prism/arena.h" -#include "prism/exported.h" #include #include diff --git a/include/prism/internal/buffer.h b/include/prism/internal/buffer.h index a5c46ed530..b1d360c91e 100644 --- a/include/prism/internal/buffer.h +++ b/include/prism/internal/buffer.h @@ -7,7 +7,7 @@ #define PRISM_INTERNAL_BUFFER_H 
#include "prism/buffer.h" -#include "prism/format.h" +#include "prism/attribute/format.h" #include diff --git a/include/prism/internal/char.h b/include/prism/internal/char.h index 7b394e43dc..b3975862dd 100644 --- a/include/prism/internal/char.h +++ b/include/prism/internal/char.h @@ -6,8 +6,8 @@ #ifndef PRISM_INTERNAL_CHAR_H #define PRISM_INTERNAL_CHAR_H +#include "prism/attribute/force_inline.h" #include "prism/arena.h" -#include "prism/force_inline.h" #include "prism/line_offset_list.h" #include diff --git a/include/prism/internal/line_offset_list.h b/include/prism/internal/line_offset_list.h index 112603cdb9..87af0bb524 100644 --- a/include/prism/internal/line_offset_list.h +++ b/include/prism/internal/line_offset_list.h @@ -15,8 +15,8 @@ #define PRISM_INTERNAL_LINE_OFFSET_LIST_H #include "prism/line_offset_list.h" +#include "prism/attribute/force_inline.h" #include "prism/arena.h" -#include "prism/force_inline.h" /** * Initialize a new line offset list with the given capacity. diff --git a/include/prism/line_offset_list.h b/include/prism/line_offset_list.h index 33bdd03aaa..0211b990fe 100644 --- a/include/prism/line_offset_list.h +++ b/include/prism/line_offset_list.h @@ -14,7 +14,7 @@ #ifndef PRISM_LINE_OFFSET_LIST_H #define PRISM_LINE_OFFSET_LIST_H -#include "prism/exported.h" +#include "prism/attribute/exported.h" #include #include diff --git a/prism.gemspec b/prism.gemspec index ef64750809..d9a3d15e62 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -46,7 +46,11 @@ Gem::Specification.new do |spec| "ext/prism/extension.c", "ext/prism/extension.h", "include/prism.h", - "include/prism/align.h", + "include/prism/attribute/align.h", + "include/prism/attribute/exported.h", + "include/prism/attribute/flex_array.h", + "include/prism/attribute/force_inline.h", + "include/prism/attribute/format.h", "include/prism/allocator.h", "include/prism/arena.h", "include/prism/ast.h", @@ -55,10 +59,6 @@ Gem::Specification.new do |spec| "include/prism/defines.h", 
"include/prism/diagnostic.h", "include/prism/encoding.h", - "include/prism/exported.h", - "include/prism/flex_array.h", - "include/prism/force_inline.h", - "include/prism/format.h", "include/prism/integer.h", "include/prism/line_offset_list.h", "include/prism/node.h", diff --git a/src/line_offset_list.c b/src/line_offset_list.c index 752ea934f5..17946a224a 100644 --- a/src/line_offset_list.c +++ b/src/line_offset_list.c @@ -1,6 +1,6 @@ +#include "prism/attribute/align.h" #include "prism/internal/line_offset_list.h" #include "prism/internal/arena.h" -#include "prism/align.h" #include #include From 104b6abd10a749448eeb3c5ec7073d99a1334f2a Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 10:54:29 -0400 Subject: [PATCH 015/100] Move strpbrk into internal --- include/prism.h | 1 - include/prism/attribute/unused.h | 21 +++ include/prism/defines.h | 135 +----------------- include/prism/internal/accel.h | 21 +++ include/prism/internal/bit.h | 40 ++++++ .../{util/pm_strpbrk.h => internal/strpbrk.h} | 10 +- prism.gemspec | 9 +- src/prism.c | 3 + src/{util/pm_strpbrk.c => strpbrk.c} | 11 +- 9 files changed, 109 insertions(+), 142 deletions(-) create mode 100644 include/prism/attribute/unused.h create mode 100644 include/prism/internal/accel.h create mode 100644 include/prism/internal/bit.h rename include/prism/{util/pm_strpbrk.h => internal/strpbrk.h} (92%) rename src/{util/pm_strpbrk.c => strpbrk.c} (98%) diff --git a/include/prism.h b/include/prism.h index e4310ff395..3627c459a8 100644 --- a/include/prism.h +++ b/include/prism.h @@ -11,7 +11,6 @@ extern "C" { #endif #include "prism/defines.h" -#include "prism/util/pm_strpbrk.h" #include "prism/internal/buffer.h" #include "prism/internal/memchr.h" #include "prism/internal/strncasecmp.h" diff --git a/include/prism/attribute/unused.h b/include/prism/attribute/unused.h new file mode 100644 index 0000000000..37a7b00f40 --- /dev/null +++ b/include/prism/attribute/unused.h @@ -0,0 +1,21 @@ +/** + * @file 
attribute/unused.h + * + * Macro definitions for marking functions and parameters as unused to suppress + * compiler warnings. + */ +#ifndef PRISM_ATTRIBUTE_UNUSED_H +#define PRISM_ATTRIBUTE_UNUSED_H + +/** + * GCC will warn if you specify a function or parameter that is unused at + * runtime. This macro allows you to mark a function or parameter as unused in a + * compiler-agnostic way. + */ +#if defined(__GNUC__) +# define PRISM_ATTRIBUTE_UNUSED __attribute__((unused)) +#else +# define PRISM_ATTRIBUTE_UNUSED +#endif + +#endif diff --git a/include/prism/defines.h b/include/prism/defines.h index 1c271a80c8..ad1cf2b75c 100644 --- a/include/prism/defines.h +++ b/include/prism/defines.h @@ -14,7 +14,11 @@ #include "prism/attribute/flex_array.h" #include "prism/attribute/force_inline.h" #include "prism/attribute/format.h" +#include "prism/attribute/unused.h" + #include "prism/allocator.h" +#include "prism/internal/accel.h" +#include "prism/internal/bit.h" #include #include @@ -44,17 +48,6 @@ #define PRISM_DEPTH_MAXIMUM 10000 #endif -/** - * GCC will warn if you specify a function or parameter that is unused at - * runtime. This macro allows you to mark a function or parameter as unused in a - * compiler-agnostic way. - */ -#if defined(__GNUC__) -# define PRISM_ATTRIBUTE_UNUSED __attribute__((unused)) -#else -# define PRISM_ATTRIBUTE_UNUSED -#endif - /** * Old Visual Studio versions do not support the inline keyword, so we need to * define it to be __inline. @@ -123,83 +116,6 @@ #define PRISM_ISINF(x) isinf(x) #endif -/** - * If you build prism with a custom allocator, configure it with - * "-D PRISM_XALLOCATOR" to use your own allocator that defines xmalloc, - * xrealloc, xcalloc, and xfree. 
- * - * For example, your `prism_xallocator.h` file could look like this: - * - * ``` - * #ifndef PRISM_XALLOCATOR_H - * #define PRISM_XALLOCATOR_H - * #define xmalloc my_malloc - * #define xrealloc my_realloc - * #define xcalloc my_calloc - * #define xfree my_free - * #define xrealloc_sized my_realloc_sized // (optional) - * #define xfree_sized my_free_sized // (optional) - * #endif - * ``` - */ -#ifdef PRISM_XALLOCATOR - #include "prism_xallocator.h" -#else - #ifndef xmalloc - /** - * The malloc function that should be used. This can be overridden with - * the PRISM_XALLOCATOR define. - */ - #define xmalloc malloc - #endif - - #ifndef xrealloc - /** - * The realloc function that should be used. This can be overridden with - * the PRISM_XALLOCATOR define. - */ - #define xrealloc realloc - #endif - - #ifndef xcalloc - /** - * The calloc function that should be used. This can be overridden with - * the PRISM_XALLOCATOR define. - */ - #define xcalloc calloc - #endif - - #ifndef xfree - /** - * The free function that should be used. This can be overridden with the - * PRISM_XALLOCATOR define. - */ - #define xfree free - #endif -#endif - -#ifndef xfree_sized -/** - * The free_sized function that should be used. This can be overridden with the - * PRISM_XALLOCATOR define. - * If not defined, defaults to calling xfree. - */ - #define xfree_sized(p, s) xfree(((void)(s), (p))) -#endif - -#ifndef xrealloc_sized -/** - * The xrealloc_sized function that should be used. This can be overridden with the - * PRISM_XALLOCATOR define. - * If not defined, defaults to calling xrealloc. - */ - #define xrealloc_sized(p, ns, os) xrealloc((p), ((void)(os), (ns))) -#endif - -#ifdef PRISM_BUILD_DEBUG - #include "prism/debug_allocator.h" -#endif - /** * If PRISM_BUILD_MINIMAL is defined, then we're going to define every possible * switch that will turn off certain features of prism. 
@@ -236,49 +152,6 @@ #define PRISM_UNLIKELY(x) (x) #endif -/** - * Platform detection for SIMD / fast-path implementations. At most one of - * these macros is defined, selecting the best available vectorization strategy. - */ -#if (defined(__aarch64__) && defined(__ARM_NEON)) || (defined(_MSC_VER) && defined(_M_ARM64)) - #define PRISM_HAS_NEON -#elif (defined(__x86_64__) && defined(__SSSE3__)) || (defined(_MSC_VER) && defined(_M_X64)) - #define PRISM_HAS_SSSE3 -#elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - #define PRISM_HAS_SWAR -#endif - -/** - * Count trailing zero bits in a 64-bit value. Used by SWAR identifier scanning - * to find the first non-matching byte in a word. - * - * Precondition: v must be nonzero. The result is undefined when v == 0 - * (matching the behavior of __builtin_ctzll and _BitScanForward64). - */ -#if defined(__GNUC__) || defined(__clang__) - #define pm_ctzll(v) ((unsigned) __builtin_ctzll(v)) -#elif defined(_MSC_VER) - #include - static inline unsigned pm_ctzll(uint64_t v) { - unsigned long index; - _BitScanForward64(&index, v); - return (unsigned) index; - } -#else - static inline unsigned - pm_ctzll(uint64_t v) { - unsigned c = 0; - v &= (uint64_t) (-(int64_t) v); - if (v & 0x00000000FFFFFFFFULL) c += 0; else c += 32; - if (v & 0x0000FFFF0000FFFFULL) c += 0; else c += 16; - if (v & 0x00FF00FF00FF00FFULL) c += 0; else c += 8; - if (v & 0x0F0F0F0F0F0F0F0FULL) c += 0; else c += 4; - if (v & 0x3333333333333333ULL) c += 0; else c += 2; - if (v & 0x5555555555555555ULL) c += 0; else c += 1; - return c; - } -#endif - /** * We use -Wimplicit-fallthrough to guard potentially unintended fall-through between cases of a switch. * Use PRISM_FALLTHROUGH to explicitly annotate cases where the fallthrough is intentional. 
diff --git a/include/prism/internal/accel.h b/include/prism/internal/accel.h new file mode 100644 index 0000000000..971c9b2473 --- /dev/null +++ b/include/prism/internal/accel.h @@ -0,0 +1,21 @@ +/** + * @file internal/accel.h + * + * Platform detection for acceleration implementations. + */ +#ifndef PRISM_INTERNAL_ACCEL_H +#define PRISM_INTERNAL_ACCEL_H + +/** + * Platform detection for SIMD / fast-path implementations. At most one of + * these macros is defined, selecting the best available vectorization strategy. + */ +#if (defined(__aarch64__) && defined(__ARM_NEON)) || (defined(_MSC_VER) && defined(_M_ARM64)) + #define PRISM_HAS_NEON +#elif (defined(__x86_64__) && defined(__SSSE3__)) || (defined(_MSC_VER) && defined(_M_X64)) + #define PRISM_HAS_SSSE3 +#elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + #define PRISM_HAS_SWAR +#endif + +#endif diff --git a/include/prism/internal/bit.h b/include/prism/internal/bit.h new file mode 100644 index 0000000000..110d4d68cb --- /dev/null +++ b/include/prism/internal/bit.h @@ -0,0 +1,40 @@ +/** + * @file internal/bit.h + * + * Bit manipulation utilities used throughout the prism library. + */ +#ifndef PRISM_INTERNAL_BIT_H +#define PRISM_INTERNAL_BIT_H + +/** + * Count trailing zero bits in a 64-bit value. Used by SWAR identifier scanning + * to find the first non-matching byte in a word. + * + * Precondition: v must be nonzero. The result is undefined when v == 0 + * (matching the behavior of __builtin_ctzll and _BitScanForward64). 
+ */ +#if defined(__GNUC__) || defined(__clang__) + #define pm_ctzll(v) ((unsigned) __builtin_ctzll(v)) +#elif defined(_MSC_VER) + #include + static inline unsigned pm_ctzll(uint64_t v) { + unsigned long index; + _BitScanForward64(&index, v); + return (unsigned) index; + } +#else + static inline unsigned + pm_ctzll(uint64_t v) { + unsigned c = 0; + v &= (uint64_t) (-(int64_t) v); + if (v & 0x00000000FFFFFFFFULL) c += 0; else c += 32; + if (v & 0x0000FFFF0000FFFFULL) c += 0; else c += 16; + if (v & 0x00FF00FF00FF00FFULL) c += 0; else c += 8; + if (v & 0x0F0F0F0F0F0F0F0FULL) c += 0; else c += 4; + if (v & 0x3333333333333333ULL) c += 0; else c += 2; + if (v & 0x5555555555555555ULL) c += 0; else c += 1; + return c; + } +#endif + +#endif diff --git a/include/prism/util/pm_strpbrk.h b/include/prism/internal/strpbrk.h similarity index 92% rename from include/prism/util/pm_strpbrk.h rename to include/prism/internal/strpbrk.h index f387bd5782..ca5692d25c 100644 --- a/include/prism/util/pm_strpbrk.h +++ b/include/prism/internal/strpbrk.h @@ -1,17 +1,15 @@ /** - * @file pm_strpbrk.h + * @file internal/strpbrk.h * * A custom strpbrk implementation. */ -#ifndef PRISM_STRPBRK_H -#define PRISM_STRPBRK_H +#ifndef PRISM_INTERNAL_STRPBRK_H +#define PRISM_INTERNAL_STRPBRK_H -#include "prism/defines.h" -#include "prism/diagnostic.h" #include "prism/parser.h" #include -#include +#include /** * Here we have rolled our own version of strpbrk. 
The standard library strpbrk diff --git a/prism.gemspec b/prism.gemspec index d9a3d15e62..5ba050cf5d 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -51,6 +51,7 @@ Gem::Specification.new do |spec| "include/prism/attribute/flex_array.h", "include/prism/attribute/force_inline.h", "include/prism/attribute/format.h", + "include/prism/attribute/unused.h", "include/prism/allocator.h", "include/prism/arena.h", "include/prism/ast.h", @@ -68,17 +69,19 @@ Gem::Specification.new do |spec| "include/prism/prettyprint.h", "include/prism/regexp.h", "include/prism/static_literals.h", + "include/prism/internal/accel.h", "include/prism/internal/arena.h", + "include/prism/internal/bit.h", "include/prism/internal/buffer.h", "include/prism/internal/char.h", "include/prism/internal/integer.h", "include/prism/internal/line_offset_list.h", "include/prism/internal/memchr.h", "include/prism/internal/strncasecmp.h", + "include/prism/internal/strpbrk.h", "include/prism/util/pm_constant_pool.h", "include/prism/util/pm_list.h", "include/prism/util/pm_string.h", - "include/prism/util/pm_strpbrk.h", "include/prism/version.h", "lib/prism.rb", "lib/prism/compiler.rb", @@ -184,11 +187,11 @@ Gem::Specification.new do |spec| "src/serialize.c", "src/static_literals.c", "src/strncasecmp.c", + "src/strpbrk.c", "src/token_type.c", "src/util/pm_constant_pool.c", "src/util/pm_list.c", - "src/util/pm_string.c", - "src/util/pm_strpbrk.c" + "src/util/pm_string.c" ] spec.extensions = ["ext/prism/extconf.rb"] diff --git a/src/prism.c b/src/prism.c index 4633ec6fb4..d7e7d661ff 100644 --- a/src/prism.c +++ b/src/prism.c @@ -1,9 +1,12 @@ #include "prism.h" +#include "prism/internal/accel.h" #include "prism/internal/arena.h" +#include "prism/internal/bit.h" #include "prism/internal/char.h" #include "prism/internal/integer.h" #include "prism/internal/line_offset_list.h" +#include "prism/internal/strpbrk.h" #include "prism/node_new.h" diff --git a/src/util/pm_strpbrk.c b/src/strpbrk.c similarity index 98% rename 
from src/util/pm_strpbrk.c rename to src/strpbrk.c index fdd2ab4567..b5a2089875 100644 --- a/src/util/pm_strpbrk.c +++ b/src/strpbrk.c @@ -1,4 +1,13 @@ -#include "prism/util/pm_strpbrk.h" +#include "prism/internal/strpbrk.h" + +#include "prism/attribute/unused.h" +#include "prism/internal/accel.h" +#include "prism/internal/bit.h" +#include "prism/diagnostic.h" + +#include +#include +#include /** * Add an invalid multibyte character error to the parser. From af683324eaa499a6a8793a0ef411c308fcf37d7a Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 11:00:14 -0400 Subject: [PATCH 016/100] Split up diagnostic headers into public and internal --- include/prism/internal/diagnostic.h | 38 +++++++++++++++++++++++ prism.gemspec | 1 + src/prism.c | 1 + src/regexp.c | 3 +- src/strpbrk.c | 2 +- templates/include/prism/diagnostic.h.erb | 39 ------------------------ 6 files changed, 43 insertions(+), 41 deletions(-) create mode 100644 include/prism/internal/diagnostic.h diff --git a/include/prism/internal/diagnostic.h b/include/prism/internal/diagnostic.h new file mode 100644 index 0000000000..b06ab69124 --- /dev/null +++ b/include/prism/internal/diagnostic.h @@ -0,0 +1,38 @@ +/** + * @file internal/diagnostic.h + * + * A list of diagnostics generated during parsing. + */ +#ifndef PRISM_INTERNAL_DIAGNOSTIC_H +#define PRISM_INTERNAL_DIAGNOSTIC_H + +#include "prism/diagnostic.h" +#include "prism/arena.h" +#include "prism/util/pm_list.h" + +/** + * Append a diagnostic to the given list of diagnostics that is using shared + * memory for its message. + * + * @param arena The arena to allocate from. + * @param list The list to append to. + * @param start The source offset of the start of the diagnostic. + * @param length The length of the diagnostic. + * @param diag_id The diagnostic ID. 
+ */ +void pm_diagnostic_list_append(pm_arena_t *arena, pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id); + +/** + * Append a diagnostic to the given list of diagnostics that is using a format + * string for its message. + * + * @param arena The arena to allocate from. + * @param list The list to append to. + * @param start The source offset of the start of the diagnostic. + * @param length The length of the diagnostic. + * @param diag_id The diagnostic ID. + * @param ... The arguments to the format string for the message. + */ +void pm_diagnostic_list_append_format(pm_arena_t *arena, pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id, ...); + +#endif diff --git a/prism.gemspec b/prism.gemspec index 5ba050cf5d..2bbce41c98 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -74,6 +74,7 @@ Gem::Specification.new do |spec| "include/prism/internal/bit.h", "include/prism/internal/buffer.h", "include/prism/internal/char.h", + "include/prism/internal/diagnostic.h", "include/prism/internal/integer.h", "include/prism/internal/line_offset_list.h", "include/prism/internal/memchr.h", diff --git a/src/prism.c b/src/prism.c index d7e7d661ff..8365b8bec2 100644 --- a/src/prism.c +++ b/src/prism.c @@ -4,6 +4,7 @@ #include "prism/internal/arena.h" #include "prism/internal/bit.h" #include "prism/internal/char.h" +#include "prism/internal/diagnostic.h" #include "prism/internal/integer.h" #include "prism/internal/line_offset_list.h" #include "prism/internal/strpbrk.h" diff --git a/src/regexp.c b/src/regexp.c index 60ebcd86ee..9d8584ac17 100644 --- a/src/regexp.c +++ b/src/regexp.c @@ -1,8 +1,9 @@ #include "prism/regexp.h" + #include "prism/internal/buffer.h" #include "prism/internal/char.h" +#include "prism/internal/diagnostic.h" #include "prism/internal/strncasecmp.h" -#include "prism/diagnostic.h" /** The maximum depth of nested groups allowed in a regular expression. 
*/ #define PM_REGEXP_PARSE_DEPTH_MAX 4096 diff --git a/src/strpbrk.c b/src/strpbrk.c index b5a2089875..fe7a1ab67b 100644 --- a/src/strpbrk.c +++ b/src/strpbrk.c @@ -3,7 +3,7 @@ #include "prism/attribute/unused.h" #include "prism/internal/accel.h" #include "prism/internal/bit.h" -#include "prism/diagnostic.h" +#include "prism/internal/diagnostic.h" #include #include diff --git a/templates/include/prism/diagnostic.h.erb b/templates/include/prism/diagnostic.h.erb index 2982a46587..5560ea54e1 100644 --- a/templates/include/prism/diagnostic.h.erb +++ b/templates/include/prism/diagnostic.h.erb @@ -6,15 +6,9 @@ #ifndef PRISM_DIAGNOSTIC_H #define PRISM_DIAGNOSTIC_H -#include "prism/defines.h" -#include "prism/arena.h" #include "prism/ast.h" #include "prism/util/pm_list.h" -#include -#include -#include - /** * The diagnostic IDs of all of the diagnostics, used to communicate the types * of errors between the parser and the user. @@ -81,37 +75,4 @@ typedef enum { PM_WARNING_LEVEL_VERBOSE = 1 } pm_warning_level_t; -/** - * Get the human-readable name of the given diagnostic ID. - * - * @param diag_id The diagnostic ID. - * @return The human-readable name of the diagnostic ID. - */ -const char * pm_diagnostic_id_human(pm_diagnostic_id_t diag_id); - -/** - * Append a diagnostic to the given list of diagnostics that is using shared - * memory for its message. - * - * @param arena The arena to allocate from. - * @param list The list to append to. - * @param start The source offset of the start of the diagnostic. - * @param length The length of the diagnostic. - * @param diag_id The diagnostic ID. - */ -void pm_diagnostic_list_append(pm_arena_t *arena, pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id); - -/** - * Append a diagnostic to the given list of diagnostics that is using a format - * string for its message. - * - * @param arena The arena to allocate from. - * @param list The list to append to. 
- * @param start The source offset of the start of the diagnostic. - * @param length The length of the diagnostic. - * @param diag_id The diagnostic ID. - * @param ... The arguments to the format string for the message. - */ -void pm_diagnostic_list_append_format(pm_arena_t *arena, pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id, ...); - #endif From cc97110ba1b3fe85d6b4ac2359faec6f47be28a7 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 11:02:08 -0400 Subject: [PATCH 017/100] Move constant pool out of utils --- include/prism/{util/pm_constant_pool.h => constant_pool.h} | 2 +- include/prism/parser.h | 2 +- prism.gemspec | 4 ++-- src/{util/pm_constant_pool.c => constant_pool.c} | 3 ++- templates/include/prism/ast.h.erb | 2 +- 5 files changed, 7 insertions(+), 6 deletions(-) rename include/prism/{util/pm_constant_pool.h => constant_pool.h} (99%) rename src/{util/pm_constant_pool.c => constant_pool.c} (99%) diff --git a/include/prism/util/pm_constant_pool.h b/include/prism/constant_pool.h similarity index 99% rename from include/prism/util/pm_constant_pool.h rename to include/prism/constant_pool.h index 22599cdec1..7ffa2efec6 100644 --- a/include/prism/util/pm_constant_pool.h +++ b/include/prism/constant_pool.h @@ -1,5 +1,5 @@ /** - * @file pm_constant_pool.h + * @file constant_pool.h * * A data structure that stores a set of strings. 
* diff --git a/include/prism/parser.h b/include/prism/parser.h index 0922a2069a..312e8bd0cf 100644 --- a/include/prism/parser.h +++ b/include/prism/parser.h @@ -9,11 +9,11 @@ #include "prism/defines.h" #include "prism/arena.h" #include "prism/ast.h" +#include "prism/constant_pool.h" #include "prism/encoding.h" #include "prism/line_offset_list.h" #include "prism/options.h" #include "prism/static_literals.h" -#include "prism/util/pm_constant_pool.h" #include "prism/util/pm_list.h" #include "prism/util/pm_string.h" diff --git a/prism.gemspec b/prism.gemspec index 2bbce41c98..316d36ef22 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -56,6 +56,7 @@ Gem::Specification.new do |spec| "include/prism/arena.h", "include/prism/ast.h", "include/prism/buffer.h", + "include/prism/constant_pool.h", "include/prism/debug_allocator.h", "include/prism/defines.h", "include/prism/diagnostic.h", @@ -80,7 +81,6 @@ Gem::Specification.new do |spec| "include/prism/internal/memchr.h", "include/prism/internal/strncasecmp.h", "include/prism/internal/strpbrk.h", - "include/prism/util/pm_constant_pool.h", "include/prism/util/pm_list.h", "include/prism/util/pm_string.h", "include/prism/version.h", @@ -175,6 +175,7 @@ Gem::Specification.new do |spec| "src/arena.c", "src/buffer.c", "src/char.c", + "src/constant_pool.c", "src/diagnostic.c", "src/encoding.c", "src/integer.c", @@ -190,7 +191,6 @@ Gem::Specification.new do |spec| "src/strncasecmp.c", "src/strpbrk.c", "src/token_type.c", - "src/util/pm_constant_pool.c", "src/util/pm_list.c", "src/util/pm_string.c" ] diff --git a/src/util/pm_constant_pool.c b/src/constant_pool.c similarity index 99% rename from src/util/pm_constant_pool.c rename to src/constant_pool.c index 679d3a6b86..117b295d9b 100644 --- a/src/util/pm_constant_pool.c +++ b/src/constant_pool.c @@ -1,4 +1,5 @@ -#include "prism/util/pm_constant_pool.h" +#include "prism/constant_pool.h" + #include "prism/internal/arena.h" /** diff --git a/templates/include/prism/ast.h.erb 
b/templates/include/prism/ast.h.erb index 2026d3a6c6..48c757a0e6 100644 --- a/templates/include/prism/ast.h.erb +++ b/templates/include/prism/ast.h.erb @@ -9,7 +9,7 @@ #define PRISM_AST_H #include "prism/defines.h" -#include "prism/util/pm_constant_pool.h" +#include "prism/constant_pool.h" #include "prism/util/pm_string.h" #include "prism/integer.h" From 281244ff57131d25b95582fadf560d17528aecee Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 11:06:28 -0400 Subject: [PATCH 018/100] Split up constant pool headers between public and internal --- include/prism/constant_pool.h | 117 +---------------------- include/prism/internal/constant_pool.h | 124 +++++++++++++++++++++++++ prism.gemspec | 1 + src/constant_pool.c | 4 + src/prism.c | 1 + templates/src/node.c.erb | 4 +- templates/src/prettyprint.c.erb | 4 +- 7 files changed, 137 insertions(+), 118 deletions(-) create mode 100644 include/prism/internal/constant_pool.h diff --git a/include/prism/constant_pool.h b/include/prism/constant_pool.h index 7ffa2efec6..cc426bb0ab 100644 --- a/include/prism/constant_pool.h +++ b/include/prism/constant_pool.h @@ -10,14 +10,8 @@ #ifndef PRISM_CONSTANT_POOL_H #define PRISM_CONSTANT_POOL_H -#include "prism/defines.h" -#include "prism/arena.h" - -#include -#include +#include #include -#include -#include /** * When we allocate constants into the pool, we reserve 0 to mean that the slot @@ -45,49 +39,6 @@ typedef struct { pm_constant_id_t *ids; } pm_constant_id_list_t; -/** - * Initialize a list of constant ids. - * - * @param list The list to initialize. - */ -void pm_constant_id_list_init(pm_constant_id_list_t *list); - -/** - * Initialize a list of constant ids with a given capacity. - * - * @param arena The arena to allocate from. - * @param list The list to initialize. - * @param capacity The initial capacity of the list. 
- */ -void pm_constant_id_list_init_capacity(pm_arena_t *arena, pm_constant_id_list_t *list, size_t capacity); - -/** - * Append a constant id to a list of constant ids. - * - * @param arena The arena to allocate from. - * @param list The list to append to. - * @param id The id to append. - */ -void pm_constant_id_list_append(pm_arena_t *arena, pm_constant_id_list_t *list, pm_constant_id_t id); - -/** - * Insert a constant id into a list of constant ids at the specified index. - * - * @param list The list to insert into. - * @param index The index at which to insert. - * @param id The id to insert. - */ -void pm_constant_id_list_insert(pm_constant_id_list_t *list, size_t index, pm_constant_id_t id); - -/** - * Checks if the current constant id list includes the given constant id. - * - * @param list The list to check. - * @param id The id to check for. - * @return Whether the list includes the given id. - */ -bool pm_constant_id_list_includes(pm_constant_id_list_t *list, pm_constant_id_t id); - /** * The type of bucket in the constant pool hash map. This determines how the * bucket should be freed. @@ -148,70 +99,4 @@ typedef struct { uint32_t capacity; } pm_constant_pool_t; -/** - * Initialize a new constant pool with a given capacity. - * - * @param arena The arena to allocate from. - * @param pool The pool to initialize. - * @param capacity The initial capacity of the pool. - */ -void pm_constant_pool_init(pm_arena_t *arena, pm_constant_pool_t *pool, uint32_t capacity); - -/** - * Return a pointer to the constant indicated by the given constant id. - * - * @param pool The pool to get the constant from. - * @param constant_id The id of the constant to get. - * @return A pointer to the constant. - */ -pm_constant_t * pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t constant_id); - -/** - * Find a constant in a constant pool. Returns the id of the constant, or 0 if - * the constant is not found. 
- * - * @param pool The pool to find the constant in. - * @param start A pointer to the start of the constant. - * @param length The length of the constant. - * @return The id of the constant. - */ -pm_constant_id_t pm_constant_pool_find(const pm_constant_pool_t *pool, const uint8_t *start, size_t length); - -/** - * Insert a constant into a constant pool that is a slice of a source string. - * Returns the id of the constant, or 0 if any potential calls to resize fail. - * - * @param arena The arena to allocate from. - * @param pool The pool to insert the constant into. - * @param start A pointer to the start of the constant. - * @param length The length of the constant. - * @return The id of the constant. - */ -pm_constant_id_t pm_constant_pool_insert_shared(pm_arena_t *arena, pm_constant_pool_t *pool, const uint8_t *start, size_t length); - -/** - * Insert a constant into a constant pool from memory that is now owned by the - * constant pool. Returns the id of the constant, or 0 if any potential calls to - * resize fail. - * - * @param arena The arena to allocate from. - * @param pool The pool to insert the constant into. - * @param start A pointer to the start of the constant. - * @param length The length of the constant. - * @return The id of the constant. - */ -pm_constant_id_t pm_constant_pool_insert_owned(pm_arena_t *arena, pm_constant_pool_t *pool, uint8_t *start, size_t length); - -/** - * Insert a constant into a constant pool from memory that is constant. Returns - * the id of the constant, or 0 if any potential calls to resize fail. - * - * @param arena The arena to allocate from. - * @param pool The pool to insert the constant into. - * @param start A pointer to the start of the constant. - * @param length The length of the constant. - * @return The id of the constant. 
- */ -pm_constant_id_t pm_constant_pool_insert_constant(pm_arena_t *arena, pm_constant_pool_t *pool, const uint8_t *start, size_t length); - #endif diff --git a/include/prism/internal/constant_pool.h b/include/prism/internal/constant_pool.h new file mode 100644 index 0000000000..b204dfe39f --- /dev/null +++ b/include/prism/internal/constant_pool.h @@ -0,0 +1,124 @@ +/** + * @file internal/constant_pool.h + * + * A data structure that stores a set of strings. + * + * Each string is assigned a unique id, which can be used to compare strings for + * equality. This comparison ends up being much faster than strcmp, since it + * only requires a single integer comparison. + */ +#ifndef PRISM_INTERNAL_CONSTANT_POOL_H +#define PRISM_INTERNAL_CONSTANT_POOL_H + +#include "prism/constant_pool.h" + +/** + * Initialize a list of constant ids. + * + * @param list The list to initialize. + */ +void pm_constant_id_list_init(pm_constant_id_list_t *list); + +/** + * Initialize a list of constant ids with a given capacity. + * + * @param arena The arena to allocate from. + * @param list The list to initialize. + * @param capacity The initial capacity of the list. + */ +void pm_constant_id_list_init_capacity(pm_arena_t *arena, pm_constant_id_list_t *list, size_t capacity); + +/** + * Append a constant id to a list of constant ids. + * + * @param arena The arena to allocate from. + * @param list The list to append to. + * @param id The id to append. + */ +void pm_constant_id_list_append(pm_arena_t *arena, pm_constant_id_list_t *list, pm_constant_id_t id); + +/** + * Insert a constant id into a list of constant ids at the specified index. + * + * @param list The list to insert into. + * @param index The index at which to insert. + * @param id The id to insert. + */ +void pm_constant_id_list_insert(pm_constant_id_list_t *list, size_t index, pm_constant_id_t id); + +/** + * Checks if the current constant id list includes the given constant id. + * + * @param list The list to check. 
+ * @param id The id to check for. + * @return Whether the list includes the given id. + */ +bool pm_constant_id_list_includes(pm_constant_id_list_t *list, pm_constant_id_t id); + +/** + * Initialize a new constant pool with a given capacity. + * + * @param arena The arena to allocate from. + * @param pool The pool to initialize. + * @param capacity The initial capacity of the pool. + */ +void pm_constant_pool_init(pm_arena_t *arena, pm_constant_pool_t *pool, uint32_t capacity); + +/** + * Return a pointer to the constant indicated by the given constant id. + * + * @param pool The pool to get the constant from. + * @param constant_id The id of the constant to get. + * @return A pointer to the constant. + */ +pm_constant_t * pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t constant_id); + +/** + * Find a constant in a constant pool. Returns the id of the constant, or 0 if + * the constant is not found. + * + * @param pool The pool to find the constant in. + * @param start A pointer to the start of the constant. + * @param length The length of the constant. + * @return The id of the constant. + */ +pm_constant_id_t pm_constant_pool_find(const pm_constant_pool_t *pool, const uint8_t *start, size_t length); + +/** + * Insert a constant into a constant pool that is a slice of a source string. + * Returns the id of the constant, or 0 if any potential calls to resize fail. + * + * @param arena The arena to allocate from. + * @param pool The pool to insert the constant into. + * @param start A pointer to the start of the constant. + * @param length The length of the constant. + * @return The id of the constant. + */ +pm_constant_id_t pm_constant_pool_insert_shared(pm_arena_t *arena, pm_constant_pool_t *pool, const uint8_t *start, size_t length); + +/** + * Insert a constant into a constant pool from memory that is now owned by the + * constant pool. Returns the id of the constant, or 0 if any potential calls to + * resize fail. 
+ * + * @param arena The arena to allocate from. + * @param pool The pool to insert the constant into. + * @param start A pointer to the start of the constant. + * @param length The length of the constant. + * @return The id of the constant. + */ +pm_constant_id_t pm_constant_pool_insert_owned(pm_arena_t *arena, pm_constant_pool_t *pool, uint8_t *start, size_t length); + +/** + * Insert a constant into a constant pool from memory that is constant. Returns + * the id of the constant, or 0 if any potential calls to resize fail. + * + * @param arena The arena to allocate from. + * @param pool The pool to insert the constant into. + * @param start A pointer to the start of the constant. + * @param length The length of the constant. + * @return The id of the constant. + */ +pm_constant_id_t pm_constant_pool_insert_constant(pm_arena_t *arena, pm_constant_pool_t *pool, const uint8_t *start, size_t length); + +#endif diff --git a/prism.gemspec b/prism.gemspec index 316d36ef22..644bf14e0a 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -75,6 +75,7 @@ Gem::Specification.new do |spec| "include/prism/internal/bit.h", "include/prism/internal/buffer.h", "include/prism/internal/char.h", + "include/prism/internal/constant_pool.h", "include/prism/internal/diagnostic.h", "include/prism/internal/integer.h", "include/prism/internal/line_offset_list.h", diff --git a/src/constant_pool.c b/src/constant_pool.c index 117b295d9b..106566df90 100644 --- a/src/constant_pool.c +++ b/src/constant_pool.c @@ -1,7 +1,11 @@ #include "prism/constant_pool.h" +#include "prism/attribute/align.h" #include "prism/internal/arena.h" +#include +#include + /** * Initialize a list of constant ids. 
*/ diff --git a/src/prism.c b/src/prism.c index 8365b8bec2..49d5ce018e 100644 --- a/src/prism.c +++ b/src/prism.c @@ -4,6 +4,7 @@ #include "prism/internal/arena.h" #include "prism/internal/bit.h" #include "prism/internal/char.h" +#include "prism/internal/constant_pool.h" #include "prism/internal/diagnostic.h" #include "prism/internal/integer.h" #include "prism/internal/line_offset_list.h" diff --git a/templates/src/node.c.erb b/templates/src/node.c.erb index 30e2d6fc58..8f89c500f8 100644 --- a/templates/src/node.c.erb +++ b/templates/src/node.c.erb @@ -1,7 +1,9 @@ #line <%= __LINE__ + 1 %> "prism/templates/src/<%= File.basename(__FILE__) %>" -#include "prism/internal/integer.h" #include "prism/node.h" +#include "prism/internal/constant_pool.h" +#include "prism/internal/integer.h" + /** * Attempts to grow the node list to the next size. If there is already * capacity in the list, this function does nothing. Otherwise it allocates a diff --git a/templates/src/prettyprint.c.erb b/templates/src/prettyprint.c.erb index aefb092df2..60665faba6 100644 --- a/templates/src/prettyprint.c.erb +++ b/templates/src/prettyprint.c.erb @@ -1,7 +1,9 @@ <%# encoding: ASCII -%> +#include "prism/prettyprint.h" + #include "prism/internal/buffer.h" +#include "prism/internal/constant_pool.h" #include "prism/internal/integer.h" -#include "prism/prettyprint.h" // We optionally support pretty printing nodes. 
For systems that don't want or // need this functionality, it can be turned off with the From b27fd8276d448be62c8399cfec5c2c255e08db57 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 11:14:37 -0400 Subject: [PATCH 019/100] Move strings out of util --- include/prism/defines.h | 24 +------------- include/prism/internal/files.h | 32 +++++++++++++++++++ include/prism/options.h | 2 +- include/prism/parser.h | 2 +- include/prism/regexp.h | 2 +- include/prism/{util/pm_string.h => strings.h} | 6 ++-- lib/prism/ffi.rb | 2 +- prism.gemspec | 6 ++-- src/{util/pm_string.c => strings.c} | 2 +- templates/include/prism/ast.h.erb | 2 +- 10 files changed, 45 insertions(+), 35 deletions(-) create mode 100644 include/prism/internal/files.h rename include/prism/{util/pm_string.h => strings.h} (98%) rename src/{util/pm_string.c => strings.c} (99%) diff --git a/include/prism/defines.h b/include/prism/defines.h index ad1cf2b75c..1bb6c47e86 100644 --- a/include/prism/defines.h +++ b/include/prism/defines.h @@ -19,6 +19,7 @@ #include "prism/allocator.h" #include "prism/internal/accel.h" #include "prism/internal/bit.h" +#include "prism/internal/files.h" #include #include @@ -81,29 +82,6 @@ # define PM_STATIC_ASSERT(line, condition, message) typedef char PM_CONCATENATE(static_assert_, line)[(condition) ? 1 : -1] #endif -/** - * In general, libc for embedded systems does not support memory-mapped files. - * If the target platform is POSIX or Windows, we can map a file in memory and - * read it in a more efficient manner. - */ -#ifdef _WIN32 -# define PRISM_HAS_MMAP -#else -# include <unistd.h> -# ifdef _POSIX_MAPPED_FILES -# define PRISM_HAS_MMAP -# endif -#endif - -/** - * If PRISM_HAS_NO_FILESYSTEM is defined, then we want to exclude all filesystem - * related code from the library. All filesystem related code should be guarded - * by PRISM_HAS_FILESYSTEM.
- */ -#ifndef PRISM_HAS_NO_FILESYSTEM -# define PRISM_HAS_FILESYSTEM -#endif - /** * isinf on POSIX systems it accepts a float, a double, or a long double. * But mingw didn't provide an isinf macro, only an isinf function that only diff --git a/include/prism/internal/files.h b/include/prism/internal/files.h new file mode 100644 index 0000000000..bb00ae4232 --- /dev/null +++ b/include/prism/internal/files.h @@ -0,0 +1,32 @@ +/** + * @file internal/files.h + * + * Platform detection for mmap and filesystem support. + */ +#ifndef PRISM_INTERNAL_FILES_H +#define PRISM_INTERNAL_FILES_H + +/** + * In general, libc for embedded systems does not support memory-mapped files. + * If the target platform is POSIX or Windows, we can map a file in memory and + * read it in a more efficient manner. + */ +#ifdef _WIN32 +# define PRISM_HAS_MMAP +#else +# include <unistd.h> +# ifdef _POSIX_MAPPED_FILES +# define PRISM_HAS_MMAP +# endif +#endif + +/** + * If PRISM_HAS_NO_FILESYSTEM is defined, then we want to exclude all filesystem + * related code from the library. All filesystem related code should be guarded + * by PRISM_HAS_FILESYSTEM.
+ */ +#ifndef PRISM_HAS_NO_FILESYSTEM +# define PRISM_HAS_FILESYSTEM +#endif + +#endif diff --git a/include/prism/options.h b/include/prism/options.h index 41f3bc7f42..cf144f649c 100644 --- a/include/prism/options.h +++ b/include/prism/options.h @@ -7,7 +7,7 @@ #define PRISM_OPTIONS_H #include "prism/defines.h" -#include "prism/util/pm_string.h" +#include "prism/strings.h" #include #include diff --git a/include/prism/parser.h b/include/prism/parser.h index 312e8bd0cf..f6037c384c 100644 --- a/include/prism/parser.h +++ b/include/prism/parser.h @@ -14,8 +14,8 @@ #include "prism/line_offset_list.h" #include "prism/options.h" #include "prism/static_literals.h" +#include "prism/strings.h" #include "prism/util/pm_list.h" -#include "prism/util/pm_string.h" #include diff --git a/include/prism/regexp.h b/include/prism/regexp.h index 60a84b9838..9baafe6d36 100644 --- a/include/prism/regexp.h +++ b/include/prism/regexp.h @@ -10,7 +10,7 @@ #include "prism/parser.h" #include "prism/encoding.h" #include "prism/internal/memchr.h" -#include "prism/util/pm_string.h" +#include "prism/strings.h" #include #include diff --git a/include/prism/util/pm_string.h b/include/prism/strings.h similarity index 98% rename from include/prism/util/pm_string.h rename to include/prism/strings.h index 76942180b6..d3fdcf6983 100644 --- a/include/prism/util/pm_string.h +++ b/include/prism/strings.h @@ -1,10 +1,10 @@ /** - * @file pm_string.h + * @file strings.h * * A generic string type that can have various ownership semantics. 
*/ -#ifndef PRISM_STRING_H -#define PRISM_STRING_H +#ifndef PRISM_STRINGS_H +#define PRISM_STRINGS_H #include "prism/defines.h" diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb index 72187fa490..eb8cf3f4ca 100644 --- a/lib/prism/ffi.rb +++ b/lib/prism/ffi.rb @@ -116,7 +116,7 @@ def self.load_exported_functions_from(header, *functions, callbacks) ) load_exported_functions_from( - "prism/util/pm_string.h", + "prism/strings.h", "pm_string_mapped_init", "pm_string_free", "pm_string_source", diff --git a/prism.gemspec b/prism.gemspec index 644bf14e0a..2c6d4ad9da 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -70,6 +70,7 @@ Gem::Specification.new do |spec| "include/prism/prettyprint.h", "include/prism/regexp.h", "include/prism/static_literals.h", + "include/prism/strings.h", "include/prism/internal/accel.h", "include/prism/internal/arena.h", "include/prism/internal/bit.h", @@ -83,7 +84,6 @@ Gem::Specification.new do |spec| "include/prism/internal/strncasecmp.h", "include/prism/internal/strpbrk.h", "include/prism/util/pm_list.h", - "include/prism/util/pm_string.h", "include/prism/version.h", "lib/prism.rb", "lib/prism/compiler.rb", @@ -189,11 +189,11 @@ Gem::Specification.new do |spec| "src/regexp.c", "src/serialize.c", "src/static_literals.c", + "src/strings.c", "src/strncasecmp.c", "src/strpbrk.c", "src/token_type.c", - "src/util/pm_list.c", - "src/util/pm_string.c" + "src/util/pm_list.c" ] spec.extensions = ["ext/prism/extconf.rb"] diff --git a/src/util/pm_string.c b/src/strings.c similarity index 99% rename from src/util/pm_string.c rename to src/strings.c index c2c85e1614..1f5220a0a6 100644 --- a/src/util/pm_string.c +++ b/src/strings.c @@ -1,4 +1,4 @@ -#include "prism/util/pm_string.h" +#include "prism/strings.h" static const uint8_t empty_source[] = ""; diff --git a/templates/include/prism/ast.h.erb b/templates/include/prism/ast.h.erb index 48c757a0e6..783eaca2dd 100644 --- a/templates/include/prism/ast.h.erb +++ b/templates/include/prism/ast.h.erb @@ -10,8 
+10,8 @@ #include "prism/defines.h" #include "prism/constant_pool.h" -#include "prism/util/pm_string.h" #include "prism/integer.h" +#include "prism/strings.h" #include #include From 60e105fc0c566c71dbb39f8dc7e3b0cacc701a8c Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 11:25:21 -0400 Subject: [PATCH 020/100] Split up public and internal strings headers --- include/prism/defines.h | 5 ++- include/prism/{internal => }/files.h | 6 +-- include/prism/internal/strings.h | 55 +++++++++++++++++++++++ include/prism/strings.h | 67 +++------------------------- prism.gemspec | 2 + src/options.c | 4 ++ src/prism.c | 1 + src/regexp.c | 1 + src/static_literals.c | 7 ++- src/strings.c | 20 ++++++++- 10 files changed, 99 insertions(+), 69 deletions(-) rename include/prism/{internal => }/files.h (88%) create mode 100644 include/prism/internal/strings.h diff --git a/include/prism/defines.h b/include/prism/defines.h index 1bb6c47e86..04082e9c29 100644 --- a/include/prism/defines.h +++ b/include/prism/defines.h @@ -16,10 +16,11 @@ #include "prism/attribute/format.h" #include "prism/attribute/unused.h" -#include "prism/allocator.h" #include "prism/internal/accel.h" #include "prism/internal/bit.h" -#include "prism/internal/files.h" + +#include "prism/allocator.h" +#include "prism/files.h" #include #include diff --git a/include/prism/internal/files.h b/include/prism/files.h similarity index 88% rename from include/prism/internal/files.h rename to include/prism/files.h index bb00ae4232..8a74e03be8 100644 --- a/include/prism/internal/files.h +++ b/include/prism/files.h @@ -1,10 +1,10 @@ /** - * @file internal/files.h + * @file files.h * * Platform detection for mmap and filesystem support. */ -#ifndef PRISM_INTERNAL_FILES_H -#define PRISM_INTERNAL_FILES_H +#ifndef PRISM_FILES_H +#define PRISM_FILES_H /** * In general, libc for embedded systems does not support memory-mapped files. 
diff --git a/include/prism/internal/strings.h b/include/prism/internal/strings.h new file mode 100644 index 0000000000..f46aa86a81 --- /dev/null +++ b/include/prism/internal/strings.h @@ -0,0 +1,55 @@ +/** + * @file internal/strings.h + * + * A generic string type that can have various ownership semantics. + */ +#ifndef PRISM_INTERNAL_STRINGS_H +#define PRISM_INTERNAL_STRINGS_H + +#include "prism/strings.h" + +/** + * Defines an empty string. This is useful for initializing a string that will + * be filled in later. + */ +#define PM_STRING_EMPTY ((pm_string_t) { .type = PM_STRING_CONSTANT, .source = NULL, .length = 0 }) + +/** + * Initialize a shared string that is based on initial input. + * + * @param string The string to initialize. + * @param start The start of the string. + * @param end The end of the string. + */ +void pm_string_shared_init(pm_string_t *string, const uint8_t *start, const uint8_t *end); + +/** + * Initialize an owned string that is responsible for freeing allocated memory. + * + * @param string The string to initialize. + * @param source The source of the string. + * @param length The length of the string. + */ +void pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length); + +/** + * Ensure the string is owned. If it is not, then reinitialize it as owned and + * copy over the previous source. + * + * @param string The string to ensure is owned. + */ +void pm_string_ensure_owned(pm_string_t *string); + +/** + * Compare the underlying lengths and bytes of two strings. Returns 0 if the + * strings are equal, a negative number if the left string is less than the + * right string, and a positive number if the left string is greater than the + * right string. + * + * @param left The left string to compare. + * @param right The right string to compare. + * @return The comparison result. 
+ */ +int pm_string_compare(const pm_string_t *left, const pm_string_t *right); + +#endif diff --git a/include/prism/strings.h b/include/prism/strings.h index d3fdcf6983..da28b76940 100644 --- a/include/prism/strings.h +++ b/include/prism/strings.h @@ -6,26 +6,11 @@ #ifndef PRISM_STRINGS_H #define PRISM_STRINGS_H -#include "prism/defines.h" +#include "prism/attribute/exported.h" +#include "prism/files.h" -#include -#include -#include #include -#include -#include - -// The following headers are necessary to read files using demand paging. -#ifdef _WIN32 -#include -#elif defined(_POSIX_MAPPED_FILES) -#include -#include -#include -#elif defined(PRISM_HAS_FILESYSTEM) -#include -#include -#endif +#include /** * A generic string type that can have various ownership semantics. @@ -63,30 +48,6 @@ typedef struct { */ PRISM_EXPORTED_FUNCTION size_t pm_string_sizeof(void); -/** - * Defines an empty string. This is useful for initializing a string that will - * be filled in later. - */ -#define PM_STRING_EMPTY ((pm_string_t) { .type = PM_STRING_CONSTANT, .source = NULL, .length = 0 }) - -/** - * Initialize a shared string that is based on initial input. - * - * @param string The string to initialize. - * @param start The start of the string. - * @param end The end of the string. - */ -void pm_string_shared_init(pm_string_t *string, const uint8_t *start, const uint8_t *end); - -/** - * Initialize an owned string that is responsible for freeing allocated memory. - * - * @param string The string to initialize. - * @param source The source of the string. - * @param length The length of the string. - */ -void pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length); - /** * Initialize a constant string that doesn't own its memory source. * @@ -105,11 +66,13 @@ PRISM_EXPORTED_FUNCTION void pm_string_constant_init(pm_string_t *string, const typedef enum { /** Indicates that the string was successfully initialized. 
*/ PM_STRING_INIT_SUCCESS = 0, + /** * Indicates a generic error from a string_*_init function, where the type * of error should be read from `errno` or `GetLastError()`. */ PM_STRING_INIT_ERROR_GENERIC = 1, + /** * Indicates that the file that was attempted to be opened was a directory. */ @@ -148,26 +111,6 @@ PRISM_EXPORTED_FUNCTION pm_string_init_result_t pm_string_mapped_init(pm_string_ */ PRISM_EXPORTED_FUNCTION pm_string_init_result_t pm_string_file_init(pm_string_t *string, const char *filepath); -/** - * Ensure the string is owned. If it is not, then reinitialize it as owned and - * copy over the previous source. - * - * @param string The string to ensure is owned. - */ -void pm_string_ensure_owned(pm_string_t *string); - -/** - * Compare the underlying lengths and bytes of two strings. Returns 0 if the - * strings are equal, a negative number if the left string is less than the - * right string, and a positive number if the left string is greater than the - * right string. - * - * @param left The left string to compare. - * @param right The right string to compare. - * @return The comparison result. - */ -int pm_string_compare(const pm_string_t *left, const pm_string_t *right); - /** * Returns the length associated with the string. 
* diff --git a/prism.gemspec b/prism.gemspec index 2c6d4ad9da..dd644ba2c9 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -61,6 +61,7 @@ Gem::Specification.new do |spec| "include/prism/defines.h", "include/prism/diagnostic.h", "include/prism/encoding.h", + "include/prism/files.h", "include/prism/integer.h", "include/prism/line_offset_list.h", "include/prism/node.h", @@ -82,6 +83,7 @@ Gem::Specification.new do |spec| "include/prism/internal/line_offset_list.h", "include/prism/internal/memchr.h", "include/prism/internal/strncasecmp.h", + "include/prism/internal/strings.h", "include/prism/internal/strpbrk.h", "include/prism/util/pm_list.h", "include/prism/version.h", diff --git a/src/options.c b/src/options.c index c89515964c..42ea38ba83 100644 --- a/src/options.c +++ b/src/options.c @@ -1,5 +1,9 @@ #include "prism/options.h" + #include "prism/internal/char.h" +#include "prism/allocator.h" + +#include /** * Set the shebang callback option on the given options struct. diff --git a/src/prism.c b/src/prism.c index 49d5ce018e..9f6bc2f380 100644 --- a/src/prism.c +++ b/src/prism.c @@ -8,6 +8,7 @@ #include "prism/internal/diagnostic.h" #include "prism/internal/integer.h" #include "prism/internal/line_offset_list.h" +#include "prism/internal/strings.h" #include "prism/internal/strpbrk.h" #include "prism/node_new.h" diff --git a/src/regexp.c b/src/regexp.c index 9d8584ac17..3dfe826104 100644 --- a/src/regexp.c +++ b/src/regexp.c @@ -3,6 +3,7 @@ #include "prism/internal/buffer.h" #include "prism/internal/char.h" #include "prism/internal/diagnostic.h" +#include "prism/internal/strings.h" #include "prism/internal/strncasecmp.h" /** The maximum depth of nested groups allowed in a regular expression. 
*/ diff --git a/src/static_literals.c b/src/static_literals.c index e6d66cd691..3323ea9e1b 100644 --- a/src/static_literals.c +++ b/src/static_literals.c @@ -1,6 +1,11 @@ +#include "prism/static_literals.h" + #include "prism/internal/buffer.h" #include "prism/internal/integer.h" -#include "prism/static_literals.h" +#include "prism/internal/strings.h" +#include "prism/allocator.h" + +#include /** * A small struct used for passing around a subset of the information that is diff --git a/src/strings.c b/src/strings.c index 1f5220a0a6..da7548112b 100644 --- a/src/strings.c +++ b/src/strings.c @@ -1,4 +1,22 @@ -#include "prism/strings.h" +#include "prism/internal/strings.h" + +#include "prism/allocator.h" + +#include +#include +#include + +/* The following headers are necessary to read files using demand paging. */ +#ifdef _WIN32 +#include +#elif defined(_POSIX_MAPPED_FILES) +#include +#include +#include +#elif defined(PRISM_HAS_FILESYSTEM) +#include +#include +#endif static const uint8_t empty_source[] = ""; From c6610459c7d282d1fc4d4e2b65ed94100b258356 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 11:38:29 -0400 Subject: [PATCH 021/100] Move list out of utils --- include/prism/internal/diagnostic.h | 1 - include/prism/{util/pm_list.h => list.h} | 30 +-------------- include/prism/parser.h | 2 +- prism.gemspec | 6 +-- rust/ruby-prism-sys/build/main.rs | 2 - rust/ruby-prism-sys/tests/utils_tests.rs | 20 +--------- src/list.c | 24 ++++++++++++ src/util/pm_list.c | 49 ------------------------ templates/include/prism/diagnostic.h.erb | 11 +++++- templates/src/node.c.erb | 2 + 10 files changed, 43 insertions(+), 104 deletions(-) rename include/prism/{util/pm_list.h => list.h} (75%) create mode 100644 src/list.c delete mode 100644 src/util/pm_list.c diff --git a/include/prism/internal/diagnostic.h b/include/prism/internal/diagnostic.h index b06ab69124..edaaa33a14 100644 --- a/include/prism/internal/diagnostic.h +++ b/include/prism/internal/diagnostic.h @@ 
-8,7 +8,6 @@ #include "prism/diagnostic.h" #include "prism/arena.h" -#include "prism/util/pm_list.h" /** * Append a diagnostic to the given list of diagnostics that is using shared diff --git a/include/prism/util/pm_list.h b/include/prism/list.h similarity index 75% rename from include/prism/util/pm_list.h rename to include/prism/list.h index f544bb2943..ba9adfba0f 100644 --- a/include/prism/util/pm_list.h +++ b/include/prism/list.h @@ -1,17 +1,12 @@ /** - * @file pm_list.h + * @file list.h * * An abstract linked list. */ #ifndef PRISM_LIST_H #define PRISM_LIST_H -#include "prism/defines.h" - -#include #include -#include -#include /** * This struct represents an abstract linked list that provides common @@ -63,25 +58,13 @@ typedef struct { pm_list_node_t *tail; } pm_list_t; -/** - * Returns true if the given list is empty. - * - * @param list The list to check. - * @return True if the given list is empty, otherwise false. - * - * \public \memberof pm_list_t - */ -PRISM_EXPORTED_FUNCTION bool pm_list_empty_p(pm_list_t *list); - /** * Returns the size of the list. * * @param list The list to check. * @return The size of the list. - * - * \public \memberof pm_list_t */ -PRISM_EXPORTED_FUNCTION size_t pm_list_size(pm_list_t *list); +size_t pm_list_size(pm_list_t *list); /** * Append a node to the given list. @@ -91,13 +74,4 @@ PRISM_EXPORTED_FUNCTION size_t pm_list_size(pm_list_t *list); */ void pm_list_append(pm_list_t *list, pm_list_node_t *node); -/** - * Deallocate the internal state of the given list. - * - * @param list The list to free. 
- * - * \public \memberof pm_list_t - */ -PRISM_EXPORTED_FUNCTION void pm_list_free(pm_list_t *list); - #endif diff --git a/include/prism/parser.h b/include/prism/parser.h index f6037c384c..6a4a7cade4 100644 --- a/include/prism/parser.h +++ b/include/prism/parser.h @@ -12,10 +12,10 @@ #include "prism/constant_pool.h" #include "prism/encoding.h" #include "prism/line_offset_list.h" +#include "prism/list.h" #include "prism/options.h" #include "prism/static_literals.h" #include "prism/strings.h" -#include "prism/util/pm_list.h" #include diff --git a/prism.gemspec b/prism.gemspec index dd644ba2c9..3394bb6ce7 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -64,6 +64,7 @@ Gem::Specification.new do |spec| "include/prism/files.h", "include/prism/integer.h", "include/prism/line_offset_list.h", + "include/prism/list.h", "include/prism/node.h", "include/prism/node_new.h", "include/prism/options.h", @@ -85,7 +86,6 @@ Gem::Specification.new do |spec| "include/prism/internal/strncasecmp.h", "include/prism/internal/strings.h", "include/prism/internal/strpbrk.h", - "include/prism/util/pm_list.h", "include/prism/version.h", "lib/prism.rb", "lib/prism/compiler.rb", @@ -183,6 +183,7 @@ Gem::Specification.new do |spec| "src/encoding.c", "src/integer.c", "src/line_offset_list.c", + "src/list.c", "src/memchr.c", "src/node.c", "src/options.c", @@ -194,8 +195,7 @@ Gem::Specification.new do |spec| "src/strings.c", "src/strncasecmp.c", "src/strpbrk.c", - "src/token_type.c", - "src/util/pm_list.c" + "src/token_type.c" ] spec.extensions = ["ext/prism/extconf.rb"] diff --git a/rust/ruby-prism-sys/build/main.rs b/rust/ruby-prism-sys/build/main.rs index 50a8f51d6c..0de2b3caba 100644 --- a/rust/ruby-prism-sys/build/main.rs +++ b/rust/ruby-prism-sys/build/main.rs @@ -149,8 +149,6 @@ fn generate_bindings(ruby_include_path: &Path) -> bindgen::Bindings { // Functions .allowlist_function("pm_arena_free") .allowlist_function("pm_line_offset_list_line_column") - .allowlist_function("pm_list_empty_p") - 
.allowlist_function("pm_list_free") .allowlist_function("pm_options_command_line_set") .allowlist_function("pm_options_encoding_locked_set") .allowlist_function("pm_options_encoding_set") diff --git a/rust/ruby-prism-sys/tests/utils_tests.rs b/rust/ruby-prism-sys/tests/utils_tests.rs index 13de5e8761..2c42750dd9 100644 --- a/rust/ruby-prism-sys/tests/utils_tests.rs +++ b/rust/ruby-prism-sys/tests/utils_tests.rs @@ -1,7 +1,4 @@ -use std::{ - ffi::{CStr, CString}, - mem::MaybeUninit, -}; +use std::ffi::{CStr, CString}; #[test] fn version_test() { @@ -15,21 +12,6 @@ fn version_test() { assert_eq!(&cstring.to_string_lossy(), "1.9.0"); } -#[test] -fn list_test() { - use ruby_prism_sys::{pm_list_empty_p, pm_list_free, pm_list_t}; - - let mut list = MaybeUninit::::zeroed(); - - unsafe { - let list = list.assume_init_mut(); - - assert!(pm_list_empty_p(list)); - - pm_list_free(list); - } -} - mod string { use ruby_prism_sys::{ pm_string_free, pm_string_length, pm_string_source, pm_string_t, pm_string_t__bindgen_ty_1, PM_STRING_CONSTANT, diff --git a/src/list.c b/src/list.c new file mode 100644 index 0000000000..419fd8952a --- /dev/null +++ b/src/list.c @@ -0,0 +1,24 @@ +#include "prism/list.h" + +/** + * Returns the size of the list. + */ +size_t +pm_list_size(pm_list_t *list) { + return list->size; +} + +/** + * Append a node to the given list. + */ +void +pm_list_append(pm_list_t *list, pm_list_node_t *node) { + if (list->head == NULL) { + list->head = node; + } else { + list->tail->next = node; + } + + list->tail = node; + list->size++; +} diff --git a/src/util/pm_list.c b/src/util/pm_list.c deleted file mode 100644 index 48486d6cf9..0000000000 --- a/src/util/pm_list.c +++ /dev/null @@ -1,49 +0,0 @@ -#include "prism/util/pm_list.h" - -/** - * Returns true if the given list is empty. - */ -bool -pm_list_empty_p(pm_list_t *list) { - return list->head == NULL; -} - -/** - * Returns the size of the list. 
- */ -size_t -pm_list_size(pm_list_t *list) { - return list->size; -} - -/** - * Append a node to the given list. - */ -void -pm_list_append(pm_list_t *list, pm_list_node_t *node) { - if (list->head == NULL) { - list->head = node; - } else { - list->tail->next = node; - } - - list->tail = node; - list->size++; -} - -/** - * Deallocate the internal state of the given list. - */ -void -pm_list_free(pm_list_t *list) { - pm_list_node_t *node = list->head; - pm_list_node_t *next; - - while (node != NULL) { - next = node->next; - xfree_sized(node, sizeof(pm_list_node_t)); - node = next; - } - - list->size = 0; -} diff --git a/templates/include/prism/diagnostic.h.erb b/templates/include/prism/diagnostic.h.erb index 5560ea54e1..a3964cc40c 100644 --- a/templates/include/prism/diagnostic.h.erb +++ b/templates/include/prism/diagnostic.h.erb @@ -6,8 +6,9 @@ #ifndef PRISM_DIAGNOSTIC_H #define PRISM_DIAGNOSTIC_H +#include "prism/attribute/exported.h" #include "prism/ast.h" -#include "prism/util/pm_list.h" +#include "prism/list.h" /** * The diagnostic IDs of all of the diagnostics, used to communicate the types @@ -75,4 +76,12 @@ typedef enum { PM_WARNING_LEVEL_VERBOSE = 1 } pm_warning_level_t; +/** + * Get the human-readable name of the given diagnostic ID. + * + * @param diag_id The diagnostic ID to get the name of. + * @returns The human-readable name of the given diagnostic ID. + */ +PRISM_EXPORTED_FUNCTION const char * pm_diagnostic_id_human(pm_diagnostic_id_t diag_id); + #endif diff --git a/templates/src/node.c.erb b/templates/src/node.c.erb index 8f89c500f8..4e1b1cdaae 100644 --- a/templates/src/node.c.erb +++ b/templates/src/node.c.erb @@ -4,6 +4,8 @@ #include "prism/internal/constant_pool.h" #include "prism/internal/integer.h" +#include + /** * Attempts to grow the node list to the next size. If there is already * capacity in the list, this function does nothing. 
Otherwise it allocates a From 4149565d77d4173e81081ece839259d72fbb4451 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 11:39:34 -0400 Subject: [PATCH 022/100] Fully remove util dir --- .github/workflows/cpp-bindings.yml | 2 +- Doxyfile | 2 +- README.md | 1 - ext/prism/extconf.rb | 5 +---- 4 files changed, 3 insertions(+), 7 deletions(-) diff --git a/.github/workflows/cpp-bindings.yml b/.github/workflows/cpp-bindings.yml index 2a53272c4b..5b1bfba795 100644 --- a/.github/workflows/cpp-bindings.yml +++ b/.github/workflows/cpp-bindings.yml @@ -29,6 +29,6 @@ jobs: - name: Compile prism run: bundle exec rake compile - name: Compile C++ - run: g++ -o ./cpp_test cpp/test.cpp build/static/*.o build/static/util/*.o -Iinclude + run: g++ -o ./cpp_test cpp/test.cpp build/static/*.o -Iinclude - name: Run C++ run: ./cpp_test diff --git a/Doxyfile b/Doxyfile index 4ea648ed7a..fca1e8c314 100644 --- a/Doxyfile +++ b/Doxyfile @@ -23,7 +23,7 @@ PROJECT_NAME = "Prism Ruby parser" OUTPUT_DIRECTORY = doc JAVADOC_AUTOBRIEF = YES OPTIMIZE_OUTPUT_FOR_C = YES -INPUT = src src/util include include/prism include/prism/util +INPUT = src include include/prism EXCLUDE = include/prism/debug_allocator.h HTML_OUTPUT = c SORT_MEMBER_DOCS = NO diff --git a/README.md b/README.md index e92ef7bfb3..0805698d93 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,6 @@ The repository contains the infrastructure for both a shared library (libprism) │ └── prism Sample code that uses the Ruby API for documentation purposes ├── sig RBS type signatures for the Ruby library ├── src -│   ├── util various utility files │   └── prism.c main entrypoint for the shared library ├── templates contains ERB templates generated by templates/template.rb │   └── template.rb generates code from the nodes and tokens configured by config.yml diff --git a/ext/prism/extconf.rb b/ext/prism/extconf.rb index ea7cfe963e..9283d62b47 100644 --- a/ext/prism/extconf.rb +++ b/ext/prism/extconf.rb @@ -118,10 +118,7 @@ def 
add_libprism_source(path) src_list path end -$srcs = src_list("$(srcdir)") + - add_libprism_source("$(srcdir)/../../src") + - add_libprism_source("$(srcdir)/../../src/util") - +$srcs = src_list("$(srcdir)") + add_libprism_source("$(srcdir)/../../src") $headers += Dir["#{$srcdir}/../../include/**/*.h"] # Finally, we'll create the `Makefile` that is going to be used to configure and From 2cd264f1cdef42f5837053f5d4b65e8f5f3adc63 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 11:42:06 -0400 Subject: [PATCH 023/100] Split up list public and internal headers --- include/prism/internal/list.h | 27 +++++++++++++++++++++++++++ include/prism/list.h | 16 ---------------- src/list.c | 2 +- src/prism.c | 1 + templates/src/diagnostic.c.erb | 2 ++ templates/src/serialize.c.erb | 2 ++ 6 files changed, 33 insertions(+), 17 deletions(-) create mode 100644 include/prism/internal/list.h diff --git a/include/prism/internal/list.h b/include/prism/internal/list.h new file mode 100644 index 0000000000..f770b1dd2d --- /dev/null +++ b/include/prism/internal/list.h @@ -0,0 +1,27 @@ +/** + * @file internal/list.h + * + * An abstract linked list. + */ +#ifndef PRISM_INTERNAL_LIST_H +#define PRISM_INTERNAL_LIST_H + +#include "prism/list.h" + +/** + * Returns the size of the list. + * + * @param list The list to check. + * @return The size of the list. + */ +size_t pm_list_size(pm_list_t *list); + +/** + * Append a node to the given list. + * + * @param list The list to append to. + * @param node The node to append. + */ +void pm_list_append(pm_list_t *list, pm_list_node_t *node); + +#endif diff --git a/include/prism/list.h b/include/prism/list.h index ba9adfba0f..c9fb18278c 100644 --- a/include/prism/list.h +++ b/include/prism/list.h @@ -58,20 +58,4 @@ typedef struct { pm_list_node_t *tail; } pm_list_t; -/** - * Returns the size of the list. - * - * @param list The list to check. - * @return The size of the list. 
- */ -size_t pm_list_size(pm_list_t *list); - -/** - * Append a node to the given list. - * - * @param list The list to append to. - * @param node The node to append. - */ -void pm_list_append(pm_list_t *list, pm_list_node_t *node); - #endif diff --git a/src/list.c b/src/list.c index 419fd8952a..8d4cd1be94 100644 --- a/src/list.c +++ b/src/list.c @@ -1,4 +1,4 @@ -#include "prism/list.h" +#include "prism/internal/list.h" /** * Returns the size of the list. diff --git a/src/prism.c b/src/prism.c index 9f6bc2f380..46a5960179 100644 --- a/src/prism.c +++ b/src/prism.c @@ -8,6 +8,7 @@ #include "prism/internal/diagnostic.h" #include "prism/internal/integer.h" #include "prism/internal/line_offset_list.h" +#include "prism/internal/list.h" #include "prism/internal/strings.h" #include "prism/internal/strpbrk.h" diff --git a/templates/src/diagnostic.c.erb b/templates/src/diagnostic.c.erb index 0d696772d5..80d8b5b261 100644 --- a/templates/src/diagnostic.c.erb +++ b/templates/src/diagnostic.c.erb @@ -1,5 +1,7 @@ #include "prism/diagnostic.h" + #include "prism/internal/arena.h" +#include "prism/internal/list.h" #define PM_DIAGNOSTIC_ID_MAX <%= errors.length + warnings.length %> diff --git a/templates/src/serialize.c.erb b/templates/src/serialize.c.erb index f6da95f031..bf00abb69d 100644 --- a/templates/src/serialize.c.erb +++ b/templates/src/serialize.c.erb @@ -1,5 +1,7 @@ #include "prism.h" +#include "prism/internal/list.h" + // We optionally support serializing to a binary string. For systems that don't // want or need this functionality, it can be turned off with the // PRISM_EXCLUDE_SERIALIZATION define. 
From f9f9cd55d27e543623222c1c210e2f678fb06b0c Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 11:49:20 -0400 Subject: [PATCH 024/100] Split up encoding public and internal headers --- include/prism/encoding.h | 276 +---------------------------- include/prism/internal/encoding.h | 281 ++++++++++++++++++++++++++++++ include/prism/internal/memchr.h | 2 +- include/prism/regexp.h | 2 +- prism.gemspec | 4 +- src/encoding.c | 7 +- src/strpbrk.c | 1 + 7 files changed, 296 insertions(+), 277 deletions(-) create mode 100644 include/prism/internal/encoding.h diff --git a/include/prism/encoding.h b/include/prism/encoding.h index 99c2397bdc..a2061b65b6 100644 --- a/include/prism/encoding.h +++ b/include/prism/encoding.h @@ -6,278 +6,8 @@ #ifndef PRISM_ENCODING_H #define PRISM_ENCODING_H -#include "prism/defines.h" -#include "prism/internal/strncasecmp.h" - -#include -#include -#include -#include - -/** - * This struct defines the functions necessary to implement the encoding - * interface so we can determine how many bytes the subsequent character takes. - * Each callback should return the number of bytes, or 0 if the next bytes are - * invalid for the encoding and type. - */ -typedef struct { - /** - * Return the number of bytes that the next character takes if it is valid - * in the encoding. Does not read more than n bytes. It is assumed that n is - * at least 1. - */ - size_t (*char_width)(const uint8_t *b, ptrdiff_t n); - - /** - * Return the number of bytes that the next character takes if it is valid - * in the encoding and is alphabetical. Does not read more than n bytes. It - * is assumed that n is at least 1. - */ - size_t (*alpha_char)(const uint8_t *b, ptrdiff_t n); - - /** - * Return the number of bytes that the next character takes if it is valid - * in the encoding and is alphanumeric. Does not read more than n bytes. It - * is assumed that n is at least 1. 
- */ - size_t (*alnum_char)(const uint8_t *b, ptrdiff_t n); - - /** - * Return true if the next character is valid in the encoding and is an - * uppercase character. Does not read more than n bytes. It is assumed that - * n is at least 1. - */ - bool (*isupper_char)(const uint8_t *b, ptrdiff_t n); - - /** - * The name of the encoding. This should correspond to a value that can be - * passed to Encoding.find in Ruby. - */ - const char *name; - - /** - * Return true if the encoding is a multibyte encoding. - */ - bool multibyte; -} pm_encoding_t; - -/** - * All of the lookup tables use the first bit of each embedded byte to indicate - * whether the codepoint is alphabetical. - */ -#define PRISM_ENCODING_ALPHABETIC_BIT 1 << 0 - -/** - * All of the lookup tables use the second bit of each embedded byte to indicate - * whether the codepoint is alphanumeric. - */ -#define PRISM_ENCODING_ALPHANUMERIC_BIT 1 << 1 - -/** - * All of the lookup tables use the third bit of each embedded byte to indicate - * whether the codepoint is uppercase. - */ -#define PRISM_ENCODING_UPPERCASE_BIT 1 << 2 - -/** - * Return the size of the next character in the UTF-8 encoding. - * - * @param b The bytes to read. - * @param n The number of bytes that can be read. - * @returns The number of bytes that the next character takes if it is valid in - * the encoding, or 0 if it is not. - */ -size_t pm_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n); - -/** - * Return the size of the next character in the UTF-8 encoding if it is an - * alphabetical character. - * - * @param b The bytes to read. - * @param n The number of bytes that can be read. - * @returns The number of bytes that the next character takes if it is valid in - * the encoding, or 0 if it is not. - */ -size_t pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n); - -/** - * Return the size of the next character in the UTF-8 encoding if it is an - * alphanumeric character. - * - * @param b The bytes to read. 
- * @param n The number of bytes that can be read. - * @returns The number of bytes that the next character takes if it is valid in - * the encoding, or 0 if it is not. - */ -size_t pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n); - -/** - * Return true if the next character in the UTF-8 encoding if it is an uppercase - * character. - * - * @param b The bytes to read. - * @param n The number of bytes that can be read. - * @returns True if the next character is valid in the encoding and is an - * uppercase character, or false if it is not. - */ -bool pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n); - -/** - * This lookup table is referenced in both the UTF-8 encoding file and the - * parser directly in order to speed up the default encoding processing. It is - * used to indicate whether a character is alphabetical, alphanumeric, or - * uppercase in unicode mappings. - */ -extern const uint8_t pm_encoding_unicode_table[256]; - -/** - * These are all of the encodings that prism supports. - */ -typedef enum { - PM_ENCODING_UTF_8 = 0, - PM_ENCODING_US_ASCII, - PM_ENCODING_ASCII_8BIT, - PM_ENCODING_EUC_JP, - PM_ENCODING_WINDOWS_31J, - -// We optionally support excluding the full set of encodings to only support the -// minimum necessary to process Ruby code without encoding comments. 
-#ifndef PRISM_ENCODING_EXCLUDE_FULL - PM_ENCODING_BIG5, - PM_ENCODING_BIG5_HKSCS, - PM_ENCODING_BIG5_UAO, - PM_ENCODING_CESU_8, - PM_ENCODING_CP51932, - PM_ENCODING_CP850, - PM_ENCODING_CP852, - PM_ENCODING_CP855, - PM_ENCODING_CP949, - PM_ENCODING_CP950, - PM_ENCODING_CP951, - PM_ENCODING_EMACS_MULE, - PM_ENCODING_EUC_JP_MS, - PM_ENCODING_EUC_JIS_2004, - PM_ENCODING_EUC_KR, - PM_ENCODING_EUC_TW, - PM_ENCODING_GB12345, - PM_ENCODING_GB18030, - PM_ENCODING_GB1988, - PM_ENCODING_GB2312, - PM_ENCODING_GBK, - PM_ENCODING_IBM437, - PM_ENCODING_IBM720, - PM_ENCODING_IBM737, - PM_ENCODING_IBM775, - PM_ENCODING_IBM852, - PM_ENCODING_IBM855, - PM_ENCODING_IBM857, - PM_ENCODING_IBM860, - PM_ENCODING_IBM861, - PM_ENCODING_IBM862, - PM_ENCODING_IBM863, - PM_ENCODING_IBM864, - PM_ENCODING_IBM865, - PM_ENCODING_IBM866, - PM_ENCODING_IBM869, - PM_ENCODING_ISO_8859_1, - PM_ENCODING_ISO_8859_2, - PM_ENCODING_ISO_8859_3, - PM_ENCODING_ISO_8859_4, - PM_ENCODING_ISO_8859_5, - PM_ENCODING_ISO_8859_6, - PM_ENCODING_ISO_8859_7, - PM_ENCODING_ISO_8859_8, - PM_ENCODING_ISO_8859_9, - PM_ENCODING_ISO_8859_10, - PM_ENCODING_ISO_8859_11, - PM_ENCODING_ISO_8859_13, - PM_ENCODING_ISO_8859_14, - PM_ENCODING_ISO_8859_15, - PM_ENCODING_ISO_8859_16, - PM_ENCODING_KOI8_R, - PM_ENCODING_KOI8_U, - PM_ENCODING_MAC_CENT_EURO, - PM_ENCODING_MAC_CROATIAN, - PM_ENCODING_MAC_CYRILLIC, - PM_ENCODING_MAC_GREEK, - PM_ENCODING_MAC_ICELAND, - PM_ENCODING_MAC_JAPANESE, - PM_ENCODING_MAC_ROMAN, - PM_ENCODING_MAC_ROMANIA, - PM_ENCODING_MAC_THAI, - PM_ENCODING_MAC_TURKISH, - PM_ENCODING_MAC_UKRAINE, - PM_ENCODING_SHIFT_JIS, - PM_ENCODING_SJIS_DOCOMO, - PM_ENCODING_SJIS_KDDI, - PM_ENCODING_SJIS_SOFTBANK, - PM_ENCODING_STATELESS_ISO_2022_JP, - PM_ENCODING_STATELESS_ISO_2022_JP_KDDI, - PM_ENCODING_TIS_620, - PM_ENCODING_UTF8_MAC, - PM_ENCODING_UTF8_DOCOMO, - PM_ENCODING_UTF8_KDDI, - PM_ENCODING_UTF8_SOFTBANK, - PM_ENCODING_WINDOWS_1250, - PM_ENCODING_WINDOWS_1251, - PM_ENCODING_WINDOWS_1252, - PM_ENCODING_WINDOWS_1253, 
- PM_ENCODING_WINDOWS_1254, - PM_ENCODING_WINDOWS_1255, - PM_ENCODING_WINDOWS_1256, - PM_ENCODING_WINDOWS_1257, - PM_ENCODING_WINDOWS_1258, - PM_ENCODING_WINDOWS_874, -#endif - - PM_ENCODING_MAXIMUM -} pm_encoding_type_t; - -/** - * This is the table of all of the encodings that prism supports. - */ -extern const pm_encoding_t pm_encodings[PM_ENCODING_MAXIMUM]; - -/** - * This is the default UTF-8 encoding. We need a reference to it to quickly - * create parsers. - */ -#define PM_ENCODING_UTF_8_ENTRY (&pm_encodings[PM_ENCODING_UTF_8]) - -/** - * This is the US-ASCII encoding. We need a reference to it to be able to - * compare against it when a string is being created because it could possibly - * need to fall back to ASCII-8BIT. - */ -#define PM_ENCODING_US_ASCII_ENTRY (&pm_encodings[PM_ENCODING_US_ASCII]) - -/** - * This is the ASCII-8BIT encoding. We need a reference to it so that pm_strpbrk - * can compare against it because invalid multibyte characters are not a thing - * in this encoding. It is also needed for handling Regexp encoding flags. - */ -#define PM_ENCODING_ASCII_8BIT_ENTRY (&pm_encodings[PM_ENCODING_ASCII_8BIT]) - -/** - * This is the EUC-JP encoding. We need a reference to it to quickly process - * regular expression modifiers. - */ -#define PM_ENCODING_EUC_JP_ENTRY (&pm_encodings[PM_ENCODING_EUC_JP]) - -/** - * This is the Windows-31J encoding. We need a reference to it to quickly - * process regular expression modifiers. - */ -#define PM_ENCODING_WINDOWS_31J_ENTRY (&pm_encodings[PM_ENCODING_WINDOWS_31J]) - -/** - * Parse the given name of an encoding and return a pointer to the corresponding - * encoding struct if one can be found, otherwise return NULL. - * - * @param start A pointer to the first byte of the name. - * @param end A pointer to the last byte of the name. - * @returns A pointer to the encoding struct if one is found, otherwise NULL. 
- */ -const pm_encoding_t * pm_encoding_find(const uint8_t *start, const uint8_t *end); +/* The encoding that the parser uses to process the source code. An opaque + * struct whose fields are defined in prism/internal/encoding.h. */ +typedef struct pm_encoding_t pm_encoding_t; #endif diff --git a/include/prism/internal/encoding.h b/include/prism/internal/encoding.h new file mode 100644 index 0000000000..a62c1fd548 --- /dev/null +++ b/include/prism/internal/encoding.h @@ -0,0 +1,281 @@ +/** + * @file internal/encoding.h + * + * The encoding interface and implementations used by the parser. + */ +#ifndef PRISM_INTERNAL_ENCODING_H +#define PRISM_INTERNAL_ENCODING_H + +#include "prism/encoding.h" + +#include +#include +#include + +/** + * This struct defines the functions necessary to implement the encoding + * interface so we can determine how many bytes the subsequent character takes. + * Each callback should return the number of bytes, or 0 if the next bytes are + * invalid for the encoding and type. + */ +struct pm_encoding_t { + /** + * Return the number of bytes that the next character takes if it is valid + * in the encoding. Does not read more than n bytes. It is assumed that n is + * at least 1. + */ + size_t (*char_width)(const uint8_t *b, ptrdiff_t n); + + /** + * Return the number of bytes that the next character takes if it is valid + * in the encoding and is alphabetical. Does not read more than n bytes. It + * is assumed that n is at least 1. + */ + size_t (*alpha_char)(const uint8_t *b, ptrdiff_t n); + + /** + * Return the number of bytes that the next character takes if it is valid + * in the encoding and is alphanumeric. Does not read more than n bytes. It + * is assumed that n is at least 1. + */ + size_t (*alnum_char)(const uint8_t *b, ptrdiff_t n); + + /** + * Return true if the next character is valid in the encoding and is an + * uppercase character. Does not read more than n bytes. It is assumed that + * n is at least 1.
+ */ + bool (*isupper_char)(const uint8_t *b, ptrdiff_t n); + + /** + * The name of the encoding. This should correspond to a value that can be + * passed to Encoding.find in Ruby. + */ + const char *name; + + /** + * True if the encoding is a multibyte encoding. + */ + bool multibyte; +}; + +/** + * All of the lookup tables use the first bit of each embedded byte to indicate + * whether the codepoint is alphabetical. + */ +#define PRISM_ENCODING_ALPHABETIC_BIT 1 << 0 + +/** + * All of the lookup tables use the second bit of each embedded byte to indicate + * whether the codepoint is alphanumeric. + */ +#define PRISM_ENCODING_ALPHANUMERIC_BIT 1 << 1 + +/** + * All of the lookup tables use the third bit of each embedded byte to indicate + * whether the codepoint is uppercase. + */ +#define PRISM_ENCODING_UPPERCASE_BIT 1 << 2 + +/** + * Return the size of the next character in the UTF-8 encoding. + * + * @param b The bytes to read. + * @param n The number of bytes that can be read. + * @returns The number of bytes that the next character takes if it is valid in + * the encoding, or 0 if it is not. + */ +size_t pm_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n); + +/** + * Return the size of the next character in the UTF-8 encoding if it is an + * alphabetical character. + * + * @param b The bytes to read. + * @param n The number of bytes that can be read. + * @returns The number of bytes that the next character takes if it is valid in + * the encoding, or 0 if it is not. + */ +size_t pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n); + +/** + * Return the size of the next character in the UTF-8 encoding if it is an + * alphanumeric character. + * + * @param b The bytes to read. + * @param n The number of bytes that can be read. + * @returns The number of bytes that the next character takes if it is valid in + * the encoding, or 0 if it is not.
+ */ +size_t pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n); + +/** + * Return true if the next character in the UTF-8 encoding is an uppercase + * character. + * + * @param b The bytes to read. + * @param n The number of bytes that can be read. + * @returns True if the next character is valid in the encoding and is an + * uppercase character, or false if it is not. + */ +bool pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n); + +/** + * This lookup table is referenced in both the UTF-8 encoding file and the + * parser directly in order to speed up the default encoding processing. It is + * used to indicate whether a character is alphabetical, alphanumeric, or + * uppercase in unicode mappings. + */ +extern const uint8_t pm_encoding_unicode_table[256]; + +/** + * These are all of the encodings that prism supports. + */ +typedef enum { + PM_ENCODING_UTF_8 = 0, + PM_ENCODING_US_ASCII, + PM_ENCODING_ASCII_8BIT, + PM_ENCODING_EUC_JP, + PM_ENCODING_WINDOWS_31J, + +// We optionally support excluding the full set of encodings to only support the +// minimum necessary to process Ruby code without encoding comments.
+#ifndef PRISM_ENCODING_EXCLUDE_FULL + PM_ENCODING_BIG5, + PM_ENCODING_BIG5_HKSCS, + PM_ENCODING_BIG5_UAO, + PM_ENCODING_CESU_8, + PM_ENCODING_CP51932, + PM_ENCODING_CP850, + PM_ENCODING_CP852, + PM_ENCODING_CP855, + PM_ENCODING_CP949, + PM_ENCODING_CP950, + PM_ENCODING_CP951, + PM_ENCODING_EMACS_MULE, + PM_ENCODING_EUC_JP_MS, + PM_ENCODING_EUC_JIS_2004, + PM_ENCODING_EUC_KR, + PM_ENCODING_EUC_TW, + PM_ENCODING_GB12345, + PM_ENCODING_GB18030, + PM_ENCODING_GB1988, + PM_ENCODING_GB2312, + PM_ENCODING_GBK, + PM_ENCODING_IBM437, + PM_ENCODING_IBM720, + PM_ENCODING_IBM737, + PM_ENCODING_IBM775, + PM_ENCODING_IBM852, + PM_ENCODING_IBM855, + PM_ENCODING_IBM857, + PM_ENCODING_IBM860, + PM_ENCODING_IBM861, + PM_ENCODING_IBM862, + PM_ENCODING_IBM863, + PM_ENCODING_IBM864, + PM_ENCODING_IBM865, + PM_ENCODING_IBM866, + PM_ENCODING_IBM869, + PM_ENCODING_ISO_8859_1, + PM_ENCODING_ISO_8859_2, + PM_ENCODING_ISO_8859_3, + PM_ENCODING_ISO_8859_4, + PM_ENCODING_ISO_8859_5, + PM_ENCODING_ISO_8859_6, + PM_ENCODING_ISO_8859_7, + PM_ENCODING_ISO_8859_8, + PM_ENCODING_ISO_8859_9, + PM_ENCODING_ISO_8859_10, + PM_ENCODING_ISO_8859_11, + PM_ENCODING_ISO_8859_13, + PM_ENCODING_ISO_8859_14, + PM_ENCODING_ISO_8859_15, + PM_ENCODING_ISO_8859_16, + PM_ENCODING_KOI8_R, + PM_ENCODING_KOI8_U, + PM_ENCODING_MAC_CENT_EURO, + PM_ENCODING_MAC_CROATIAN, + PM_ENCODING_MAC_CYRILLIC, + PM_ENCODING_MAC_GREEK, + PM_ENCODING_MAC_ICELAND, + PM_ENCODING_MAC_JAPANESE, + PM_ENCODING_MAC_ROMAN, + PM_ENCODING_MAC_ROMANIA, + PM_ENCODING_MAC_THAI, + PM_ENCODING_MAC_TURKISH, + PM_ENCODING_MAC_UKRAINE, + PM_ENCODING_SHIFT_JIS, + PM_ENCODING_SJIS_DOCOMO, + PM_ENCODING_SJIS_KDDI, + PM_ENCODING_SJIS_SOFTBANK, + PM_ENCODING_STATELESS_ISO_2022_JP, + PM_ENCODING_STATELESS_ISO_2022_JP_KDDI, + PM_ENCODING_TIS_620, + PM_ENCODING_UTF8_MAC, + PM_ENCODING_UTF8_DOCOMO, + PM_ENCODING_UTF8_KDDI, + PM_ENCODING_UTF8_SOFTBANK, + PM_ENCODING_WINDOWS_1250, + PM_ENCODING_WINDOWS_1251, + PM_ENCODING_WINDOWS_1252, + PM_ENCODING_WINDOWS_1253, 
+ PM_ENCODING_WINDOWS_1254, + PM_ENCODING_WINDOWS_1255, + PM_ENCODING_WINDOWS_1256, + PM_ENCODING_WINDOWS_1257, + PM_ENCODING_WINDOWS_1258, + PM_ENCODING_WINDOWS_874, +#endif + + PM_ENCODING_MAXIMUM +} pm_encoding_type_t; + +/** + * This is the table of all of the encodings that prism supports. + */ +extern const pm_encoding_t pm_encodings[PM_ENCODING_MAXIMUM]; + +/** + * This is the default UTF-8 encoding. We need a reference to it to quickly + * create parsers. + */ +#define PM_ENCODING_UTF_8_ENTRY (&pm_encodings[PM_ENCODING_UTF_8]) + +/** + * This is the US-ASCII encoding. We need a reference to it to be able to + * compare against it when a string is being created because it could possibly + * need to fall back to ASCII-8BIT. + */ +#define PM_ENCODING_US_ASCII_ENTRY (&pm_encodings[PM_ENCODING_US_ASCII]) + +/** + * This is the ASCII-8BIT encoding. We need a reference to it so that pm_strpbrk + * can compare against it because invalid multibyte characters are not a thing + * in this encoding. It is also needed for handling Regexp encoding flags. + */ +#define PM_ENCODING_ASCII_8BIT_ENTRY (&pm_encodings[PM_ENCODING_ASCII_8BIT]) + +/** + * This is the EUC-JP encoding. We need a reference to it to quickly process + * regular expression modifiers. + */ +#define PM_ENCODING_EUC_JP_ENTRY (&pm_encodings[PM_ENCODING_EUC_JP]) + +/** + * This is the Windows-31J encoding. We need a reference to it to quickly + * process regular expression modifiers. + */ +#define PM_ENCODING_WINDOWS_31J_ENTRY (&pm_encodings[PM_ENCODING_WINDOWS_31J]) + +/** + * Parse the given name of an encoding and return a pointer to the corresponding + * encoding struct if one can be found, otherwise return NULL. + * + * @param start A pointer to the first byte of the name. + * @param end A pointer to the last byte of the name. + * @returns A pointer to the encoding struct if one is found, otherwise NULL. 
+ */ +const pm_encoding_t * pm_encoding_find(const uint8_t *start, const uint8_t *end); + +#endif diff --git a/include/prism/internal/memchr.h b/include/prism/internal/memchr.h index 7277971681..905e3f33a1 100644 --- a/include/prism/internal/memchr.h +++ b/include/prism/internal/memchr.h @@ -6,7 +6,7 @@ #ifndef PRISM_INTERNAL_MEMCHR_H #define PRISM_INTERNAL_MEMCHR_H -#include "prism/encoding.h" +#include "prism/internal/encoding.h" #include diff --git a/include/prism/regexp.h b/include/prism/regexp.h index 9baafe6d36..17f0e03019 100644 --- a/include/prism/regexp.h +++ b/include/prism/regexp.h @@ -8,7 +8,7 @@ #include "prism/defines.h" #include "prism/parser.h" -#include "prism/encoding.h" +#include "prism/internal/encoding.h" #include "prism/internal/memchr.h" #include "prism/strings.h" diff --git a/prism.gemspec b/prism.gemspec index 3394bb6ce7..4712a86430 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -73,6 +73,7 @@ Gem::Specification.new do |spec| "include/prism/regexp.h", "include/prism/static_literals.h", "include/prism/strings.h", + "include/prism/version.h", "include/prism/internal/accel.h", "include/prism/internal/arena.h", "include/prism/internal/bit.h", @@ -80,13 +81,14 @@ Gem::Specification.new do |spec| "include/prism/internal/char.h", "include/prism/internal/constant_pool.h", "include/prism/internal/diagnostic.h", + "include/prism/internal/encoding.h", "include/prism/internal/integer.h", "include/prism/internal/line_offset_list.h", + "include/prism/internal/list.h", "include/prism/internal/memchr.h", "include/prism/internal/strncasecmp.h", "include/prism/internal/strings.h", "include/prism/internal/strpbrk.h", - "include/prism/version.h", "lib/prism.rb", "lib/prism/compiler.rb", "lib/prism/desugar_compiler.rb", diff --git a/src/encoding.c b/src/encoding.c index d7e5616840..f43d0fd079 100644 --- a/src/encoding.c +++ b/src/encoding.c @@ -1,4 +1,9 @@ -#include "prism/encoding.h" +#include "prism/internal/encoding.h" + +#include 
"prism/attribute/unused.h" +#include "prism/internal/strncasecmp.h" + +#include typedef uint32_t pm_unicode_codepoint_t; diff --git a/src/strpbrk.c b/src/strpbrk.c index fe7a1ab67b..9b28a680c9 100644 --- a/src/strpbrk.c +++ b/src/strpbrk.c @@ -4,6 +4,7 @@ #include "prism/internal/accel.h" #include "prism/internal/bit.h" #include "prism/internal/diagnostic.h" +#include "prism/internal/encoding.h" #include #include From d2ec3620fa18cb662a9bc8dab9b45abacdae5a7d Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 11:56:58 -0400 Subject: [PATCH 025/100] Split up static literals public and internal --- include/prism.h | 5 +- include/prism/internal/static_literals.h | 122 +++++++++++++++++++++++ include/prism/parser.h | 1 + include/prism/static_literals.h | 116 +-------------------- prism.gemspec | 1 + src/prism.c | 2 + src/static_literals.c | 2 +- 7 files changed, 130 insertions(+), 119 deletions(-) create mode 100644 include/prism/internal/static_literals.h diff --git a/include/prism.h b/include/prism.h index 3627c459a8..df7d49af29 100644 --- a/include/prism.h +++ b/include/prism.h @@ -11,9 +11,7 @@ extern "C" { #endif #include "prism/defines.h" -#include "prism/internal/buffer.h" -#include "prism/internal/memchr.h" -#include "prism/internal/strncasecmp.h" + #include "prism/arena.h" #include "prism/ast.h" #include "prism/diagnostic.h" @@ -22,7 +20,6 @@ extern "C" { #include "prism/parser.h" #include "prism/prettyprint.h" #include "prism/regexp.h" -#include "prism/static_literals.h" #include "prism/version.h" #include diff --git a/include/prism/internal/static_literals.h b/include/prism/internal/static_literals.h new file mode 100644 index 0000000000..0adde85780 --- /dev/null +++ b/include/prism/internal/static_literals.h @@ -0,0 +1,122 @@ +/** + * @file internal/static_literals.h + * + * A set of static literal nodes that can be checked for duplicates. 
+ */ +#ifndef PRISM_INTERNAL_STATIC_LITERALS_H +#define PRISM_INTERNAL_STATIC_LITERALS_H + +#include "prism/static_literals.h" + +#include "prism/ast.h" +#include "prism/buffer.h" +#include "prism/line_offset_list.h" + +/** + * An internal hash table for a set of nodes. + */ +typedef struct { + /** The array of nodes in the hash table. */ + pm_node_t **nodes; + + /** The size of the hash table. */ + uint32_t size; + + /** The space that has been allocated in the hash table. */ + uint32_t capacity; +} pm_node_hash_t; + +/** + * Certain sets of nodes (hash keys and when clauses) check for duplicate nodes + * to alert the user of potential issues. To do this, we keep a set of the nodes + * that have been seen so far, and compare whenever we find a new node. + * + * We bucket the nodes based on their type to minimize the number of comparisons + * that need to be performed. + */ +struct pm_static_literals_t { + /** + * This is the set of IntegerNode and SourceLineNode instances. + */ + pm_node_hash_t integer_nodes; + + /** + * This is the set of FloatNode instances. + */ + pm_node_hash_t float_nodes; + + /** + * This is the set of RationalNode and ImaginaryNode instances. + */ + pm_node_hash_t number_nodes; + + /** + * This is the set of StringNode and SourceFileNode instances. + */ + pm_node_hash_t string_nodes; + + /** + * This is the set of RegularExpressionNode instances. + */ + pm_node_hash_t regexp_nodes; + + /** + * This is the set of SymbolNode instances. + */ + pm_node_hash_t symbol_nodes; + + /** + * A pointer to the last TrueNode instance that was inserted, or NULL. + */ + pm_node_t *true_node; + + /** + * A pointer to the last FalseNode instance that was inserted, or NULL. + */ + pm_node_t *false_node; + + /** + * A pointer to the last NilNode instance that was inserted, or NULL. + */ + pm_node_t *nil_node; + + /** + * A pointer to the last SourceEncodingNode instance that was inserted, or + * NULL. 
+ */ + pm_node_t *source_encoding_node; +}; + +/** + * Add a node to the set of static literals. + * + * @param line_offsets The list of newline offsets to use to calculate lines. + * @param start The start of the source being parsed. + * @param start_line The line number that the parser starts on. + * @param literals The set of static literals to add the node to. + * @param node The node to add to the set. + * @param replace Whether to replace the previous node if one already exists. + * @return A pointer to the node that is being overwritten, if there is one. + */ +pm_node_t * pm_static_literals_add(const pm_line_offset_list_t *line_offsets, const uint8_t *start, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node, bool replace); + +/** + * Free the internal memory associated with the given static literals set. + * + * @param literals The set of static literals to free. + */ +void pm_static_literals_free(pm_static_literals_t *literals); + +/** + * Create a string-based representation of the given static literal. + * + * @param buffer The buffer to write the string to. + * @param line_offsets The list of newline offsets to use to calculate lines. + * @param start The start of the source being parsed. + * @param start_line The line number that the parser starts on. + * @param encoding_name The name of the encoding of the source being parsed. + * @param node The node to create a string representation of. 
+ */ +void pm_static_literal_inspect(pm_buffer_t *buffer, const pm_line_offset_list_t *line_offsets, const uint8_t *start, int32_t start_line, const char *encoding_name, const pm_node_t *node); + +#endif diff --git a/include/prism/parser.h b/include/prism/parser.h index 6a4a7cade4..0a0b142952 100644 --- a/include/prism/parser.h +++ b/include/prism/parser.h @@ -7,6 +7,7 @@ #define PRISM_PARSER_H #include "prism/defines.h" + #include "prism/arena.h" #include "prism/ast.h" #include "prism/constant_pool.h" diff --git a/include/prism/static_literals.h b/include/prism/static_literals.h index cdb00e6a83..4519510280 100644 --- a/include/prism/static_literals.h +++ b/include/prism/static_literals.h @@ -6,119 +6,7 @@ #ifndef PRISM_STATIC_LITERALS_H #define PRISM_STATIC_LITERALS_H -#include "prism/defines.h" -#include "prism/ast.h" -#include "prism/buffer.h" -#include "prism/line_offset_list.h" - -#include -#include - -/** - * An internal hash table for a set of nodes. - */ -typedef struct { - /** The array of nodes in the hash table. */ - pm_node_t **nodes; - - /** The size of the hash table. */ - uint32_t size; - - /** The space that has been allocated in the hash table. */ - uint32_t capacity; -} pm_node_hash_t; - -/** - * Certain sets of nodes (hash keys and when clauses) check for duplicate nodes - * to alert the user of potential issues. To do this, we keep a set of the nodes - * that have been seen so far, and compare whenever we find a new node. - * - * We bucket the nodes based on their type to minimize the number of comparisons - * that need to be performed. - */ -typedef struct { - /** - * This is the set of IntegerNode and SourceLineNode instances. - */ - pm_node_hash_t integer_nodes; - - /** - * This is the set of FloatNode instances. - */ - pm_node_hash_t float_nodes; - - /** - * This is the set of RationalNode and ImaginaryNode instances. - */ - pm_node_hash_t number_nodes; - - /** - * This is the set of StringNode and SourceFileNode instances. 
- */ - pm_node_hash_t string_nodes; - - /** - * This is the set of RegularExpressionNode instances. - */ - pm_node_hash_t regexp_nodes; - - /** - * This is the set of SymbolNode instances. - */ - pm_node_hash_t symbol_nodes; - - /** - * A pointer to the last TrueNode instance that was inserted, or NULL. - */ - pm_node_t *true_node; - - /** - * A pointer to the last FalseNode instance that was inserted, or NULL. - */ - pm_node_t *false_node; - - /** - * A pointer to the last NilNode instance that was inserted, or NULL. - */ - pm_node_t *nil_node; - - /** - * A pointer to the last SourceEncodingNode instance that was inserted, or - * NULL. - */ - pm_node_t *source_encoding_node; -} pm_static_literals_t; - -/** - * Add a node to the set of static literals. - * - * @param line_offsets The list of newline offsets to use to calculate lines. - * @param start The start of the source being parsed. - * @param start_line The line number that the parser starts on. - * @param literals The set of static literals to add the node to. - * @param node The node to add to the set. - * @param replace Whether to replace the previous node if one already exists. - * @return A pointer to the node that is being overwritten, if there is one. - */ -pm_node_t * pm_static_literals_add(const pm_line_offset_list_t *line_offsets, const uint8_t *start, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node, bool replace); - -/** - * Free the internal memory associated with the given static literals set. - * - * @param literals The set of static literals to free. - */ -void pm_static_literals_free(pm_static_literals_t *literals); - -/** - * Create a string-based representation of the given static literal. - * - * @param buffer The buffer to write the string to. - * @param line_offsets The list of newline offsets to use to calculate lines. - * @param start The start of the source being parsed. - * @param start_line The line number that the parser starts on. 
- * @param encoding_name The name of the encoding of the source being parsed. - * @param node The node to create a string representation of. - */ -void pm_static_literal_inspect(pm_buffer_t *buffer, const pm_line_offset_list_t *line_offsets, const uint8_t *start, int32_t start_line, const char *encoding_name, const pm_node_t *node); +/** An opaque struct that holds the static literals. */ +typedef struct pm_static_literals_t pm_static_literals_t; #endif diff --git a/prism.gemspec b/prism.gemspec index 4712a86430..bd5d1ec1dd 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -86,6 +86,7 @@ Gem::Specification.new do |spec| "include/prism/internal/line_offset_list.h", "include/prism/internal/list.h", "include/prism/internal/memchr.h", + "include/prism/internal/static_literals.h", "include/prism/internal/strncasecmp.h", "include/prism/internal/strings.h", "include/prism/internal/strpbrk.h", diff --git a/src/prism.c b/src/prism.c index 46a5960179..2162eea1de 100644 --- a/src/prism.c +++ b/src/prism.c @@ -9,7 +9,9 @@ #include "prism/internal/integer.h" #include "prism/internal/line_offset_list.h" #include "prism/internal/list.h" +#include "prism/internal/static_literals.h" #include "prism/internal/strings.h" +#include "prism/internal/strncasecmp.h" #include "prism/internal/strpbrk.h" #include "prism/node_new.h" diff --git a/src/static_literals.c b/src/static_literals.c index 3323ea9e1b..9023f7fff9 100644 --- a/src/static_literals.c +++ b/src/static_literals.c @@ -1,4 +1,4 @@ -#include "prism/static_literals.h" +#include "prism/internal/static_literals.h" #include "prism/internal/buffer.h" #include "prism/internal/integer.h" From abd0a833be47af4a7a2237f3187d238c4dfd5f70 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 12:01:09 -0400 Subject: [PATCH 026/100] Split up public/internal options --- include/prism/internal/options.h | 83 ++++++++++++++++++++++++++++++++ include/prism/options.h | 76 +---------------------------- prism.gemspec | 1 + src/options.c 
| 1 + src/prism.c | 1 + templates/src/serialize.c.erb | 1 + 6 files changed, 88 insertions(+), 75 deletions(-) create mode 100644 include/prism/internal/options.h diff --git a/include/prism/internal/options.h b/include/prism/internal/options.h new file mode 100644 index 0000000000..e3fb0a9e5e --- /dev/null +++ b/include/prism/internal/options.h @@ -0,0 +1,83 @@ +/** + * @file internal/options.h + * + * The options that can be passed to parsing. + */ +#ifndef PRISM_INTERNAL_OPTIONS_H +#define PRISM_INTERNAL_OPTIONS_H + +#include "prism/options.h" + +/** + * Deserialize an options struct from the given binary string. This is used to + * pass options to the parser from an FFI call so that consumers of the library + * from an FFI perspective don't have to worry about the structure of our + * options structs. Since the source of these calls will be from Ruby + * implementation internals we assume it is from a trusted source. + * + * `data` is assumed to be a valid pointer pointing to well-formed data. The + * layout of this data should be the same every time, and is described below: + * + * | # bytes | field | + * | ------- | -------------------------- | + * | `4` | the length of the filepath | + * | ... | the filepath bytes | + * | `4` | the line number | + * | `4` | the length of the encoding | + * | ... | the encoding bytes | + * | `1` | frozen string literal | + * | `1` | -p command line option | + * | `1` | -n command line option | + * | `1` | -l command line option | + * | `1` | -a command line option | + * | `1` | the version | + * | `1` | encoding locked | + * | `1` | main script | + * | `1` | partial script | + * | `1` | freeze | + * | `4` | the number of scopes | + * | ...
| the scopes | + * + * The version field is an enum, so it should be one of the following values: + * + * | value | version | + * | ----- | ------------------------- | + * | `0` | use the latest version of prism | + * | `1` | use the version of prism that is vendored in CRuby 3.3.0 | + * | `2` | use the version of prism that is vendored in CRuby 3.4.0 | + * | `3` | use the version of prism that is vendored in CRuby 4.0.0 | + * | `4` | use the version of prism that is vendored in CRuby 4.1.0 | + * + * Each scope is laid out as follows: + * + * | # bytes | field | + * | ------- | -------------------------- | + * | `4` | the number of locals | + * | `1` | the forwarding flags | + * | ... | the locals | + * + * Each local is laid out as follows: + * + * | # bytes | field | + * | ------- | -------------------------- | + * | `4` | the length of the local | + * | ... | the local bytes | + * + * Some additional things to note about this layout: + * + * * The filepath can have a length of 0, in which case we'll consider it an + * empty string. + * * The line number should be 0-indexed. + * * The encoding can have a length of 0, in which case we'll use the default + * encoding (UTF-8). If it's not 0, it should correspond to a name of an + * encoding that can be passed to `Encoding.find` in Ruby. + * * The frozen string literal, encoding locked, main script, and partial script + * fields are booleans, so their values should be either 0 or 1. + * * The number of scopes can be 0. + * + * @param options The options struct to deserialize into. + * @param data The binary string to deserialize from. 
+ */ +void pm_options_read(pm_options_t *options, const char *data); + +#endif diff --git a/include/prism/options.h b/include/prism/options.h index cf144f649c..4c8728c69a 100644 --- a/include/prism/options.h +++ b/include/prism/options.h @@ -6,12 +6,10 @@ #ifndef PRISM_OPTIONS_H #define PRISM_OPTIONS_H -#include "prism/defines.h" #include "prism/strings.h" #include #include -#include /** * String literals should be made frozen. @@ -58,7 +56,7 @@ static const uint8_t PM_OPTIONS_SCOPE_FORWARDING_BLOCK = 0x4; /** When the scope is fowarding with the ... parameter. */ static const uint8_t PM_OPTIONS_SCOPE_FORWARDING_ALL = 0x8; -// Forward declaration needed by the callback typedef. +/* Forward declaration needed by the callback typedef. */ struct pm_options; /** @@ -418,76 +416,4 @@ PRISM_EXPORTED_FUNCTION void pm_options_scope_forwarding_set(pm_options_scope_t */ PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options); -/** - * Deserialize an options struct from the given binary string. This is used to - * pass options to the parser from an FFI call so that consumers of the library - * from an FFI perspective don't have to worry about the structure of our - * options structs. Since the source of these calls will be from Ruby - * implementation internals we assume it is from a trusted source. - * - * `data` is assumed to be a valid pointer pointing to well-formed data. The - * layout of this data should be the same every time, and is described below: - * - * | # bytes | field | - * | ------- | -------------------------- | - * | `4` | the length of the filepath | - * | ... | the filepath bytes | - * | `4` | the line number | - * | `4` | the length the encoding | - * | ... 
| the encoding bytes | - * | `1` | frozen string literal | - * | `1` | -p command line option | - * | `1` | -n command line option | - * | `1` | -l command line option | - * | `1` | -a command line option | - * | `1` | the version | - * | `1` | encoding locked | - * | `1` | main script | - * | `1` | partial script | - * | `1` | freeze | - * | `4` | the number of scopes | - * | ... | the scopes | - * - * The version field is an enum, so it should be one of the following values: - * - * | value | version | - * | ----- | ------------------------- | - * | `0` | use the latest version of prism | - * | `1` | use the version of prism that is vendored in CRuby 3.3.0 | - * | `2` | use the version of prism that is vendored in CRuby 3.4.0 | - * | `3` | use the version of prism that is vendored in CRuby 4.0.0 | - * | `4` | use the version of prism that is vendored in CRuby 4.1.0 | - * - * Each scope is laid out as follows: - * - * | # bytes | field | - * | ------- | -------------------------- | - * | `4` | the number of locals | - * | `1` | the forwarding flags | - * | ... | the locals | - * - * Each local is laid out as follows: - * - * | # bytes | field | - * | ------- | -------------------------- | - * | `4` | the length of the local | - * | ... | the local bytes | - * - * Some additional things to note about this layout: - * - * * The filepath can have a length of 0, in which case we'll consider it an - * empty string. - * * The line number should be 0-indexed. - * * The encoding can have a length of 0, in which case we'll use the default - * encoding (UTF-8). If it's not 0, it should correspond to a name of an - * encoding that can be passed to `Encoding.find` in Ruby. - * * The frozen string literal, encoding locked, main script, and partial script - * fields are booleans, so their values should be either 0 or 1. - * * The number of scopes can be 0. - * - * @param options The options struct to deserialize into. - * @param data The binary string to deserialize from. 
- */ -void pm_options_read(pm_options_t *options, const char *data); - #endif diff --git a/prism.gemspec b/prism.gemspec index bd5d1ec1dd..3941e21077 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -86,6 +86,7 @@ Gem::Specification.new do |spec| "include/prism/internal/line_offset_list.h", "include/prism/internal/list.h", "include/prism/internal/memchr.h", + "include/prism/internal/options.h", "include/prism/internal/static_literals.h", "include/prism/internal/strncasecmp.h", "include/prism/internal/strings.h", diff --git a/src/options.c b/src/options.c index 42ea38ba83..26df07fca3 100644 --- a/src/options.c +++ b/src/options.c @@ -4,6 +4,7 @@ #include "prism/allocator.h" #include +#include /** * Set the shebang callback option on the given options struct. diff --git a/src/prism.c b/src/prism.c index 2162eea1de..0e24aff7d6 100644 --- a/src/prism.c +++ b/src/prism.c @@ -9,6 +9,7 @@ #include "prism/internal/integer.h" #include "prism/internal/line_offset_list.h" #include "prism/internal/list.h" +#include "prism/internal/options.h" #include "prism/internal/static_literals.h" #include "prism/internal/strings.h" #include "prism/internal/strncasecmp.h" diff --git a/templates/src/serialize.c.erb b/templates/src/serialize.c.erb index bf00abb69d..d2ef80b3c4 100644 --- a/templates/src/serialize.c.erb +++ b/templates/src/serialize.c.erb @@ -1,6 +1,7 @@ #include "prism.h" #include "prism/internal/list.h" +#include "prism/internal/options.h" // We optionally support serializing to a binary string. 
For systems that don't // want or need this functionality, it can be turned off with the From cc93903d080e538b33acb8628f97edbd6aa036ee Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 12:31:46 -0400 Subject: [PATCH 027/100] Put inline in its own header --- include/prism/attribute/force_inline.h | 4 +- include/prism/attribute/inline.h | 31 +++ include/prism/defines.h | 18 +- include/prism/internal/arena.h | 5 +- include/prism/internal/bit.h | 6 +- include/prism/internal/line_offset_list.h | 1 + prism.gemspec | 1 + src/buffer.c | 6 +- src/char.c | 10 +- src/constant_pool.c | 7 +- src/options.c | 3 +- src/prism.c | 288 +++++++++++----------- src/regexp.c | 15 +- src/static_literals.c | 5 +- src/strncasecmp.c | 4 +- src/strpbrk.c | 23 +- templates/include/prism/node_new.h.erb | 4 +- templates/src/diagnostic.c.erb | 5 +- templates/src/prettyprint.c.erb | 5 +- templates/src/serialize.c.erb | 5 +- 20 files changed, 242 insertions(+), 204 deletions(-) create mode 100644 include/prism/attribute/inline.h diff --git a/include/prism/attribute/force_inline.h b/include/prism/attribute/force_inline.h index 1d2c494d6e..7f1e8c57f8 100644 --- a/include/prism/attribute/force_inline.h +++ b/include/prism/attribute/force_inline.h @@ -6,6 +6,8 @@ #ifndef PRISM_FORCE_INLINE_H #define PRISM_FORCE_INLINE_H +#include "prism/attribute/inline.h" + /** * Force a function to be inlined at every call site. Use sparingly — only for * small, hot functions where the compiler's heuristics fail to inline. 
@@ -15,7 +17,7 @@ #elif defined(__GNUC__) || defined(__clang__) # define PRISM_FORCE_INLINE inline __attribute__((always_inline)) #else -# define PRISM_FORCE_INLINE inline +# define PRISM_FORCE_INLINE PRISM_INLINE #endif #endif diff --git a/include/prism/attribute/inline.h b/include/prism/attribute/inline.h new file mode 100644 index 0000000000..79a5dfcbee --- /dev/null +++ b/include/prism/attribute/inline.h @@ -0,0 +1,31 @@ +/** + * @file attribute/inline.h + * + * Macro definitions for forcing a function to be inlined at every call site. + */ +#ifndef PRISM_INLINE_H +#define PRISM_INLINE_H + +/** + * Old Visual Studio versions do not support the inline keyword, so we need to + * define it to be __inline. + */ +#if defined(_MSC_VER) && !defined(inline) +# define PRISM_INLINE __inline +#else +# define PRISM_INLINE inline +#endif + +/** + * Force a function to be inlined at every call site. Use sparingly — only for + * small, hot functions where the compiler's heuristics fail to inline. + */ +#if defined(_MSC_VER) +# define PRISM_FORCE_INLINE __forceinline +#elif defined(__GNUC__) || defined(__clang__) +# define PRISM_FORCE_INLINE inline __attribute__((always_inline)) +#else +# define PRISM_FORCE_INLINE PRISM_INLINE +#endif + +#endif diff --git a/include/prism/defines.h b/include/prism/defines.h index 04082e9c29..0d997f9c9b 100644 --- a/include/prism/defines.h +++ b/include/prism/defines.h @@ -12,8 +12,8 @@ #include "prism/attribute/align.h" #include "prism/attribute/exported.h" #include "prism/attribute/flex_array.h" -#include "prism/attribute/force_inline.h" #include "prism/attribute/format.h" +#include "prism/attribute/inline.h" #include "prism/attribute/unused.h" #include "prism/internal/accel.h" @@ -50,22 +50,6 @@ #define PRISM_DEPTH_MAXIMUM 10000 #endif -/** - * Old Visual Studio versions do not support the inline keyword, so we need to - * define it to be __inline. 
- */ -#if defined(_MSC_VER) && !defined(inline) -# define inline __inline -#endif - -/** - * Old Visual Studio versions before 2015 do not implement sprintf, but instead - * implement _snprintf. We standard that here. - */ -#if !defined(snprintf) && defined(_MSC_VER) && (_MSC_VER < 1900) -# define snprintf _snprintf -#endif - /** * A simple utility macro to concatenate two tokens together, necessary when one * of the tokens is itself a macro. diff --git a/include/prism/internal/arena.h b/include/prism/internal/arena.h index e86b89903e..8d263d9e6e 100644 --- a/include/prism/internal/arena.h +++ b/include/prism/internal/arena.h @@ -7,6 +7,7 @@ #define PRISM_INTERNAL_ARENA_H #include "prism/attribute/exported.h" +#include "prism/attribute/inline.h" #include "prism/arena.h" #include @@ -31,7 +32,7 @@ void pm_arena_reserve(pm_arena_t *arena, size_t capacity); * @param alignment The required alignment (must be a power of 2). * @returns A pointer to the allocated, zero-initialized memory. */ -static inline void * +static PRISM_INLINE void * pm_arena_zalloc(pm_arena_t *arena, size_t size, size_t alignment) { void *ptr = pm_arena_alloc(arena, size, alignment); memset(ptr, 0, size); @@ -48,7 +49,7 @@ pm_arena_zalloc(pm_arena_t *arena, size_t size, size_t alignment) { * @param alignment The required alignment (must be a power of 2). * @returns A pointer to the allocated copy. */ -static inline void * +static PRISM_INLINE void * pm_arena_memdup(pm_arena_t *arena, const void *src, size_t size, size_t alignment) { void *dst = pm_arena_alloc(arena, size, alignment); memcpy(dst, src, size); diff --git a/include/prism/internal/bit.h b/include/prism/internal/bit.h index 110d4d68cb..06ac3be2ee 100644 --- a/include/prism/internal/bit.h +++ b/include/prism/internal/bit.h @@ -6,6 +6,8 @@ #ifndef PRISM_INTERNAL_BIT_H #define PRISM_INTERNAL_BIT_H +#include "prism/attribute/inline.h" + /** * Count trailing zero bits in a 64-bit value. 
Used by SWAR identifier scanning * to find the first non-matching byte in a word. @@ -17,13 +19,13 @@ #define pm_ctzll(v) ((unsigned) __builtin_ctzll(v)) #elif defined(_MSC_VER) #include - static inline unsigned pm_ctzll(uint64_t v) { + static PRISM_INLINE unsigned pm_ctzll(uint64_t v) { unsigned long index; _BitScanForward64(&index, v); return (unsigned) index; } #else - static inline unsigned + static PRISM_INLINE unsigned pm_ctzll(uint64_t v) { unsigned c = 0; v &= (uint64_t) (-(int64_t) v); diff --git a/include/prism/internal/line_offset_list.h b/include/prism/internal/line_offset_list.h index 87af0bb524..97fa050924 100644 --- a/include/prism/internal/line_offset_list.h +++ b/include/prism/internal/line_offset_list.h @@ -15,6 +15,7 @@ #define PRISM_INTERNAL_LINE_OFFSET_LIST_H #include "prism/line_offset_list.h" + #include "prism/attribute/force_inline.h" #include "prism/arena.h" diff --git a/prism.gemspec b/prism.gemspec index 3941e21077..6a4b7ba5a4 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -51,6 +51,7 @@ Gem::Specification.new do |spec| "include/prism/attribute/flex_array.h", "include/prism/attribute/force_inline.h", "include/prism/attribute/format.h", + "include/prism/attribute/inline.h", "include/prism/attribute/unused.h", "include/prism/allocator.h", "include/prism/arena.h", diff --git a/src/buffer.c b/src/buffer.c index b416519795..a54c55182b 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -1,4 +1,6 @@ #include "prism/internal/buffer.h" + +#include "prism/attribute/inline.h" #include "prism/internal/char.h" #include "prism/allocator.h" @@ -55,7 +57,7 @@ pm_buffer_length(const pm_buffer_t *buffer) { /** * Append the given amount of space to the buffer. 
*/ -static inline bool +static PRISM_INLINE bool pm_buffer_append_length(pm_buffer_t *buffer, size_t length) { size_t next_length = buffer->length + length; const size_t original_capacity = buffer->capacity; @@ -80,7 +82,7 @@ pm_buffer_append_length(pm_buffer_t *buffer, size_t length) { /** * Append a generic pointer to memory to the buffer. */ -static inline void +static PRISM_INLINE void pm_buffer_append(pm_buffer_t *buffer, const void *source, size_t length) { size_t cursor = buffer->length; if (pm_buffer_append_length(buffer, length)) { diff --git a/src/char.c b/src/char.c index c119ee8a19..1b5438cd0c 100644 --- a/src/char.c +++ b/src/char.c @@ -1,4 +1,6 @@ #include "prism/internal/char.h" + +#include "prism/attribute/inline.h" #include "prism/internal/line_offset_list.h" #define PRISM_CHAR_BIT_REGEXP_OPTION (1 << 2) @@ -56,7 +58,7 @@ static const uint8_t pm_number_table[256] = { * Returns the number of characters at the start of the string that match the * given kind. Disallows searching past the given maximum number of characters. */ -static inline size_t +static PRISM_INLINE size_t pm_strspn_char_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) { if (length <= 0) return 0; @@ -114,7 +116,7 @@ pm_strspn_regexp_option(const uint8_t *string, ptrdiff_t length) { * the string that match the given kind. Disallows searching past the given * maximum number of characters. */ -static inline size_t +static PRISM_INLINE size_t pm_strspn_number_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) { if (length <= 0) return 0; @@ -133,7 +135,7 @@ pm_strspn_number_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) { * Additionally, report the location of the last invalid underscore character * found in the string through the out invalid parameter. 
*/ -static inline size_t +static PRISM_INLINE size_t pm_strspn_number_kind_underscores(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid, uint8_t kind) { if (length <= 0) return 0; @@ -234,7 +236,7 @@ pm_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint /** * Returns true if the given character matches the given kind. */ -static inline bool +static PRISM_INLINE bool pm_char_is_number_kind(const uint8_t b, uint8_t kind) { return (pm_number_table[b] & kind) != 0; } diff --git a/src/constant_pool.c b/src/constant_pool.c index 106566df90..7de70a47a4 100644 --- a/src/constant_pool.c +++ b/src/constant_pool.c @@ -1,6 +1,7 @@ #include "prism/constant_pool.h" #include "prism/attribute/align.h" +#include "prism/attribute/inline.h" #include "prism/internal/arena.h" #include @@ -81,7 +82,7 @@ pm_constant_id_list_includes(pm_constant_id_list_t *list, pm_constant_id_t id) { * by XOR followed by multiplication by a large odd constant, which spreads * entropy across all bits. A final xorshift fold produces the 32-bit result. */ -static inline uint32_t +static PRISM_INLINE uint32_t pm_constant_pool_hash(const uint8_t *start, size_t length) { // This constant is borrowed from wyhash. It is a 64-bit odd integer with // roughly equal 0/1 bits, chosen for good avalanche behavior when used in @@ -167,7 +168,7 @@ is_power_of_two(uint32_t size) { /** * Resize a constant pool to a given capacity. */ -static inline void +static PRISM_INLINE void pm_constant_pool_resize(pm_arena_t *arena, pm_constant_pool_t *pool) { assert(is_power_of_two(pool->capacity)); @@ -257,7 +258,7 @@ pm_constant_pool_find(const pm_constant_pool_t *pool, const uint8_t *start, size /** * Insert a constant into a constant pool and return its index in the pool. 
*/ -static inline pm_constant_id_t +static PRISM_INLINE pm_constant_id_t pm_constant_pool_insert(pm_arena_t *arena, pm_constant_pool_t *pool, const uint8_t *start, size_t length, pm_constant_pool_bucket_type_t type) { if (pool->size >= (pool->capacity / 4 * 3)) { pm_constant_pool_resize(arena, pool); diff --git a/src/options.c b/src/options.c index 26df07fca3..e38bd92ca9 100644 --- a/src/options.c +++ b/src/options.c @@ -1,5 +1,6 @@ #include "prism/options.h" +#include "prism/attribute/inline.h" #include "prism/internal/char.h" #include "prism/allocator.h" @@ -66,7 +67,7 @@ pm_options_command_line_set(pm_options_t *options, uint8_t command_line) { /** * Checks if the given slice represents a number. */ -static inline bool +static PRISM_INLINE bool is_number(const char *string, size_t length) { return pm_strspn_decimal_digit((const uint8_t *) string, (ptrdiff_t) length) == length; } diff --git a/src/prism.c b/src/prism.c index 0e24aff7d6..fed7b069e6 100644 --- a/src/prism.c +++ b/src/prism.c @@ -88,7 +88,7 @@ pm_version(void) { * Returns the incrementor character that should be used to increment the * nesting count if one is possible. */ -static inline uint8_t +static PRISM_INLINE uint8_t lex_mode_incrementor(const uint8_t start) { switch (start) { case '(': @@ -105,7 +105,7 @@ lex_mode_incrementor(const uint8_t start) { * Returns the matching character that should be used to terminate a list * beginning with the given character. */ -static inline uint8_t +static PRISM_INLINE uint8_t lex_mode_terminator(const uint8_t start) { switch (start) { case '(': @@ -147,7 +147,7 @@ lex_mode_push(pm_parser_t *parser, pm_lex_mode_t lex_mode) { /** * Push on a new list lex mode. 
*/ -static inline bool +static PRISM_INLINE bool lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) { uint8_t incrementor = lex_mode_incrementor(delimiter); uint8_t terminator = lex_mode_terminator(delimiter); @@ -195,7 +195,7 @@ lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) { * called when we're at the end of the file. We want the parser to be able to * perform its normal error tolerance. */ -static inline bool +static PRISM_INLINE bool lex_mode_push_list_eof(pm_parser_t *parser) { return lex_mode_push_list(parser, false, '\0'); } @@ -203,7 +203,7 @@ lex_mode_push_list_eof(pm_parser_t *parser) { /** * Push on a new regexp lex mode. */ -static inline bool +static PRISM_INLINE bool lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminator) { pm_lex_mode_t lex_mode = { .mode = PM_LEX_REGEXP, @@ -239,7 +239,7 @@ lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminato /** * Push on a new string lex mode. */ -static inline bool +static PRISM_INLINE bool lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed, uint8_t incrementor, uint8_t terminator) { pm_lex_mode_t lex_mode = { .mode = PM_LEX_STRING, @@ -286,7 +286,7 @@ lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed * called when we're at the end of the file. We want the parser to be able to * perform its normal error tolerance. */ -static inline bool +static PRISM_INLINE bool lex_mode_push_string_eof(pm_parser_t *parser) { return lex_mode_push_string(parser, false, false, '\0', '\0'); } @@ -314,7 +314,7 @@ lex_mode_pop(pm_parser_t *parser) { /** * This is the equivalent of IS_lex_state is CRuby. 
*/ -static inline bool +static PRISM_INLINE bool lex_state_p(const pm_parser_t *parser, pm_lex_state_t state) { return parser->lex_state & state; } @@ -325,7 +325,7 @@ typedef enum { PM_IGNORED_NEWLINE_PATTERN } pm_ignored_newline_type_t; -static inline pm_ignored_newline_type_t +static PRISM_INLINE pm_ignored_newline_type_t lex_state_ignored_p(pm_parser_t *parser) { bool ignored = lex_state_p(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_CLASS | PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT) && !lex_state_p(parser, PM_LEX_STATE_LABELED); @@ -338,17 +338,17 @@ lex_state_ignored_p(pm_parser_t *parser) { } } -static inline bool +static PRISM_INLINE bool lex_state_beg_p(pm_parser_t *parser) { return lex_state_p(parser, PM_LEX_STATE_BEG_ANY) || ((parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)); } -static inline bool +static PRISM_INLINE bool lex_state_arg_p(pm_parser_t *parser) { return lex_state_p(parser, PM_LEX_STATE_ARG_ANY); } -static inline bool +static PRISM_INLINE bool lex_state_spcarg_p(pm_parser_t *parser, bool space_seen) { if (parser->current.end >= parser->end) { return false; @@ -356,7 +356,7 @@ lex_state_spcarg_p(pm_parser_t *parser, bool space_seen) { return lex_state_arg_p(parser) && space_seen && !pm_char_is_whitespace(*parser->current.end); } -static inline bool +static PRISM_INLINE bool lex_state_end_p(pm_parser_t *parser) { return lex_state_p(parser, PM_LEX_STATE_END_ANY); } @@ -364,7 +364,7 @@ lex_state_end_p(pm_parser_t *parser) { /** * This is the equivalent of IS_AFTER_OPERATOR in CRuby. */ -static inline bool +static PRISM_INLINE bool lex_state_operator_p(pm_parser_t *parser) { return lex_state_p(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT); } @@ -373,7 +373,7 @@ lex_state_operator_p(pm_parser_t *parser) { * Set the state of the lexer. This is defined as a function to be able to put a * breakpoint in it. 
*/ -static inline void +static PRISM_INLINE void lex_state_set(pm_parser_t *parser, pm_lex_state_t state) { parser->lex_state = state; } @@ -468,7 +468,7 @@ debug_lex_state_set(pm_parser_t *parser, pm_lex_state_t state, char const * call /** * Append an error to the list of errors on the parser. */ -static inline void +static PRISM_INLINE void pm_parser_err(pm_parser_t *parser, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id) { pm_diagnostic_list_append(&parser->metadata_arena, &parser->error_list, start, length, diag_id); } @@ -477,7 +477,7 @@ pm_parser_err(pm_parser_t *parser, uint32_t start, uint32_t length, pm_diagnosti * Append an error to the list of errors on the parser using the location of the * given token. */ -static inline void +static PRISM_INLINE void pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) { pm_parser_err(parser, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), diag_id); } @@ -486,7 +486,7 @@ pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_ * Append an error to the list of errors on the parser using the location of the * current token. */ -static inline void +static PRISM_INLINE void pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) { pm_parser_err_token(parser, &parser->current, diag_id); } @@ -495,7 +495,7 @@ pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) { * Append an error to the list of errors on the parser using the location of the * previous token. */ -static inline void +static PRISM_INLINE void pm_parser_err_previous(pm_parser_t *parser, pm_diagnostic_id_t diag_id) { pm_parser_err_token(parser, &parser->previous, diag_id); } @@ -504,7 +504,7 @@ pm_parser_err_previous(pm_parser_t *parser, pm_diagnostic_id_t diag_id) { * Append an error to the list of errors on the parser using the location of the * given node. 
*/ -static inline void +static PRISM_INLINE void pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) { pm_parser_err(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), diag_id); } @@ -546,7 +546,7 @@ pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_ /** * Append a warning to the list of warnings on the parser. */ -static inline void +static PRISM_INLINE void pm_parser_warn(pm_parser_t *parser, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id) { pm_diagnostic_list_append(&parser->metadata_arena, &parser->warning_list, start, length, diag_id); } @@ -555,7 +555,7 @@ pm_parser_warn(pm_parser_t *parser, uint32_t start, uint32_t length, pm_diagnost * Append a warning to the list of warnings on the parser using the location of * the given token. */ -static inline void +static PRISM_INLINE void pm_parser_warn_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) { pm_parser_warn(parser, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), diag_id); } @@ -564,7 +564,7 @@ pm_parser_warn_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic * Append a warning to the list of warnings on the parser using the location of * the given node. */ -static inline void +static PRISM_INLINE void pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) { pm_parser_warn(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), diag_id); } @@ -767,7 +767,7 @@ pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t /** * Get the current state of constant shareability. 
*/ -static inline pm_shareable_constant_value_t +static PRISM_INLINE pm_shareable_constant_value_t pm_parser_scope_shareable_constant_get(pm_parser_t *parser) { return parser->current_scope->shareable_constant; } @@ -1049,7 +1049,7 @@ pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals, /** * Retrieve the constant pool id for the given location. */ -static inline pm_constant_id_t +static PRISM_INLINE pm_constant_id_t pm_parser_constant_id_raw(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) { return pm_constant_pool_insert_shared(&parser->metadata_arena, &parser->constant_pool, start, (size_t) (end - start)); } @@ -1057,7 +1057,7 @@ pm_parser_constant_id_raw(pm_parser_t *parser, const uint8_t *start, const uint8 /** * Retrieve the constant pool id for the given string. */ -static inline pm_constant_id_t +static PRISM_INLINE pm_constant_id_t pm_parser_constant_id_owned(pm_parser_t *parser, uint8_t *start, size_t length) { return pm_constant_pool_insert_owned(&parser->metadata_arena, &parser->constant_pool, start, length); } @@ -1065,7 +1065,7 @@ pm_parser_constant_id_owned(pm_parser_t *parser, uint8_t *start, size_t length) /** * Retrieve the constant pool id for the given static literal C string. */ -static inline pm_constant_id_t +static PRISM_INLINE pm_constant_id_t pm_parser_constant_id_constant(pm_parser_t *parser, const char *start, size_t length) { return pm_constant_pool_insert_constant(&parser->metadata_arena, &parser->constant_pool, (const uint8_t *) start, length); } @@ -1073,7 +1073,7 @@ pm_parser_constant_id_constant(pm_parser_t *parser, const char *start, size_t le /** * Retrieve the constant pool id for the given token. 
*/ -static inline pm_constant_id_t +static PRISM_INLINE pm_constant_id_t pm_parser_constant_id_token(pm_parser_t *parser, const pm_token_t *token) { return pm_parser_constant_id_raw(parser, token->start, token->end); } @@ -1292,7 +1292,7 @@ pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) { return NULL; } -static inline void +static PRISM_INLINE void pm_assert_value_expression(pm_parser_t *parser, pm_node_t *node) { pm_node_t *void_node = pm_check_value_expression(parser, node); if (void_node != NULL) { @@ -1533,7 +1533,7 @@ pm_conditional_predicate_warn_write_literal_p(const pm_node_t *node) { * Add a warning to the parser if the value that is being written inside of a * predicate to a conditional is a literal. */ -static inline void +static PRISM_INLINE void pm_conditional_predicate_warn_write_literal(pm_parser_t *parser, const pm_node_t *node) { if (pm_conditional_predicate_warn_write_literal_p(node)) { pm_parser_warn_node(parser, node, parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL); @@ -1699,7 +1699,7 @@ typedef struct { /** * Retrieve the end location of a `pm_arguments_t` object. */ -static inline const pm_location_t * +static PRISM_INLINE const pm_location_t * pm_arguments_end(pm_arguments_t *arguments) { if (arguments->block != NULL) { uint32_t end = PM_NODE_END(arguments->block); @@ -1762,7 +1762,7 @@ pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_b * reason we have the encoding_changed boolean to check if we need to go through * the function pointer or can just directly use the UTF-8 functions. 
*/ -static inline size_t +static PRISM_INLINE size_t char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) { if (n <= 0) return 0; @@ -1789,7 +1789,7 @@ char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t * Similar to char_is_identifier but this function assumes that the encoding * has not been changed. */ -static inline size_t +static PRISM_INLINE size_t char_is_identifier_utf8(const uint8_t *b, ptrdiff_t n) { if (n <= 0) { return 0; @@ -1816,7 +1816,7 @@ char_is_identifier_utf8(const uint8_t *b, ptrdiff_t n) { #if defined(PRISM_HAS_NEON) #include -static inline size_t +static PRISM_INLINE size_t scan_identifier_ascii(const uint8_t *start, const uint8_t *end) { const uint8_t *cursor = start; @@ -1870,7 +1870,7 @@ scan_identifier_ascii(const uint8_t *start, const uint8_t *end) { #elif defined(PRISM_HAS_SSSE3) #include -static inline size_t +static PRISM_INLINE size_t scan_identifier_ascii(const uint8_t *start, const uint8_t *end) { const uint8_t *cursor = start; @@ -1924,7 +1924,7 @@ scan_identifier_ascii(const uint8_t *start, const uint8_t *end) { * impossible. The result has bit 7 set if and only if byte >= lo. The same * reasoning applies to the upper-bound direction. */ -static inline size_t +static PRISM_INLINE size_t scan_identifier_ascii(const uint8_t *start, const uint8_t *end) { static const uint64_t ones = 0x0101010101010101ULL; static const uint64_t highs = 0x8080808080808080ULL; @@ -1983,7 +1983,7 @@ scan_identifier_ascii(const uint8_t *start, const uint8_t *end) { * the identifiers in a source file once the first character has been found. So * it's important that it be as fast as possible. 
*/ -static inline size_t +static PRISM_INLINE size_t char_is_identifier(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) { if (n <= 0) { return 0; @@ -2021,7 +2021,7 @@ const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = { #undef BIT #undef PUNCT -static inline bool +static PRISM_INLINE bool char_is_global_name_punctuation(const uint8_t b) { const unsigned int i = (const unsigned int) b; if (i <= 0x20 || 0x7e < i) return false; @@ -2029,7 +2029,7 @@ char_is_global_name_punctuation(const uint8_t b) { return (pm_global_name_punctuation_hash[(i - 0x20) / 32] >> (i % 32)) & 1; } -static inline bool +static PRISM_INLINE bool token_is_setter_name(pm_token_t *token) { return ( (token->type == PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL) || @@ -2117,7 +2117,7 @@ pm_local_is_keyword(const char *source, size_t length) { /** * Set the given flag on the given node. */ -static inline void +static PRISM_INLINE void pm_node_flag_set(pm_node_t *node, pm_node_flags_t flag) { node->flags |= flag; } @@ -2125,7 +2125,7 @@ pm_node_flag_set(pm_node_t *node, pm_node_flags_t flag) { /** * Remove the given flag from the given node. */ -static inline void +static PRISM_INLINE void pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) { node->flags &= (pm_node_flags_t) ~flag; } @@ -2133,7 +2133,7 @@ pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) { /** * Set the repeated parameter flag on the given node. */ -static inline void +static PRISM_INLINE void pm_node_flag_set_repeated_parameter(pm_node_t *node) { assert(PM_NODE_TYPE(node) == PM_BLOCK_LOCAL_VARIABLE_NODE || PM_NODE_TYPE(node) == PM_BLOCK_PARAMETER_NODE || @@ -2161,7 +2161,7 @@ pm_node_flag_set_repeated_parameter(pm_node_t *node) { /** * Parse out the options for a regular expression. 
*/ -static inline pm_node_flags_t +static PRISM_INLINE pm_node_flags_t pm_regular_expression_flags_create(pm_parser_t *parser, const pm_token_t *closing) { pm_node_flags_t flags = 0; @@ -2210,7 +2210,7 @@ pm_statements_node_body_length(pm_statements_node_t *node); * Move an integer's values array into the arena. If the integer has heap- * allocated values, copy them to the arena and free the original. */ -static inline void +static PRISM_INLINE void pm_integer_arena_move(pm_arena_t *arena, pm_integer_t *integer) { if (integer->values != NULL) { size_t byte_size = integer->length * sizeof(uint32_t); @@ -2380,7 +2380,7 @@ pm_array_node_create(pm_parser_t *parser, const pm_token_t *opening) { /** * Append an argument to an array node. */ -static inline void +static PRISM_INLINE void pm_array_node_elements_append(pm_arena_t *arena, pm_array_node_t *node, pm_node_t *element) { if (!node->elements.size && !node->opening_loc.length) { PM_NODE_START_SET_NODE(node, element); @@ -2507,7 +2507,7 @@ pm_array_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *openin ); } -static inline void +static PRISM_INLINE void pm_array_pattern_node_requireds_append(pm_arena_t *arena, pm_array_pattern_node_t *node, pm_node_t *inner) { pm_node_list_append(arena, &node->requireds, inner); } @@ -2836,7 +2836,7 @@ pm_call_node_create(pm_parser_t *parser, pm_node_flags_t flags) { * Returns the value that the ignore visibility flag should be set to for the * given receiver. */ -static inline pm_node_flags_t +static PRISM_INLINE pm_node_flags_t pm_call_node_ignore_visibility_flag(const pm_node_t *receiver) { return PM_NODE_TYPE_P(receiver, PM_SELF_NODE) ? PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY : 0; } @@ -3086,7 +3086,7 @@ pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) { * Returns whether or not this call can be used on the left-hand side of an * operator assignment. 
*/ -static inline bool +static PRISM_INLINE bool pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) { return ( (node->message_loc.length > 0) && @@ -3594,7 +3594,7 @@ pm_class_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) * a = *b * a = 1, 2, 3 */ -static inline pm_node_flags_t +static PRISM_INLINE pm_node_flags_t pm_implicit_array_write_flags(const pm_node_t *node, pm_node_flags_t flags) { if (PM_NODE_TYPE_P(node, PM_ARRAY_NODE) && ((const pm_array_node_t *) node)->opening_loc.length == 0) { return flags; @@ -4517,7 +4517,7 @@ pm_hash_node_create(pm_parser_t *parser, const pm_token_t *opening) { /** * Append a new element to a hash node. */ -static inline void +static PRISM_INLINE void pm_hash_node_elements_append(pm_arena_t *arena, pm_hash_node_t *hash, pm_node_t *element) { pm_node_list_append(arena, &hash->elements, element); @@ -4534,7 +4534,7 @@ pm_hash_node_elements_append(pm_arena_t *arena, pm_hash_node_t *hash, pm_node_t } } -static inline void +static PRISM_INLINE void pm_hash_node_closing_loc_set(const pm_parser_t *parser, pm_hash_node_t *hash, pm_token_t *token) { PM_NODE_LENGTH_SET_TOKEN(parser, hash, token); hash->closing_loc = TOK2LOC(parser, token); @@ -4634,13 +4634,13 @@ pm_if_node_ternary_create(pm_parser_t *parser, pm_node_t *predicate, const pm_to ); } -static inline void +static PRISM_INLINE void pm_if_node_end_keyword_loc_set(const pm_parser_t *parser, pm_if_node_t *node, const pm_token_t *keyword) { PM_NODE_LENGTH_SET_TOKEN(parser, node, keyword); node->end_keyword_loc = TOK2LOC(parser, keyword); } -static inline void +static PRISM_INLINE void pm_else_node_end_keyword_loc_set(const pm_parser_t *parser, pm_else_node_t *node, const pm_token_t *keyword) { PM_NODE_LENGTH_SET_TOKEN(parser, node, keyword); node->end_keyword_loc = TOK2LOC(parser, keyword); @@ -4963,7 +4963,7 @@ pm_interpolated_regular_expression_node_create(pm_parser_t *parser, const pm_tok ); } -static inline void +static 
PRISM_INLINE void pm_interpolated_regular_expression_node_append(pm_arena_t *arena, pm_interpolated_regular_expression_node_t *node, pm_node_t *part) { if (PM_NODE_START(node) > PM_NODE_START(part)) { PM_NODE_START_SET_NODE(node, part); @@ -4975,7 +4975,7 @@ pm_interpolated_regular_expression_node_append(pm_arena_t *arena, pm_interpolate pm_interpolated_node_append(arena, UP(node), &node->parts, part); } -static inline void +static PRISM_INLINE void pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) { node->closing_loc = TOK2LOC(parser, closing); PM_NODE_LENGTH_SET_TOKEN(parser, node, closing); @@ -5005,7 +5005,7 @@ pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_inte * is necessary to indicate that the string should be left up to the runtime, * which could potentially use a chilled string otherwise. */ -static inline void +static PRISM_INLINE void pm_interpolated_string_node_append(pm_arena_t *arena, pm_interpolated_string_node_t *node, pm_node_t *part) { #define CLEAR_FLAGS(node) \ node->base.flags = (pm_node_flags_t) (FL(node) & ~(PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE)) @@ -5208,13 +5208,13 @@ pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *openi ); } -static inline void +static PRISM_INLINE void pm_interpolated_xstring_node_append(pm_arena_t *arena, pm_interpolated_x_string_node_t *node, pm_node_t *part) { pm_interpolated_node_append(arena, UP(node), &node->parts, part); PM_NODE_LENGTH_SET_NODE(node, part); } -static inline void +static PRISM_INLINE void pm_interpolated_xstring_node_closing_set(const pm_parser_t *parser, pm_interpolated_x_string_node_t *node, const pm_token_t *closing) { node->closing_loc = TOK2LOC(parser, closing); PM_NODE_LENGTH_SET_TOKEN(parser, node, closing); @@ -5470,7 +5470,7 @@ 
pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name, /** * Returns true if the given bounds comprise `it`. */ -static inline bool +static PRISM_INLINE bool pm_token_is_it(const uint8_t *start, const uint8_t *end) { return (end - start == 2) && (start[0] == 'i') && (start[1] == 't'); } @@ -5479,7 +5479,7 @@ pm_token_is_it(const uint8_t *start, const uint8_t *end) { * Returns true if the given bounds comprise a numbered parameter (i.e., they * are of the form /^_\d$/). */ -static inline bool +static PRISM_INLINE bool pm_token_is_numbered_parameter(const pm_parser_t *parser, uint32_t start, uint32_t length) { return ( (length == 2) && @@ -5493,7 +5493,7 @@ pm_token_is_numbered_parameter(const pm_parser_t *parser, uint32_t start, uint32 * Ensure the given bounds do not comprise a numbered parameter. If they do, add * an appropriate error message to the parser. */ -static inline void +static PRISM_INLINE void pm_refute_numbered_parameter(pm_parser_t *parser, uint32_t start, uint32_t length) { if (pm_token_is_numbered_parameter(parser, start, length)) { PM_PARSER_ERR_FORMAT(parser, start, length, PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + start); @@ -6133,7 +6133,7 @@ pm_regular_expression_node_create_unescaped(pm_parser_t *parser, const pm_token_ /** * Allocate a new initialize a new RegularExpressionNode node. 
*/ -static inline pm_regular_expression_node_t * +static PRISM_INLINE pm_regular_expression_node_t * pm_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) { return pm_regular_expression_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY); } @@ -6188,7 +6188,7 @@ pm_rescue_node_create(pm_parser_t *parser, const pm_token_t *keyword) { ); } -static inline void +static PRISM_INLINE void pm_rescue_node_operator_set(const pm_parser_t *parser, pm_rescue_node_t *node, const pm_token_t *operator) { node->operator_loc = TOK2LOC(parser, operator); } @@ -6423,7 +6423,7 @@ pm_statements_node_body_length(pm_statements_node_t *node) { * Update the location of the statements node based on the statement that is * being added to the list. */ -static inline void +static PRISM_INLINE void pm_statements_node_body_update(pm_statements_node_t *node, pm_node_t *statement) { if (pm_statements_node_body_length(node) == 0 || PM_NODE_START(statement) < PM_NODE_START(node)) { PM_NODE_START_SET_NODE(node, statement); @@ -6474,7 +6474,7 @@ pm_statements_node_body_prepend(pm_arena_t *arena, pm_statements_node_t *node, p /** * Allocate a new StringNode node with the current string on the parser. */ -static inline pm_string_node_t * +static PRISM_INLINE pm_string_node_t * pm_string_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *string) { pm_node_flags_t flags = 0; @@ -6606,7 +6606,7 @@ parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *loca * If the validate flag is set, then it will check the contents of the symbol * to ensure that all characters are valid in the encoding. 
*/ -static inline pm_node_flags_t +static PRISM_INLINE pm_node_flags_t parse_symbol_encoding(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents, bool validate) { if (parser->explicit_encoding != NULL) { // A Symbol may optionally have its encoding explicitly set. This will @@ -6655,7 +6655,7 @@ pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, /** * Allocate and initialize a new SymbolNode node. */ -static inline pm_symbol_node_t * +static PRISM_INLINE pm_symbol_node_t * pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) { return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY, 0); } @@ -6896,7 +6896,7 @@ pm_unless_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const ); } -static inline void +static PRISM_INLINE void pm_unless_node_end_keyword_loc_set(const pm_parser_t *parser, pm_unless_node_t *node, const pm_token_t *end_keyword) { node->end_keyword_loc = TOK2LOC(parser, end_keyword); PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword); @@ -6995,7 +6995,7 @@ pm_when_node_conditions_append(pm_arena_t *arena, pm_when_node_t *node, pm_node_ /** * Set the location of the then keyword of a when node. */ -static inline void +static PRISM_INLINE void pm_when_node_then_keyword_loc_set(const pm_parser_t *parser, pm_when_node_t *node, const pm_token_t *then_keyword) { PM_NODE_LENGTH_SET_TOKEN(parser, node, then_keyword); node->then_keyword_loc = TOK2LOC(parser, then_keyword); @@ -7093,7 +7093,7 @@ pm_xstring_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, /** * Allocate and initialize a new XStringNode node. 
*/ -static inline pm_x_string_node_t * +static PRISM_INLINE pm_x_string_node_t * pm_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) { return pm_xstring_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY); } @@ -7153,7 +7153,7 @@ pm_parser_local_depth_constant_id(pm_parser_t *parser, pm_constant_id_t constant * described by the given token. This function implicitly inserts a constant * into the constant pool. */ -static inline int +static PRISM_INLINE int pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) { return pm_parser_local_depth_constant_id(parser, pm_parser_constant_id_token(parser, token)); } @@ -7161,7 +7161,7 @@ pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) { /** * Add a constant id to the local table of the current scope. */ -static inline void +static PRISM_INLINE void pm_parser_local_add(pm_parser_t *parser, pm_constant_id_t constant_id, const uint8_t *start, const uint8_t *end, uint32_t reads) { pm_locals_write(&parser->current_scope->locals, constant_id, U32(start - parser->start), U32(end - start), reads); } @@ -7179,7 +7179,7 @@ pm_parser_local_add_raw(pm_parser_t *parser, const uint8_t *start, const uint8_t /** * Add a local variable from a location to the current scope. */ -static inline pm_constant_id_t +static PRISM_INLINE pm_constant_id_t pm_parser_local_add_location(pm_parser_t *parser, pm_location_t *location, uint32_t reads) { return pm_parser_local_add_raw(parser, parser->start + location->start, parser->start + location->start + location->length, reads); } @@ -7187,7 +7187,7 @@ pm_parser_local_add_location(pm_parser_t *parser, pm_location_t *location, uint3 /** * Add a local variable from a token to the current scope. 
*/ -static inline pm_constant_id_t +static PRISM_INLINE pm_constant_id_t pm_parser_local_add_token(pm_parser_t *parser, pm_token_t *token, uint32_t reads) { return pm_parser_local_add_raw(parser, token->start, token->end, reads); } @@ -7257,7 +7257,7 @@ pm_parser_scope_pop(pm_parser_t *parser) { /** * Pushes a value onto the stack. */ -static inline void +static PRISM_INLINE void pm_state_stack_push(pm_state_stack_t *stack, bool value) { *stack = (*stack << 1) | (value & 1); } @@ -7265,7 +7265,7 @@ pm_state_stack_push(pm_state_stack_t *stack, bool value) { /** * Pops a value off the stack. */ -static inline void +static PRISM_INLINE void pm_state_stack_pop(pm_state_stack_t *stack) { *stack >>= 1; } @@ -7273,38 +7273,38 @@ pm_state_stack_pop(pm_state_stack_t *stack) { /** * Returns the value at the top of the stack. */ -static inline bool +static PRISM_INLINE bool pm_state_stack_p(const pm_state_stack_t *stack) { return *stack & 1; } -static inline void +static PRISM_INLINE void pm_accepts_block_stack_push(pm_parser_t *parser, bool value) { // Use the negation of the value to prevent stack overflow. 
pm_state_stack_push(&parser->accepts_block_stack, !value); } -static inline void +static PRISM_INLINE void pm_accepts_block_stack_pop(pm_parser_t *parser) { pm_state_stack_pop(&parser->accepts_block_stack); } -static inline bool +static PRISM_INLINE bool pm_accepts_block_stack_p(pm_parser_t *parser) { return !pm_state_stack_p(&parser->accepts_block_stack); } -static inline void +static PRISM_INLINE void pm_do_loop_stack_push(pm_parser_t *parser, bool value) { pm_state_stack_push(&parser->do_loop_stack, value); } -static inline void +static PRISM_INLINE void pm_do_loop_stack_pop(pm_parser_t *parser) { pm_state_stack_pop(&parser->do_loop_stack); } -static inline bool +static PRISM_INLINE bool pm_do_loop_stack_p(pm_parser_t *parser) { return pm_state_stack_p(&parser->do_loop_stack); } @@ -7317,7 +7317,7 @@ pm_do_loop_stack_p(pm_parser_t *parser) { * Get the next character in the source starting from +cursor+. If that position * is beyond the end of the source then return '\0'. */ -static inline uint8_t +static PRISM_INLINE uint8_t peek_at(const pm_parser_t *parser, const uint8_t *cursor) { if (cursor < parser->end) { return *cursor; @@ -7331,7 +7331,7 @@ peek_at(const pm_parser_t *parser, const uint8_t *cursor) { * adding the given offset. If that position is beyond the end of the source * then return '\0'. */ -static inline uint8_t +static PRISM_INLINE uint8_t peek_offset(pm_parser_t *parser, ptrdiff_t offset) { return peek_at(parser, parser->current.end + offset); } @@ -7340,7 +7340,7 @@ peek_offset(pm_parser_t *parser, ptrdiff_t offset) { * Get the next character in the source starting from parser->current.end. If * that position is beyond the end of the source then return '\0'. 
*/ -static inline uint8_t +static PRISM_INLINE uint8_t peek(const pm_parser_t *parser) { return peek_at(parser, parser->current.end); } @@ -7349,7 +7349,7 @@ peek(const pm_parser_t *parser) { * If the character to be read matches the given value, then returns true and * advances the current pointer. */ -static inline bool +static PRISM_INLINE bool match(pm_parser_t *parser, uint8_t value) { if (peek(parser) == value) { parser->current.end++; @@ -7362,7 +7362,7 @@ match(pm_parser_t *parser, uint8_t value) { * Return the length of the line ending string starting at +cursor+, or 0 if it * is not a line ending. This function is intended to be CRLF/LF agnostic. */ -static inline size_t +static PRISM_INLINE size_t match_eol_at(pm_parser_t *parser, const uint8_t *cursor) { if (peek_at(parser, cursor) == '\n') { return 1; @@ -7378,7 +7378,7 @@ match_eol_at(pm_parser_t *parser, const uint8_t *cursor) { * `parser->current.end + offset`, or 0 if it is not a line ending. This * function is intended to be CRLF/LF agnostic. */ -static inline size_t +static PRISM_INLINE size_t match_eol_offset(pm_parser_t *parser, ptrdiff_t offset) { return match_eol_at(parser, parser->current.end + offset); } @@ -7388,7 +7388,7 @@ match_eol_offset(pm_parser_t *parser, ptrdiff_t offset) { * or 0 if it is not a line ending. This function is intended to be CRLF/LF * agnostic. */ -static inline size_t +static PRISM_INLINE size_t match_eol(pm_parser_t *parser) { return match_eol_at(parser, parser->current.end); } @@ -7396,7 +7396,7 @@ match_eol(pm_parser_t *parser) { /** * Skip to the next newline character or NUL byte. */ -static inline const uint8_t * +static PRISM_INLINE const uint8_t * next_newline(const uint8_t *cursor, ptrdiff_t length) { assert(length >= 0); @@ -7409,7 +7409,7 @@ next_newline(const uint8_t *cursor, ptrdiff_t length) { /** * This is equivalent to the predicate of warn_balanced in CRuby. 
*/ -static inline bool +static PRISM_INLINE bool ambiguous_operator_p(const pm_parser_t *parser, bool space_seen) { return !lex_state_p(parser, PM_LEX_STATE_CLASS | PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME | PM_LEX_STATE_ENDFN) && space_seen && !pm_char_is_whitespace(peek(parser)); } @@ -7512,7 +7512,7 @@ parser_lex_magic_comment_boolean_value(const uint8_t *value_start, uint32_t valu } } -static inline bool +static PRISM_INLINE bool pm_char_is_magic_comment_key_delimiter(const uint8_t b) { return b == '\'' || b == '"' || b == ':' || b == ';'; } @@ -7522,7 +7522,7 @@ pm_char_is_magic_comment_key_delimiter(const uint8_t b) { * found, it returns a pointer to the start of the marker. Otherwise it returns * NULL. */ -static inline const uint8_t * +static PRISM_INLINE const uint8_t * parser_lex_magic_comment_emacs_marker(pm_parser_t *parser, const uint8_t *cursor, const uint8_t *end) { // Scan for '*' as the middle character, since it is rarer than '-' in // typical comments and avoids repeated memchr calls for '-' that hit @@ -7545,7 +7545,7 @@ parser_lex_magic_comment_emacs_marker(pm_parser_t *parser, const uint8_t *cursor * It returns true if it consumes the entire comment. Otherwise it returns * false. 
*/ -static inline bool +static PRISM_INLINE bool parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) { bool result = true; @@ -7820,7 +7820,7 @@ static const uint32_t context_terminators[] = { [PM_CONTEXT_WHILE] = (1U << PM_TOKEN_KEYWORD_END), }; -static inline bool +static PRISM_INLINE bool context_terminator(pm_context_t context, pm_token_t *token) { return token->type < 32 && (context_terminators[context] & (1U << token->type)); } @@ -7984,7 +7984,7 @@ context_human(pm_context_t context) { /* Specific token lexers */ /******************************************************************************/ -static inline void +static PRISM_INLINE void pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *string, size_t length, const uint8_t *invalid) { if (invalid != NULL) { pm_diagnostic_id_t diag_id = (invalid == (string + length - 1)) ? PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING : PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER; @@ -8383,7 +8383,7 @@ lex_global_variable(pm_parser_t *parser) { * * `type` - the expected token type * * `modifier_type` - the expected modifier token type */ -static inline pm_token_type_t +static PRISM_INLINE pm_token_type_t lex_keyword(pm_parser_t *parser, const uint8_t *current_start, const char *value, size_t vlen, pm_lex_state_t state, pm_token_type_t type, pm_token_type_t modifier_type) { if (memcmp(current_start, value, vlen) == 0) { pm_lex_state_t last_state = parser->lex_state; @@ -8724,7 +8724,7 @@ static const bool ascii_printable_chars[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 }; -static inline bool +static PRISM_INLINE bool char_is_ascii_printable(const uint8_t b) { return (b < 0x80) && ascii_printable_chars[b]; } @@ -8733,7 +8733,7 @@ char_is_ascii_printable(const uint8_t b) { * Return the value that a hexadecimal digit character represents. For example, * transform 'a' into 10, 'b' into 11, etc. 
*/ -static inline uint8_t +static PRISM_INLINE uint8_t escape_hexadecimal_digit(const uint8_t value) { return (uint8_t) ((value <= '9') ? (value - '0') : (value & 0x7) + 9); } @@ -8743,7 +8743,7 @@ escape_hexadecimal_digit(const uint8_t value) { * digits scanned. This function assumes that the characters have already been * validated. */ -static inline uint32_t +static PRISM_INLINE uint32_t escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length, const pm_location_t *error_location, const uint8_t flags) { uint32_t value = 0; for (size_t index = 0; index < length; index++) { @@ -8771,7 +8771,7 @@ escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length, const /** * Escape a single character value based on the given flags. */ -static inline uint8_t +static PRISM_INLINE uint8_t escape_byte(uint8_t value, const uint8_t flags) { if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x9f; if (flags & PM_ESCAPE_FLAG_META) value |= 0x80; @@ -8781,7 +8781,7 @@ escape_byte(uint8_t value, const uint8_t flags) { /** * Write a unicode codepoint to the given buffer. */ -static inline void +static PRISM_INLINE void escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t flags, const uint8_t *start, const uint8_t *end, uint32_t value) { // \u escape sequences in string-like structures implicitly change the // encoding to UTF-8 if they are >= 0x80 or if they are used in a character @@ -8817,7 +8817,7 @@ escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t fla * When you're writing a byte to the unescape buffer, if the byte is non-ASCII * (i.e., the top bit is set) then it locks in the encoding. 
*/ -static inline void +static PRISM_INLINE void escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t flags, uint8_t byte) { if (byte >= 0x80) { if (parser->explicit_encoding != NULL && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && parser->encoding != PM_ENCODING_UTF_8_ENTRY) { @@ -8850,7 +8850,7 @@ escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_ * Note that in this case there is a literal \ byte in the regular expression * source so that the regular expression engine will perform its own unescaping. */ -static inline void +static PRISM_INLINE void escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags, uint8_t byte) { if (flags & PM_ESCAPE_FLAG_REGEXP) { pm_buffer_append_format(regular_expression_buffer, "\\x%02X", byte); @@ -8862,7 +8862,7 @@ escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular /** * Write each byte of the given escaped character into the buffer. */ -static inline void +static PRISM_INLINE void escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) { size_t width; if (parser->encoding_changed) { @@ -9455,7 +9455,7 @@ lex_at_variable(pm_parser_t *parser) { /** * Optionally call out to the lex callback if one is provided. */ -static inline void +static PRISM_INLINE void parser_lex_callback(pm_parser_t *parser) { if (parser->lex_callback) { parser->lex_callback->callback(parser->lex_callback->data, parser, &parser->current); @@ -9465,7 +9465,7 @@ parser_lex_callback(pm_parser_t *parser) { /** * Return a new comment node of the specified type. 
*/ -static inline pm_comment_t * +static PRISM_INLINE pm_comment_t * parser_comment(pm_parser_t *parser, pm_comment_type_t type) { pm_comment_t *comment = (pm_comment_t *) pm_arena_alloc(&parser->metadata_arena, sizeof(pm_comment_t), PRISM_ALIGNOF(pm_comment_t)); @@ -9564,7 +9564,7 @@ lex_embdoc(pm_parser_t *parser) { * This happens in a couple places depending on whether or not we have already * lexed a comment. */ -static inline void +static PRISM_INLINE void parser_lex_ignored_newline(pm_parser_t *parser) { parser->current.type = PM_TOKEN_IGNORED_NEWLINE; parser_lex_callback(parser); @@ -9579,7 +9579,7 @@ parser_lex_ignored_newline(pm_parser_t *parser) { * If it is set, then we need to skip past the heredoc body and then clear the * heredoc_end field. */ -static inline void +static PRISM_INLINE void parser_flush_heredoc_end(pm_parser_t *parser) { assert(parser->heredoc_end <= parser->end); parser->next_start = parser->heredoc_end; @@ -9655,12 +9655,12 @@ typedef struct { /** * Push the given byte into the token buffer. */ -static inline void +static PRISM_INLINE void pm_token_buffer_push_byte(pm_token_buffer_t *token_buffer, uint8_t byte) { pm_buffer_append_byte(&token_buffer->buffer, byte); } -static inline void +static PRISM_INLINE void pm_regexp_token_buffer_push_byte(pm_regexp_token_buffer_t *token_buffer, uint8_t byte) { pm_buffer_append_byte(&token_buffer->regexp_buffer, byte); } @@ -9668,7 +9668,7 @@ pm_regexp_token_buffer_push_byte(pm_regexp_token_buffer_t *token_buffer, uint8_t /** * Return the width of the character at the end of the current token. */ -static inline size_t +static PRISM_INLINE size_t parser_char_width(const pm_parser_t *parser) { size_t width; if (parser->encoding_changed) { @@ -9707,7 +9707,7 @@ pm_regexp_token_buffer_push_escaped(pm_regexp_token_buffer_t *token_buffer, pm_p * contents of the token buffer into the current string on the parser so that it * can be attached to the correct node. 
*/ -static inline void +static PRISM_INLINE void pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) { // Copy buffer data into the arena and free the heap buffer. size_t len = pm_buffer_length(&token_buffer->buffer); @@ -9716,7 +9716,7 @@ pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) { pm_buffer_free(&token_buffer->buffer); } -static inline void +static PRISM_INLINE void pm_regexp_token_buffer_copy(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) { pm_token_buffer_copy(parser, &token_buffer->base); pm_buffer_free(&token_buffer->regexp_buffer); @@ -9805,7 +9805,7 @@ pm_regexp_token_buffer_escape(pm_parser_t *parser, pm_regexp_token_buffer_t *tok * Effectively the same thing as pm_strspn_inline_whitespace, but in the case of * a tilde heredoc expands out tab characters to the nearest tab boundaries. */ -static inline size_t +static PRISM_INLINE size_t pm_heredoc_strspn_inline_whitespace(pm_parser_t *parser, const uint8_t **cursor, pm_heredoc_indent_t indent) { size_t whitespace = 0; @@ -12491,7 +12491,7 @@ pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = { /** * Returns true if the current token is of the given type. */ -static inline bool +static PRISM_INLINE bool match1(const pm_parser_t *parser, pm_token_type_t type) { return parser->current.type == type; } @@ -12499,7 +12499,7 @@ match1(const pm_parser_t *parser, pm_token_type_t type) { /** * Returns true if the current token is of either of the given types. */ -static inline bool +static PRISM_INLINE bool match2(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) { return match1(parser, type1) || match1(parser, type2); } @@ -12507,7 +12507,7 @@ match2(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) /** * Returns true if the current token is any of the three given types. 
*/ -static inline bool +static PRISM_INLINE bool match3(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3) { return match1(parser, type1) || match1(parser, type2) || match1(parser, type3); } @@ -12515,7 +12515,7 @@ match3(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, /** * Returns true if the current token is any of the four given types. */ -static inline bool +static PRISM_INLINE bool match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4) { return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4); } @@ -12523,7 +12523,7 @@ match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, /** * Returns true if the current token is any of the seven given types. */ -static inline bool +static PRISM_INLINE bool match7(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7) { return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7); } @@ -12531,7 +12531,7 @@ match7(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, /** * Returns true if the current token is any of the eight given types. 
*/ -static inline bool +static PRISM_INLINE bool match8(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8) { return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8); } @@ -12555,7 +12555,7 @@ accept1(pm_parser_t *parser, pm_token_type_t type) { * If the current token is either of the two given types, lex forward by one * token and return true. Otherwise return false. */ -static inline bool +static PRISM_INLINE bool accept2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) { if (match2(parser, type1, type2)) { parser_lex(parser); @@ -12671,7 +12671,7 @@ parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, ui * work in all cases, it may need to be refactored later. But it appears to work * for now. */ -static inline bool +static PRISM_INLINE bool token_begins_expression_p(pm_token_type_t type) { switch (type) { case PM_TOKEN_EQUAL_GREATER: @@ -13611,7 +13611,7 @@ parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *nod return contains_keyword_splat; } -static inline bool +static PRISM_INLINE bool argument_allowed_for_bare_hash(pm_parser_t *parser, pm_node_t *argument) { if (pm_symbol_node_label_p(parser, argument)) { return true; @@ -13638,7 +13638,7 @@ argument_allowed_for_bare_hash(pm_parser_t *parser, pm_node_t *argument) { /** * Append an argument to a list of arguments. 
*/ -static inline void +static PRISM_INLINE void parse_arguments_append(pm_parser_t *parser, pm_arguments_t *arguments, pm_node_t *argument) { if (arguments->arguments == NULL) { arguments->arguments = pm_arguments_node_create(parser); @@ -14024,7 +14024,7 @@ update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_ord return true; } -static inline void +static PRISM_INLINE void parse_parameters_handle_trailing_comma( pm_parser_t *parser, pm_parameters_node_t *params, @@ -14564,7 +14564,7 @@ typedef enum { * Parse any number of rescue clauses. This will form a linked list of if * nodes pointing to each other from the top. */ -static inline void +static PRISM_INLINE void parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, pm_begin_node_t *parent_node, pm_rescues_type_t type, uint16_t depth) { pm_rescue_node_t *current = NULL; @@ -15337,7 +15337,7 @@ pop_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) { } } -static inline pm_node_t * +static PRISM_INLINE pm_node_t * parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_context_t context, pm_token_t *then_keyword, uint16_t depth) { context_push(parser, PM_CONTEXT_PREDICATE); pm_diagnostic_id_t error_id = context == PM_CONTEXT_IF ? 
PM_ERR_CONDITIONAL_IF_PREDICATE : PM_ERR_CONDITIONAL_UNLESS_PREDICATE; @@ -15359,7 +15359,7 @@ parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_contex return predicate; } -static inline pm_node_t * +static PRISM_INLINE pm_node_t * parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newline_index, bool if_after_else, uint16_t depth) { pm_node_list_t current_block_exits = { 0 }; pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits); @@ -15558,7 +15558,7 @@ PM_STATIC_ASSERT(__LINE__, ((int) PM_STRING_FLAGS_FORCED_UTF8_ENCODING) == ((int * If the encoding was explicitly set through the lexing process, then we need * to potentially mark the string's flags to indicate how to encode it. */ -static inline pm_node_flags_t +static PRISM_INLINE pm_node_flags_t parse_unescaped_encoding(const pm_parser_t *parser) { if (parser->explicit_encoding != NULL) { if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) { @@ -15875,7 +15875,7 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s * Parse an argument to undef which can either be a bare word, a symbol, a * constant, or an interpolated symbol. */ -static inline pm_node_t * +static PRISM_INLINE pm_node_t * parse_undef_argument(pm_parser_t *parser, uint16_t depth) { switch (parser->current.type) { case PM_CASE_OPERATOR: @@ -15910,7 +15910,7 @@ parse_undef_argument(pm_parser_t *parser, uint16_t depth) { * we need to set the lex state to PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM * between the first and second arguments. */ -static inline pm_node_t * +static PRISM_INLINE pm_node_t * parse_alias_argument(pm_parser_t *parser, bool first, uint16_t depth) { switch (parser->current.type) { case PM_CASE_OPERATOR: @@ -16018,7 +16018,7 @@ parse_variable_call(pm_parser_t *parser) { * parser. If it does not match a valid method definition name, then a missing * token is returned. 
*/ -static inline pm_token_t +static PRISM_INLINE pm_token_t parse_method_definition_name(pm_parser_t *parser) { switch (parser->current.type) { case PM_CASE_KEYWORD: @@ -16091,7 +16091,7 @@ parse_heredoc_dedent_string(pm_arena_t *arena, pm_string_t *string, size_t commo * If we end up trimming all of the whitespace from a node and it isn't * part of a line continuation, then we'll drop it from the list entirely. */ -static inline bool +static PRISM_INLINE bool heredoc_dedent_discard_string_node(pm_parser_t *parser, pm_string_node_t *string_node) { if (string_node->unescaped.length == 0) { const uint8_t *cursor = parser->start + PM_LOCATION_START(&string_node->content_loc); @@ -16152,7 +16152,7 @@ parse_strings_empty_content(const uint8_t *location) { /** * Parse a set of strings that could be concatenated together. */ -static inline pm_node_t * +static PRISM_INLINE pm_node_t * parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint16_t depth) { assert(parser->current.type == PM_TOKEN_STRING_BEGIN); bool concating = false; @@ -17303,7 +17303,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag * from its start bounds. If it's a compound node, then we will recursively * apply this function to its value. */ -static inline void +static PRISM_INLINE void parse_negative_numeric(pm_node_t *node) { switch (PM_NODE_TYPE(node)) { case PM_INTEGER_NODE: { @@ -17563,7 +17563,7 @@ parse_yield(pm_parser_t *parser, const pm_node_t *node) { * Determine if a given call node looks like a "command", which means it has * arguments but does not have parentheses. */ -static inline bool +static PRISM_INLINE bool pm_call_node_command_p(const pm_call_node_t *node) { return ( (node->opening_loc.length == 0) && @@ -17655,7 +17655,7 @@ pm_block_call_p(const pm_node_t *node) { /** * Parse an expression that begins with the previous node that we just lexed. 
*/ -static inline pm_node_t * +static PRISM_INLINE pm_node_t * parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) { switch (parser->current.type) { case PM_TOKEN_BRACKET_LEFT_ARRAY: { @@ -20567,7 +20567,7 @@ parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const } } -static inline const uint8_t * +static PRISM_INLINE const uint8_t * pm_named_capture_escape_hex(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) { cursor++; @@ -20588,7 +20588,7 @@ pm_named_capture_escape_hex(pm_buffer_t *unescaped, const uint8_t *cursor, const return cursor; } -static inline const uint8_t * +static PRISM_INLINE const uint8_t * pm_named_capture_escape_octal(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) { uint8_t value = (uint8_t) (*cursor - '0'); cursor++; @@ -20607,7 +20607,7 @@ pm_named_capture_escape_octal(pm_buffer_t *unescaped, const uint8_t *cursor, con return cursor; } -static inline const uint8_t * +static PRISM_INLINE const uint8_t * pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end, const pm_location_t *error_location) { const uint8_t *start = cursor - 1; cursor++; @@ -20799,7 +20799,7 @@ parse_interpolated_regular_expression_named_captures(pm_parser_t *parser, const } } -static inline pm_node_t * +static PRISM_INLINE pm_node_t * parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, uint8_t flags, uint16_t depth) { pm_token_t token = parser->current; @@ -22728,7 +22728,7 @@ pm_parse_success_p(const uint8_t *source, size_t size, const char *data) { // PRISM_EXCLUDE_SERIALIZATION define. 
#ifndef PRISM_EXCLUDE_SERIALIZATION -static inline void +static PRISM_INLINE void pm_serialize_header(pm_buffer_t *buffer) { pm_buffer_append_string(buffer, "PRISM", 5); pm_buffer_append_byte(buffer, PRISM_VERSION_MAJOR); diff --git a/src/regexp.c b/src/regexp.c index 3dfe826104..2955abcfcb 100644 --- a/src/regexp.c +++ b/src/regexp.c @@ -1,5 +1,6 @@ #include "prism/regexp.h" +#include "prism/attribute/inline.h" #include "prism/internal/buffer.h" #include "prism/internal/char.h" #include "prism/internal/diagnostic.h" @@ -118,7 +119,7 @@ typedef struct { * (points into the original source), we can point to the exact error location. * Otherwise, we point to the whole regexp node. */ -static inline void +static PRISM_INLINE void pm_regexp_parse_error(pm_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end, const char *message) { pm_parser_t *pm = parser->parser; uint32_t loc_start, loc_length; @@ -167,7 +168,7 @@ pm_regexp_parser_named_capture(pm_regexp_parser_t *parser, const uint8_t *start, /** * Returns true if the next character is the end of the source. */ -static inline bool +static PRISM_INLINE bool pm_regexp_char_is_eof(pm_regexp_parser_t *parser) { return parser->cursor >= parser->end; } @@ -175,7 +176,7 @@ pm_regexp_char_is_eof(pm_regexp_parser_t *parser) { /** * Optionally accept a char and consume it if it exists. */ -static inline bool +static PRISM_INLINE bool pm_regexp_char_accept(pm_regexp_parser_t *parser, uint8_t value) { if (!pm_regexp_char_is_eof(parser) && *parser->cursor == value) { parser->cursor++; @@ -187,7 +188,7 @@ pm_regexp_char_accept(pm_regexp_parser_t *parser, uint8_t value) { /** * Expect a character to be present and consume it. 
*/ -static inline bool +static PRISM_INLINE bool pm_regexp_char_expect(pm_regexp_parser_t *parser, uint8_t value) { if (!pm_regexp_char_is_eof(parser) && *parser->cursor == value) { parser->cursor++; @@ -219,7 +220,7 @@ pm_regexp_char_find(pm_regexp_parser_t *parser, uint8_t value) { * escape bytes >= 0x80 are followed by a non-hex-escape, this appends a 0x00 * sentinel to separate the groups for later multibyte validation. */ -static inline void +static PRISM_INLINE void pm_regexp_hex_group_boundary(pm_regexp_parser_t *parser) { if (parser->hex_group_active) { pm_buffer_append_byte(&parser->hex_escape_buffer, 0x00); @@ -230,7 +231,7 @@ pm_regexp_hex_group_boundary(pm_regexp_parser_t *parser) { /** * Track a hex escape byte value >= 0x80 for multibyte validation. */ -static inline void +static PRISM_INLINE void pm_regexp_track_hex_escape(pm_regexp_parser_t *parser, uint8_t byte) { if (byte >= 0x80) { pm_buffer_append_byte(&parser->hex_escape_buffer, byte); @@ -247,7 +248,7 @@ pm_regexp_track_hex_escape(pm_regexp_parser_t *parser, uint8_t byte) { /** * Parse a hex digit character and return its value, or -1 if not a hex digit. 
*/ -static inline int +static PRISM_INLINE int pm_regexp_hex_digit_value(uint8_t byte) { if (byte >= '0' && byte <= '9') return byte - '0'; if (byte >= 'a' && byte <= 'f') return byte - 'a' + 10; diff --git a/src/static_literals.c b/src/static_literals.c index 9023f7fff9..b59a0acd12 100644 --- a/src/static_literals.c +++ b/src/static_literals.c @@ -1,5 +1,6 @@ #include "prism/internal/static_literals.h" +#include "prism/attribute/inline.h" #include "prism/internal/buffer.h" #include "prism/internal/integer.h" #include "prism/internal/strings.h" @@ -26,7 +27,7 @@ typedef struct { const char *encoding_name; } pm_static_literals_metadata_t; -static inline uint32_t +static PRISM_INLINE uint32_t murmur_scramble(uint32_t value) { value *= 0xcc9e2d51; value = (value << 15) | (value >> 17); @@ -508,7 +509,7 @@ pm_static_literal_positive_p(const pm_node_t *node) { /** * Create a string-based representation of the given static literal. */ -static inline void +static PRISM_INLINE void pm_static_literal_inspect_node(pm_buffer_t *buffer, const pm_static_literals_metadata_t *metadata, const pm_node_t *node) { switch (PM_NODE_TYPE(node)) { case PM_FALSE_NODE: diff --git a/src/strncasecmp.c b/src/strncasecmp.c index c4e2214ee1..80eb399d04 100644 --- a/src/strncasecmp.c +++ b/src/strncasecmp.c @@ -1,9 +1,11 @@ #include "prism/internal/strncasecmp.h" +#include "prism/attribute/inline.h" + /** * A locale-insensitive version of `tolower(3)` */ -static inline int +static PRISM_INLINE int pm_tolower(int c) { if ('A' <= c && c <= 'Z') { return c | 0x20; diff --git a/src/strpbrk.c b/src/strpbrk.c index 9b28a680c9..d66c0b5fd1 100644 --- a/src/strpbrk.c +++ b/src/strpbrk.c @@ -1,5 +1,6 @@ #include "prism/internal/strpbrk.h" +#include "prism/attribute/inline.h" #include "prism/attribute/unused.h" #include "prism/internal/accel.h" #include "prism/internal/bit.h" @@ -13,7 +14,7 @@ /** * Add an invalid multibyte character error to the parser. 
*/ -static inline void +static PRISM_INLINE void pm_strpbrk_invalid_multibyte_character(pm_parser_t *parser, uint32_t start, uint32_t length) { pm_diagnostic_list_append_format(&parser->metadata_arena, &parser->error_list, start, length, PM_ERR_INVALID_MULTIBYTE_CHARACTER, parser->start[start]); } @@ -21,7 +22,7 @@ pm_strpbrk_invalid_multibyte_character(pm_parser_t *parser, uint32_t start, uint /** * Set the explicit encoding for the parser to the current encoding. */ -static inline void +static PRISM_INLINE void pm_strpbrk_explicit_encoding_set(pm_parser_t *parser, uint32_t start, uint32_t length) { if (parser->explicit_encoding != NULL) { if (parser->explicit_encoding == parser->encoding) { @@ -67,7 +68,7 @@ pm_strpbrk_explicit_encoding_set(pm_parser_t *parser, uint32_t start, uint32_t l * - low_lut/high_lut: nibble-based lookup tables for SIMD matching (NEON/SSSE3) * - table: 256-bit bitmap for scalar fallback matching (all platforms) */ -static inline void +static PRISM_INLINE void pm_strpbrk_cache_update(pm_parser_t *parser, const uint8_t *charset) { // The cache key is the full charset buffer (PM_STRPBRK_CACHE_SIZE bytes). 
// Since it is always NUL-padded, a fixed-size comparison covers both @@ -104,7 +105,7 @@ pm_strpbrk_cache_update(pm_parser_t *parser, const uint8_t *charset) { #if defined(PRISM_HAS_NEON) #include -static inline bool +static PRISM_INLINE bool scan_strpbrk_ascii(pm_parser_t *parser, const uint8_t *source, size_t maximum, const uint8_t *charset, size_t *index) { pm_strpbrk_cache_update(parser, charset); @@ -159,7 +160,7 @@ scan_strpbrk_ascii(pm_parser_t *parser, const uint8_t *source, size_t maximum, c #elif defined(PRISM_HAS_SSSE3) #include -static inline bool +static PRISM_INLINE bool scan_strpbrk_ascii(pm_parser_t *parser, const uint8_t *source, size_t maximum, const uint8_t *charset, size_t *index) { pm_strpbrk_cache_update(parser, charset); @@ -211,7 +212,7 @@ scan_strpbrk_ascii(pm_parser_t *parser, const uint8_t *source, size_t maximum, c #elif defined(PRISM_HAS_SWAR) -static inline bool +static PRISM_INLINE bool scan_strpbrk_ascii(pm_parser_t *parser, const uint8_t *source, size_t maximum, const uint8_t *charset, size_t *index) { pm_strpbrk_cache_update(parser, charset); @@ -253,7 +254,7 @@ scan_strpbrk_ascii(pm_parser_t *parser, const uint8_t *source, size_t maximum, c #else -static inline bool +static PRISM_INLINE bool scan_strpbrk_ascii(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, PRISM_ATTRIBUTE_UNUSED const uint8_t *source, PRISM_ATTRIBUTE_UNUSED size_t maximum, PRISM_ATTRIBUTE_UNUSED const uint8_t *charset, size_t *index) { *index = 0; return false; @@ -264,7 +265,7 @@ scan_strpbrk_ascii(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, PRISM_ATTRIBUTE_U /** * This is the default path. 
*/ -static inline const uint8_t * +static PRISM_INLINE const uint8_t * pm_strpbrk_utf8(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t index, size_t maximum, bool validate) { while (index < maximum) { if (strchr((const char *) charset, source[index]) != NULL) { @@ -302,7 +303,7 @@ pm_strpbrk_utf8(pm_parser_t *parser, const uint8_t *source, const uint8_t *chars /** * This is the path when the encoding is ASCII-8BIT. */ -static inline const uint8_t * +static PRISM_INLINE const uint8_t * pm_strpbrk_ascii_8bit(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t index, size_t maximum, bool validate) { while (index < maximum) { if (strchr((const char *) charset, source[index]) != NULL) { @@ -319,7 +320,7 @@ pm_strpbrk_ascii_8bit(pm_parser_t *parser, const uint8_t *source, const uint8_t /** * This is the slow path that does care about the encoding. */ -static inline const uint8_t * +static PRISM_INLINE const uint8_t * pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t index, size_t maximum, bool validate) { const pm_encoding_t *encoding = parser->encoding; @@ -361,7 +362,7 @@ pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t * This is the fast path that does not care about the encoding because we know * the encoding only supports single-byte characters. 
*/ -static inline const uint8_t * +static PRISM_INLINE const uint8_t * pm_strpbrk_single_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t index, size_t maximum, bool validate) { const pm_encoding_t *encoding = parser->encoding; diff --git a/templates/include/prism/node_new.h.erb b/templates/include/prism/node_new.h.erb index 56c214e006..27a7ed22c6 100644 --- a/templates/include/prism/node_new.h.erb +++ b/templates/include/prism/node_new.h.erb @@ -10,6 +10,8 @@ #include "prism/node.h" +#include "prism/attribute/inline.h" + <%- nodes.each do |node| -%> <%- params = node.fields.map(&:c_param) -%> /** @@ -24,7 +26,7 @@ <%- end -%> * @return The newly allocated and initialized node. */ -static inline pm_<%= node.human %>_t * +static PRISM_INLINE pm_<%= node.human %>_t * pm_<%= node.human %>_new(pm_arena_t *arena, uint32_t node_id, pm_node_flags_t flags, pm_location_t location<%= params.empty? ? "" : ", #{params.join(", ")}" %>) { pm_<%= node.human %>_t *node = (pm_<%= node.human %>_t *) pm_arena_alloc(arena, sizeof(pm_<%= node.human %>_t), PRISM_ALIGNOF(pm_<%= node.human %>_t)); diff --git a/templates/src/diagnostic.c.erb b/templates/src/diagnostic.c.erb index 80d8b5b261..7eab88201f 100644 --- a/templates/src/diagnostic.c.erb +++ b/templates/src/diagnostic.c.erb @@ -1,5 +1,6 @@ #include "prism/diagnostic.h" +#include "prism/attribute/inline.h" #include "prism/internal/arena.h" #include "prism/internal/list.h" @@ -434,7 +435,7 @@ pm_diagnostic_id_human(pm_diagnostic_id_t diag_id) { return ""; } -static inline const char * +static PRISM_INLINE const char * pm_diagnostic_message(pm_diagnostic_id_t diag_id) { assert(diag_id < PM_DIAGNOSTIC_ID_MAX); @@ -444,7 +445,7 @@ pm_diagnostic_message(pm_diagnostic_id_t diag_id) { return message; } -static inline uint8_t +static PRISM_INLINE uint8_t pm_diagnostic_level(pm_diagnostic_id_t diag_id) { assert(diag_id < PM_DIAGNOSTIC_ID_MAX); diff --git a/templates/src/prettyprint.c.erb 
b/templates/src/prettyprint.c.erb index 60665faba6..39fdcb6269 100644 --- a/templates/src/prettyprint.c.erb +++ b/templates/src/prettyprint.c.erb @@ -1,6 +1,7 @@ <%# encoding: ASCII -%> #include "prism/prettyprint.h" +#include "prism/attribute/inline.h" #include "prism/internal/buffer.h" #include "prism/internal/constant_pool.h" #include "prism/internal/integer.h" @@ -14,14 +15,14 @@ void pm_prettyprint(void) {} #else -static inline void +static PRISM_INLINE void prettyprint_location(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm_location_t *location) { pm_line_column_t start = pm_line_offset_list_line_column(&parser->line_offsets, location->start, parser->start_line); pm_line_column_t end = pm_line_offset_list_line_column(&parser->line_offsets, location->start + location->length, parser->start_line); pm_buffer_append_format(output_buffer, "(%" PRIi32 ",%" PRIu32 ")-(%" PRIi32 ",%" PRIu32 ")", start.line, start.column, end.line, end.column); } -static inline void +static PRISM_INLINE void prettyprint_constant(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm_constant_id_t constant_id) { pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, constant_id); pm_buffer_append_format(output_buffer, ":%.*s", (int) constant->length, constant->start); diff --git a/templates/src/serialize.c.erb b/templates/src/serialize.c.erb index d2ef80b3c4..588e1aac3c 100644 --- a/templates/src/serialize.c.erb +++ b/templates/src/serialize.c.erb @@ -1,5 +1,6 @@ #include "prism.h" +#include "prism/attribute/inline.h" #include "prism/internal/list.h" #include "prism/internal/options.h" @@ -10,13 +11,13 @@ #include -static inline uint32_t +static PRISM_INLINE uint32_t pm_ptrdifft_to_u32(ptrdiff_t value) { assert(value >= 0 && ((unsigned long) value) < UINT32_MAX); return (uint32_t) value; } -static inline uint32_t +static PRISM_INLINE uint32_t pm_sizet_to_u32(size_t value) { assert(value < UINT32_MAX); return (uint32_t) value; From 
8d1df7815e498ae8d88985600f356302f081252b Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 12:47:08 -0400 Subject: [PATCH 028/100] Move regexp to internal --- Makefile | 6 +++--- docs/fuzzing.md | 15 ++++----------- ext/prism/extension.c | 10 +++++----- fuzz/asan.ignore | 5 ----- fuzz/corpus/regexp/1 | 1 - fuzz/corpus/regexp/2 | 1 - fuzz/corpus/regexp/3 | 1 - fuzz/corpus/regexp/4 | 1 - fuzz/corpus/regexp/6 | 1 - fuzz/corpus/unescape/1 | 1 - fuzz/corpus/unescape/2 | 1 - fuzz/corpus/unescape/3 | 1 - fuzz/regexp.c | 23 ----------------------- fuzz/regexp.sh | 5 ----- include/prism.h | 1 - include/prism/{ => internal}/regexp.h | 15 ++++----------- include/prism/parser.h | 8 ++++++++ prism.gemspec | 3 ++- src/parser.c | 11 +++++++++++ src/prism.c | 3 +++ src/regexp.c | 4 +++- templates/src/serialize.c.erb | 1 + 22 files changed, 44 insertions(+), 74 deletions(-) delete mode 100644 fuzz/asan.ignore delete mode 100644 fuzz/corpus/regexp/1 delete mode 100644 fuzz/corpus/regexp/2 delete mode 100644 fuzz/corpus/regexp/3 delete mode 100644 fuzz/corpus/regexp/4 delete mode 100644 fuzz/corpus/regexp/6 delete mode 100644 fuzz/corpus/unescape/1 delete mode 100644 fuzz/corpus/unescape/2 delete mode 100644 fuzz/corpus/unescape/3 delete mode 100644 fuzz/regexp.c delete mode 100755 fuzz/regexp.sh rename include/prism/{ => internal}/regexp.h (91%) create mode 100644 src/parser.c diff --git a/Makefile b/Makefile index 7f61f753fa..1c56c0d358 100644 --- a/Makefile +++ b/Makefile @@ -70,12 +70,12 @@ build/fuzz.%: $(SOURCES) fuzz/%.c fuzz/fuzz.c $(ECHO) "building $* fuzzer" $(Q) $(MAKEDIRS) $(@D) $(ECHO) "building main fuzz binary" - $(Q) afl-clang-lto $(DEBUG_FLAGS) $(CPPFLAGS) $(CFLAGS) $(FUZZ_FLAGS) -O0 -fsanitize-ignorelist=fuzz/asan.ignore -fsanitize=fuzzer,address -ggdb3 -std=c99 -Iinclude -o $@ $^ + $(Q) afl-clang-lto $(DEBUG_FLAGS) $(CPPFLAGS) $(CFLAGS) $(FUZZ_FLAGS) -O0 -fsanitize=fuzzer,address -ggdb3 -std=c99 -Iinclude -o $@ $^ $(ECHO) "building cmplog binary" - 
$(Q) AFL_LLVM_CMPLOG=1 afl-clang-lto $(DEBUG_FLAGS) $(CPPFLAGS) $(CFLAGS) $(FUZZ_FLAGS) -O0 -fsanitize-ignorelist=fuzz/asan.ignore -fsanitize=fuzzer,address -ggdb3 -std=c99 -Iinclude -o $@.cmplog $^ + $(Q) AFL_LLVM_CMPLOG=1 afl-clang-lto $(DEBUG_FLAGS) $(CPPFLAGS) $(CFLAGS) $(FUZZ_FLAGS) -O0 -fsanitize=fuzzer,address -ggdb3 -std=c99 -Iinclude -o $@.cmplog $^ build/fuzz.heisenbug.%: $(SOURCES) fuzz/%.c fuzz/heisenbug.c - $(Q) afl-clang-lto $(DEBUG_FLAGS) $(CPPFLAGS) $(CFLAGS) $(FUZZ_FLAGS) -O0 -fsanitize-ignorelist=fuzz/asan.ignore -fsanitize=fuzzer,address -ggdb3 -std=c99 -Iinclude -o $@ $^ + $(Q) afl-clang-lto $(DEBUG_FLAGS) $(CPPFLAGS) $(CFLAGS) $(FUZZ_FLAGS) -O0 -fsanitize=fuzzer,address -ggdb3 -std=c99 -Iinclude -o $@ $^ fuzz-debug: $(ECHO) "entering debug shell" diff --git a/docs/fuzzing.md b/docs/fuzzing.md index b6ec6112a8..c142394b63 100644 --- a/docs/fuzzing.md +++ b/docs/fuzzing.md @@ -5,8 +5,7 @@ We use fuzzing to test the various entrypoints to the library. The fuzzer we use ``` fuzz ├── corpus -│   ├── parse fuzzing corpus for parsing (a symlink to our fixtures) -│   └── regexp fuzzing corpus for regexp +│   └── parse fuzzing corpus for parsing (a symlink to our fixtures) ├── dict a AFL++ dictionary containing various tokens ├── docker │   └── Dockerfile for building a container with the fuzzer toolchain @@ -14,8 +13,6 @@ fuzz ├── heisenbug.c entrypoint for reproducing a crash or hang ├── parse.c fuzz handler for parsing ├── parse.sh script to run parsing fuzzer -├── regexp.c fuzz handler for regular expression parsing -├── regexp.sh script to run regexp fuzzer └── tools    ├── backtrace.sh generates backtrace files for a crash directory    └── minimize.sh generates minimized crash or hang files @@ -23,16 +20,14 @@ fuzz ## Usage -There are currently three fuzzing targets +There is currently one fuzz target: - `pm_serialize_parse` (parse) -- `pm_regexp_parse` (regexp) -Respectively, fuzzing can be performed with +Fuzzing can be performed with ``` make 
fuzz-run-parse -make fuzz-run-regexp ``` To end a fuzzing job, interrupt with CTRL+C. To enter a container with the fuzzing toolchain and debug utilities, run @@ -43,8 +38,6 @@ make fuzz-debug # Out-of-bounds reads -Currently, encoding functionality implementing the `pm_encoding_t` interface can read outside of inputs. For the time being, ASAN instrumentation is disabled for functions from src/enc. See `fuzz/asan.ignore`. - To disable ASAN read instrumentation globally, use the `FUZZ_FLAGS` environment variable e.g. ``` @@ -55,7 +48,7 @@ Note, that this may make reproducing bugs difficult as they may depend on memory ``` make fuzz-debug # enter the docker container with build tools -make build/fuzz.heisenbug.parse # or .regexp +make build/fuzz.heisenbug.parse ./build/fuzz.heisenbug.parse path-to-problem-input ``` diff --git a/ext/prism/extension.c b/ext/prism/extension.c index c248b0c123..60f33f0cb1 100644 --- a/ext/prism/extension.c +++ b/ext/prism/extension.c @@ -692,7 +692,7 @@ parse_lex_token(void *data, pm_parser_t *parser, pm_token_t *token) { static void parse_lex_encoding_changed_callback(pm_parser_t *parser) { parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data; - parse_lex_data->encoding = rb_enc_find(parser->encoding->name); + parse_lex_data->encoding = rb_enc_find(pm_parser_encoding_name(parser)); // Since the encoding changed, we need to go back and change the encoding of // the tokens that were already lexed. This is only going to end up being @@ -767,7 +767,7 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod // encoding for the source string and the correct newline offsets. // We do it here because we've already created the Source object and given // it over to all of the tokens, and both of these are only set after pm_parse(). 
- rb_encoding *encoding = rb_enc_find(parser.encoding->name); + rb_encoding *encoding = rb_enc_find(pm_parser_encoding_name(&parser)); rb_enc_associate(source_string, encoding); for (size_t index = 0; index < parser.line_offsets.size; index++) { @@ -856,7 +856,7 @@ parse_input(pm_string_t *input, const pm_options_t *options) { pm_parser_init(&arena, &parser, pm_string_source(input), pm_string_length(input), options); pm_node_t *node = pm_parse(&parser); - rb_encoding *encoding = rb_enc_find(parser.encoding->name); + rb_encoding *encoding = rb_enc_find(pm_parser_encoding_name(&parser)); VALUE source = pm_source_new(&parser, encoding, options->freeze); VALUE value = pm_ast_new(&parser, node, encoding, source, options->freeze); @@ -1075,7 +1075,7 @@ parse_stream(int argc, VALUE *argv, VALUE self) { pm_buffer_t buffer; pm_node_t *node = pm_parse_stream(&arena, &parser, &buffer, (void *) stream, parse_stream_fgets, parse_stream_eof, &options); - rb_encoding *encoding = rb_enc_find(parser.encoding->name); + rb_encoding *encoding = rb_enc_find(pm_parser_encoding_name(&parser)); VALUE source = pm_source_new(&parser, encoding, options.freeze); VALUE value = pm_ast_new(&parser, node, encoding, source, options.freeze); @@ -1098,7 +1098,7 @@ parse_input_comments(pm_string_t *input, const pm_options_t *options) { pm_parser_init(&arena, &parser, pm_string_source(input), pm_string_length(input), options); pm_parse(&parser); - rb_encoding *encoding = rb_enc_find(parser.encoding->name); + rb_encoding *encoding = rb_enc_find(pm_parser_encoding_name(&parser)); VALUE source = pm_source_new(&parser, encoding, options->freeze); VALUE comments = parser_comments(&parser, source, options->freeze); diff --git a/fuzz/asan.ignore b/fuzz/asan.ignore deleted file mode 100644 index 01a130893c..0000000000 --- a/fuzz/asan.ignore +++ /dev/null @@ -1,5 +0,0 @@ -# for now, we ignore the encoding functions as they -# can read out of bounds -[address] -src:src/enc/* - diff --git a/fuzz/corpus/regexp/1 
b/fuzz/corpus/regexp/1 deleted file mode 100644 index 9e2c270196..0000000000 --- a/fuzz/corpus/regexp/1 +++ /dev/null @@ -1 +0,0 @@ -(?#comment) diff --git a/fuzz/corpus/regexp/2 b/fuzz/corpus/regexp/2 deleted file mode 100644 index f4119666b1..0000000000 --- a/fuzz/corpus/regexp/2 +++ /dev/null @@ -1 +0,0 @@ -(?:abcd) diff --git a/fuzz/corpus/regexp/3 b/fuzz/corpus/regexp/3 deleted file mode 100644 index 77ee883146..0000000000 --- a/fuzz/corpus/regexp/3 +++ /dev/null @@ -1 +0,0 @@ -(?:subexp) diff --git a/fuzz/corpus/regexp/4 b/fuzz/corpus/regexp/4 deleted file mode 100644 index 60396c3a43..0000000000 --- a/fuzz/corpus/regexp/4 +++ /dev/null @@ -1 +0,0 @@ -!"£$%^&*()/adfas" diff --git a/fuzz/corpus/regexp/6 b/fuzz/corpus/regexp/6 deleted file mode 100644 index 273d11e622..0000000000 --- a/fuzz/corpus/regexp/6 +++ /dev/null @@ -1 +0,0 @@ -word| ) diff --git a/fuzz/corpus/unescape/1 b/fuzz/corpus/unescape/1 deleted file mode 100644 index cd1ed7f860..0000000000 --- a/fuzz/corpus/unescape/1 +++ /dev/null @@ -1 +0,0 @@ -\r\n diff --git a/fuzz/corpus/unescape/2 b/fuzz/corpus/unescape/2 deleted file mode 100644 index dd3221c3a3..0000000000 --- a/fuzz/corpus/unescape/2 +++ /dev/null @@ -1 +0,0 @@ -\\\"\\' diff --git a/fuzz/corpus/unescape/3 b/fuzz/corpus/unescape/3 deleted file mode 100644 index 186bc5a9e0..0000000000 --- a/fuzz/corpus/unescape/3 +++ /dev/null @@ -1 +0,0 @@ -\b\0000 diff --git a/fuzz/regexp.c b/fuzz/regexp.c deleted file mode 100644 index f1a02dedc2..0000000000 --- a/fuzz/regexp.c +++ /dev/null @@ -1,23 +0,0 @@ -#include - -void -regexp_name_callback(const pm_string_t *name, void *data) { - // Do nothing -} - -void -regexp_error_callback(const uint8_t *start, const uint8_t *end, const char *message, void *data) { - // Do nothing -} - -void -harness(const uint8_t *input, size_t size) { - pm_arena_t arena = { 0 }; - pm_parser_t parser; - pm_parser_init(&arena, &parser, input, size, NULL); - - pm_regexp_parse(&parser, input, size, false, 
regexp_name_callback, NULL, regexp_error_callback, NULL); - - pm_parser_free(&parser); - pm_arena_free(&arena); -} diff --git a/fuzz/regexp.sh b/fuzz/regexp.sh deleted file mode 100755 index 5cf615a095..0000000000 --- a/fuzz/regexp.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash - -OUTPUT_DIR=$1 - -afl-fuzz -G 100 -c ./build/fuzz.regexp.cmplog -i ./fuzz/corpus/regexp -o "$OUTPUT_DIR" ./build/fuzz.regexp diff --git a/include/prism.h b/include/prism.h index df7d49af29..b7ef0d126c 100644 --- a/include/prism.h +++ b/include/prism.h @@ -19,7 +19,6 @@ extern "C" { #include "prism/options.h" #include "prism/parser.h" #include "prism/prettyprint.h" -#include "prism/regexp.h" #include "prism/version.h" #include diff --git a/include/prism/regexp.h b/include/prism/internal/regexp.h similarity index 91% rename from include/prism/regexp.h rename to include/prism/internal/regexp.h index 17f0e03019..7f4731967c 100644 --- a/include/prism/regexp.h +++ b/include/prism/internal/regexp.h @@ -1,20 +1,13 @@ /** - * @file regexp.h + * @file internal/regexp.h * * A regular expression parser. */ -#ifndef PRISM_REGEXP_H -#define PRISM_REGEXP_H +#ifndef PRISM_INTERNAL_REGEXP_H +#define PRISM_INTERNAL_REGEXP_H -#include "prism/defines.h" +#include "prism/ast.h" #include "prism/parser.h" -#include "prism/internal/encoding.h" -#include "prism/internal/memchr.h" -#include "prism/strings.h" - -#include -#include -#include /** * Accumulation state for named capture groups found during regexp parsing. diff --git a/include/prism/parser.h b/include/prism/parser.h index 0a0b142952..ae3961899e 100644 --- a/include/prism/parser.h +++ b/include/prism/parser.h @@ -993,4 +993,12 @@ struct pm_parser { #endif }; +/** + * Returns the name of the encoding that is being used to parse the source. 
+ * + * @param parser the parser whose encoding name we want to get + * @return the name of the encoding that is being used to parse the source + */ +PRISM_EXPORTED_FUNCTION const char * pm_parser_encoding_name(const pm_parser_t *parser); + #endif diff --git a/prism.gemspec b/prism.gemspec index 6a4b7ba5a4..a18de24742 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -71,7 +71,6 @@ Gem::Specification.new do |spec| "include/prism/options.h", "include/prism/parser.h", "include/prism/prettyprint.h", - "include/prism/regexp.h", "include/prism/static_literals.h", "include/prism/strings.h", "include/prism/version.h", @@ -88,6 +87,7 @@ Gem::Specification.new do |spec| "include/prism/internal/list.h", "include/prism/internal/memchr.h", "include/prism/internal/options.h", + "include/prism/internal/regexp.h", "include/prism/internal/static_literals.h", "include/prism/internal/strncasecmp.h", "include/prism/internal/strings.h", @@ -192,6 +192,7 @@ Gem::Specification.new do |spec| "src/memchr.c", "src/node.c", "src/options.c", + "src/parser.c", "src/prettyprint.c", "src/prism.c", "src/regexp.c", diff --git a/src/parser.c b/src/parser.c new file mode 100644 index 0000000000..356700796f --- /dev/null +++ b/src/parser.c @@ -0,0 +1,11 @@ +#include "prism/parser.h" + +#include "prism/internal/encoding.h" + +/** + * Returns the name of the encoding that is being used to parse the source. 
+ */ +const char * +pm_parser_encoding_name(const pm_parser_t *parser) { + return parser->encoding->name; +} diff --git a/src/prism.c b/src/prism.c index fed7b069e6..b4bf6b8a14 100644 --- a/src/prism.c +++ b/src/prism.c @@ -6,10 +6,13 @@ #include "prism/internal/char.h" #include "prism/internal/constant_pool.h" #include "prism/internal/diagnostic.h" +#include "prism/internal/encoding.h" #include "prism/internal/integer.h" #include "prism/internal/line_offset_list.h" #include "prism/internal/list.h" +#include "prism/internal/memchr.h" #include "prism/internal/options.h" +#include "prism/internal/regexp.h" #include "prism/internal/static_literals.h" #include "prism/internal/strings.h" #include "prism/internal/strncasecmp.h" diff --git a/src/regexp.c b/src/regexp.c index 2955abcfcb..7fd80ca696 100644 --- a/src/regexp.c +++ b/src/regexp.c @@ -1,9 +1,11 @@ -#include "prism/regexp.h" +#include "prism/internal/regexp.h" #include "prism/attribute/inline.h" #include "prism/internal/buffer.h" #include "prism/internal/char.h" #include "prism/internal/diagnostic.h" +#include "prism/internal/encoding.h" +#include "prism/internal/memchr.h" #include "prism/internal/strings.h" #include "prism/internal/strncasecmp.h" diff --git a/templates/src/serialize.c.erb b/templates/src/serialize.c.erb index 588e1aac3c..e1c6709212 100644 --- a/templates/src/serialize.c.erb +++ b/templates/src/serialize.c.erb @@ -1,6 +1,7 @@ #include "prism.h" #include "prism/attribute/inline.h" +#include "prism/internal/encoding.h" #include "prism/internal/list.h" #include "prism/internal/options.h" From 2496b0105b1792d6453662d55f68fe4a389111e3 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 12:53:18 -0400 Subject: [PATCH 029/100] Split out excludes into its own header --- include/prism/defines.h | 19 +------------------ include/prism/excludes.h | 29 +++++++++++++++++++++++++++++ include/prism/prettyprint.h | 6 ++---- prism.gemspec | 1 + 4 files changed, 33 insertions(+), 22 deletions(-) 
create mode 100644 include/prism/excludes.h diff --git a/include/prism/defines.h b/include/prism/defines.h index 0d997f9c9b..2189c0cdc7 100644 --- a/include/prism/defines.h +++ b/include/prism/defines.h @@ -20,6 +20,7 @@ #include "prism/internal/bit.h" #include "prism/allocator.h" +#include "prism/excludes.h" #include "prism/files.h" #include @@ -79,24 +80,6 @@ #define PRISM_ISINF(x) isinf(x) #endif -/** - * If PRISM_BUILD_MINIMAL is defined, then we're going to define every possible - * switch that will turn off certain features of prism. - */ -#ifdef PRISM_BUILD_MINIMAL - /** Exclude the serialization API. */ - #define PRISM_EXCLUDE_SERIALIZATION - - /** Exclude the JSON serialization API. */ - #define PRISM_EXCLUDE_JSON - - /** Exclude the prettyprint API. */ - #define PRISM_EXCLUDE_PRETTYPRINT - - /** Exclude the full set of encodings, using the minimal only. */ - #define PRISM_ENCODING_EXCLUDE_FULL -#endif - /** * Support PRISM_LIKELY and PRISM_UNLIKELY to help the compiler optimize its * branch predication. diff --git a/include/prism/excludes.h b/include/prism/excludes.h new file mode 100644 index 0000000000..8600622f63 --- /dev/null +++ b/include/prism/excludes.h @@ -0,0 +1,29 @@ +/** + * @file excludes.h + * + * A header file that defines macros to exclude certain features of the prism + * library. This is useful for reducing the size of the library when certain + * features are not needed. + */ +#ifndef PRISM_EXCLUDES_H +#define PRISM_EXCLUDES_H + +/** + * If PRISM_BUILD_MINIMAL is defined, then we're going to define every possible + * switch that will turn off certain features of prism. + */ +#ifdef PRISM_BUILD_MINIMAL + /** Exclude the serialization API. */ + #define PRISM_EXCLUDE_SERIALIZATION + + /** Exclude the JSON serialization API. */ + #define PRISM_EXCLUDE_JSON + + /** Exclude the prettyprint API. */ + #define PRISM_EXCLUDE_PRETTYPRINT + + /** Exclude the full set of encodings, using the minimal only. 
*/ + #define PRISM_ENCODING_EXCLUDE_FULL +#endif + +#endif diff --git a/include/prism/prettyprint.h b/include/prism/prettyprint.h index 55da182f53..65c4302644 100644 --- a/include/prism/prettyprint.h +++ b/include/prism/prettyprint.h @@ -6,16 +6,14 @@ #ifndef PRISM_PRETTYPRINT_H #define PRISM_PRETTYPRINT_H -#include "prism/defines.h" +#include "prism/excludes.h" #ifdef PRISM_EXCLUDE_PRETTYPRINT -void pm_prettyprint(void); +#define pm_prettyprint(output_buffer_, parser_, node_) #else -#include - #include "prism/ast.h" #include "prism/buffer.h" #include "prism/parser.h" diff --git a/prism.gemspec b/prism.gemspec index a18de24742..df4dd9dd22 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -62,6 +62,7 @@ Gem::Specification.new do |spec| "include/prism/defines.h", "include/prism/diagnostic.h", "include/prism/encoding.h", + "include/prism/excludes.h", "include/prism/files.h", "include/prism/integer.h", "include/prism/line_offset_list.h", From 4090539e4a582baec27a705ba301bdcf584f1e86 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 12:56:18 -0400 Subject: [PATCH 030/100] Trim down parser.h --- include/prism/parser.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/include/prism/parser.h b/include/prism/parser.h index ae3961899e..bb358bb9a8 100644 --- a/include/prism/parser.h +++ b/include/prism/parser.h @@ -6,19 +6,17 @@ #ifndef PRISM_PARSER_H #define PRISM_PARSER_H -#include "prism/defines.h" - #include "prism/arena.h" #include "prism/ast.h" -#include "prism/constant_pool.h" #include "prism/encoding.h" #include "prism/line_offset_list.h" #include "prism/list.h" #include "prism/options.h" #include "prism/static_literals.h" -#include "prism/strings.h" #include +#include +#include /** * This enum provides various bits that represent different kinds of states that From 76904582b19aa5c7d40b7a8b7335b0759a6ea07c Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 12:59:42 -0400 Subject: [PATCH 031/100] Trim down node.h 
--- include/prism/node.h | 6 +++--- src/prism.c | 1 + templates/src/node.c.erb | 2 ++ templates/src/serialize.c.erb | 1 + 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/include/prism/node.h b/include/prism/node.h index 560936dc08..e59baedbd0 100644 --- a/include/prism/node.h +++ b/include/prism/node.h @@ -6,9 +6,9 @@ #ifndef PRISM_NODE_H #define PRISM_NODE_H -#include "prism/defines.h" -#include "prism/internal/buffer.h" -#include "prism/parser.h" +#include "prism/attribute/exported.h" +#include "prism/arena.h" +#include "prism/ast.h" /** * Loop through each node in the node list, writing each node to the given diff --git a/src/prism.c b/src/prism.c index b4bf6b8a14..31c8881fa0 100644 --- a/src/prism.c +++ b/src/prism.c @@ -3,6 +3,7 @@ #include "prism/internal/accel.h" #include "prism/internal/arena.h" #include "prism/internal/bit.h" +#include "prism/internal/buffer.h" #include "prism/internal/char.h" #include "prism/internal/constant_pool.h" #include "prism/internal/diagnostic.h" diff --git a/templates/src/node.c.erb b/templates/src/node.c.erb index 4e1b1cdaae..35ef8d9be5 100644 --- a/templates/src/node.c.erb +++ b/templates/src/node.c.erb @@ -1,8 +1,10 @@ #line <%= __LINE__ + 1 %> "prism/templates/src/<%= File.basename(__FILE__) %>" #include "prism/node.h" +#include "prism/internal/buffer.h" #include "prism/internal/constant_pool.h" #include "prism/internal/integer.h" +#include "prism/parser.h" #include diff --git a/templates/src/serialize.c.erb b/templates/src/serialize.c.erb index e1c6709212..39eb2b521a 100644 --- a/templates/src/serialize.c.erb +++ b/templates/src/serialize.c.erb @@ -1,6 +1,7 @@ #include "prism.h" #include "prism/attribute/inline.h" +#include "prism/internal/buffer.h" #include "prism/internal/encoding.h" #include "prism/internal/list.h" #include "prism/internal/options.h" From 8ad880255d47a3f43f4cb95dc0189a285d7ab8fa Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 13:33:59 -0400 Subject: [PATCH 032/100] More 
splitting of headers --- include/prism/{internal => }/accel.h | 2 +- include/prism/attribute/fallthrough.h | 25 +++++++++++++++++++++++++ include/prism/defines.h | 18 +++--------------- include/prism/internal/isinf.h | 19 +++++++++++++++++++ include/prism/parser.h | 1 + prism.gemspec | 4 +++- src/prism.c | 2 +- src/regexp.c | 4 ++++ src/static_literals.c | 5 +++++ src/strpbrk.c | 2 +- templates/include/prism/ast.h.erb | 3 +-- templates/src/diagnostic.c.erb | 5 +++++ templates/src/node.c.erb | 2 ++ templates/src/prettyprint.c.erb | 2 ++ templates/src/token_type.c.erb | 4 ++-- 15 files changed, 75 insertions(+), 23 deletions(-) rename include/prism/{internal => }/accel.h (96%) create mode 100644 include/prism/attribute/fallthrough.h create mode 100644 include/prism/internal/isinf.h diff --git a/include/prism/internal/accel.h b/include/prism/accel.h similarity index 96% rename from include/prism/internal/accel.h rename to include/prism/accel.h index 971c9b2473..c8fc1433c6 100644 --- a/include/prism/internal/accel.h +++ b/include/prism/accel.h @@ -1,5 +1,5 @@ /** - * @file internal/accel.h + * @file accel.h * * Platform detection for acceleration implementations. */ diff --git a/include/prism/attribute/fallthrough.h b/include/prism/attribute/fallthrough.h new file mode 100644 index 0000000000..aefe3e48d7 --- /dev/null +++ b/include/prism/attribute/fallthrough.h @@ -0,0 +1,25 @@ +/** + * @file attribute/fallthrough.h + * + * Define the PRISM_FALLTHROUGH macro, which is used to annotate intentional + * fallthroughs in switch statements. + */ +#ifndef PRISM_FALLTHROUGH_H +#define PRISM_FALLTHROUGH_H + +/** + * We use -Wimplicit-fallthrough to guard potentially unintended fall-through + * between cases of a switch. Use PRISM_FALLTHROUGH to explicitly annotate cases + * where the fallthrough is intentional. 
+ */ +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L /* C23 or later */ + #define PRISM_FALLTHROUGH [[fallthrough]]; +#elif defined(__GNUC__) || defined(__clang__) + #define PRISM_FALLTHROUGH __attribute__((fallthrough)); +#elif defined(_MSC_VER) + #define PRISM_FALLTHROUGH __fallthrough; +#else + #define PRISM_FALLTHROUGH +#endif + +#endif diff --git a/include/prism/defines.h b/include/prism/defines.h index 2189c0cdc7..fb88df62b1 100644 --- a/include/prism/defines.h +++ b/include/prism/defines.h @@ -11,14 +11,16 @@ #include "prism/attribute/align.h" #include "prism/attribute/exported.h" +#include "prism/attribute/fallthrough.h" #include "prism/attribute/flex_array.h" #include "prism/attribute/format.h" #include "prism/attribute/inline.h" #include "prism/attribute/unused.h" -#include "prism/internal/accel.h" #include "prism/internal/bit.h" +#include "prism/internal/isinf.h" +#include "prism/accel.h" #include "prism/allocator.h" #include "prism/excludes.h" #include "prism/files.h" @@ -98,18 +100,4 @@ #define PRISM_UNLIKELY(x) (x) #endif -/** - * We use -Wimplicit-fallthrough to guard potentially unintended fall-through between cases of a switch. - * Use PRISM_FALLTHROUGH to explicitly annotate cases where the fallthrough is intentional. - */ -#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L // C23 or later - #define PRISM_FALLTHROUGH [[fallthrough]]; -#elif defined(__GNUC__) || defined(__clang__) - #define PRISM_FALLTHROUGH __attribute__((fallthrough)); -#elif defined(_MSC_VER) - #define PRISM_FALLTHROUGH __fallthrough; -#else - #define PRISM_FALLTHROUGH -#endif - #endif diff --git a/include/prism/internal/isinf.h b/include/prism/internal/isinf.h new file mode 100644 index 0000000000..569f4726e7 --- /dev/null +++ b/include/prism/internal/isinf.h @@ -0,0 +1,19 @@ +/** + * @file isinf.h + */ +#ifndef PRISM_ISINF_H +#define PRISM_ISINF_H + +/** + * isinf on POSIX systems accepts a float, a double, or a long double. 
But mingw + * didn't provide an isinf macro, only an isinf function that only accepts + * floats, so we need to use _finite instead. + */ +#ifdef __MINGW64__ + #include + #define PRISM_ISINF(x) (!_finite(x)) +#else + #define PRISM_ISINF(x) isinf(x) +#endif + +#endif diff --git a/include/prism/parser.h b/include/prism/parser.h index bb358bb9a8..9dc666661b 100644 --- a/include/prism/parser.h +++ b/include/prism/parser.h @@ -6,6 +6,7 @@ #ifndef PRISM_PARSER_H #define PRISM_PARSER_H +#include "prism/accel.h" #include "prism/arena.h" #include "prism/ast.h" #include "prism/encoding.h" diff --git a/prism.gemspec b/prism.gemspec index df4dd9dd22..fd216e13a7 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -48,11 +48,13 @@ Gem::Specification.new do |spec| "include/prism.h", "include/prism/attribute/align.h", "include/prism/attribute/exported.h", + "include/prism/attribute/fallthrough.h", "include/prism/attribute/flex_array.h", "include/prism/attribute/force_inline.h", "include/prism/attribute/format.h", "include/prism/attribute/inline.h", "include/prism/attribute/unused.h", + "include/prism/accel.h", "include/prism/allocator.h", "include/prism/arena.h", "include/prism/ast.h", @@ -75,7 +77,6 @@ Gem::Specification.new do |spec| "include/prism/static_literals.h", "include/prism/strings.h", "include/prism/version.h", - "include/prism/internal/accel.h", "include/prism/internal/arena.h", "include/prism/internal/bit.h", "include/prism/internal/buffer.h", @@ -84,6 +85,7 @@ Gem::Specification.new do |spec| "include/prism/internal/diagnostic.h", "include/prism/internal/encoding.h", "include/prism/internal/integer.h", + "include/prism/internal/isinf.h", "include/prism/internal/line_offset_list.h", "include/prism/internal/list.h", "include/prism/internal/memchr.h", diff --git a/src/prism.c b/src/prism.c index 31c8881fa0..eefd24e711 100644 --- a/src/prism.c +++ b/src/prism.c @@ -1,6 +1,5 @@ #include "prism.h" -#include "prism/internal/accel.h" #include "prism/internal/arena.h" #include 
"prism/internal/bit.h" #include "prism/internal/buffer.h" @@ -19,6 +18,7 @@ #include "prism/internal/strncasecmp.h" #include "prism/internal/strpbrk.h" +#include "prism/accel.h" #include "prism/node_new.h" /** diff --git a/src/regexp.c b/src/regexp.c index 7fd80ca696..0ed501092b 100644 --- a/src/regexp.c +++ b/src/regexp.c @@ -1,6 +1,7 @@ #include "prism/internal/regexp.h" #include "prism/attribute/inline.h" +#include "prism/attribute/fallthrough.h" #include "prism/internal/buffer.h" #include "prism/internal/char.h" #include "prism/internal/diagnostic.h" @@ -9,6 +10,9 @@ #include "prism/internal/strings.h" #include "prism/internal/strncasecmp.h" +#include +#include + /** The maximum depth of nested groups allowed in a regular expression. */ #define PM_REGEXP_PARSE_DEPTH_MAX 4096 diff --git a/src/static_literals.c b/src/static_literals.c index b59a0acd12..73e479f8d3 100644 --- a/src/static_literals.c +++ b/src/static_literals.c @@ -1,12 +1,17 @@ #include "prism/internal/static_literals.h" #include "prism/attribute/inline.h" +#include "prism/attribute/unused.h" #include "prism/internal/buffer.h" #include "prism/internal/integer.h" +#include "prism/internal/isinf.h" #include "prism/internal/strings.h" #include "prism/allocator.h" +#include +#include #include +#include /** * A small struct used for passing around a subset of the information that is diff --git a/src/strpbrk.c b/src/strpbrk.c index d66c0b5fd1..59330fc172 100644 --- a/src/strpbrk.c +++ b/src/strpbrk.c @@ -2,10 +2,10 @@ #include "prism/attribute/inline.h" #include "prism/attribute/unused.h" -#include "prism/internal/accel.h" #include "prism/internal/bit.h" #include "prism/internal/diagnostic.h" #include "prism/internal/encoding.h" +#include "prism/accel.h" #include #include diff --git a/templates/include/prism/ast.h.erb b/templates/include/prism/ast.h.erb index 783eaca2dd..af3403c293 100644 --- a/templates/include/prism/ast.h.erb +++ b/templates/include/prism/ast.h.erb @@ -8,12 +8,11 @@ #ifndef PRISM_AST_H 
#define PRISM_AST_H -#include "prism/defines.h" +#include "prism/attribute/align.h" #include "prism/constant_pool.h" #include "prism/integer.h" #include "prism/strings.h" -#include #include #include diff --git a/templates/src/diagnostic.c.erb b/templates/src/diagnostic.c.erb index 7eab88201f..8a4abc14eb 100644 --- a/templates/src/diagnostic.c.erb +++ b/templates/src/diagnostic.c.erb @@ -4,6 +4,11 @@ #include "prism/internal/arena.h" #include "prism/internal/list.h" +#include +#include +#include +#include + #define PM_DIAGNOSTIC_ID_MAX <%= errors.length + warnings.length %> /** This struct holds the data for each diagnostic. */ diff --git a/templates/src/node.c.erb b/templates/src/node.c.erb index 35ef8d9be5..c0016647a8 100644 --- a/templates/src/node.c.erb +++ b/templates/src/node.c.erb @@ -6,7 +6,9 @@ #include "prism/internal/integer.h" #include "prism/parser.h" +#include #include +#include /** * Attempts to grow the node list to the next size. If there is already diff --git a/templates/src/prettyprint.c.erb b/templates/src/prettyprint.c.erb index 39fdcb6269..db2d52ddcf 100644 --- a/templates/src/prettyprint.c.erb +++ b/templates/src/prettyprint.c.erb @@ -6,6 +6,8 @@ #include "prism/internal/constant_pool.h" #include "prism/internal/integer.h" +#include + // We optionally support pretty printing nodes. For systems that don't want or // need this functionality, it can be turned off with the // PRISM_EXCLUDE_PRETTYPRINT define. diff --git a/templates/src/token_type.c.erb b/templates/src/token_type.c.erb index c08a0e7921..cdf4e73658 100644 --- a/templates/src/token_type.c.erb +++ b/templates/src/token_type.c.erb @@ -1,7 +1,7 @@ -#include - #include "prism/ast.h" +#include + /** * Returns a string representation of the given token type. 
*/ From 400b21739347720655b8b9b3eacc4f1b0c27b3e1 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 13:41:06 -0400 Subject: [PATCH 033/100] Remove defines --- docs/build_system.md | 2 +- include/prism.h | 2 - include/prism/defines.h | 103 ----------------------------- prism.gemspec | 1 - rust/ruby-prism-sys/build/main.rs | 1 - src/prism.c | 52 +++++++++++++++ templates/ext/prism/api_node.c.erb | 1 + 7 files changed, 54 insertions(+), 108 deletions(-) delete mode 100644 include/prism/defines.h diff --git a/docs/build_system.md b/docs/build_system.md index 096edb8ad0..2e2749e62c 100644 --- a/docs/build_system.md +++ b/docs/build_system.md @@ -87,7 +87,7 @@ If you need to use memory allocation functions implemented outside of the standa * Additionally, include `-I [path/to/custom_allocator]` where your `prism_xallocator.h` is located * Link the implementation of `prism_xallocator.c` that contains functions declared in `prism_xallocator.h` -For further clarity, refer to `include/prism/defines.h`. +For further clarity, refer to `include/prism/allocator.h`. ### Building prism from source as a C library diff --git a/include/prism.h b/include/prism.h index b7ef0d126c..55ce7e4745 100644 --- a/include/prism.h +++ b/include/prism.h @@ -10,8 +10,6 @@ extern "C" { #endif -#include "prism/defines.h" - #include "prism/arena.h" #include "prism/ast.h" #include "prism/diagnostic.h" diff --git a/include/prism/defines.h b/include/prism/defines.h deleted file mode 100644 index fb88df62b1..0000000000 --- a/include/prism/defines.h +++ /dev/null @@ -1,103 +0,0 @@ -/** - * @file defines.h - * - * Macro definitions used throughout the prism library. - * - * This file should be included first by any *.h or *.c in prism for consistency - * and to ensure that the macros are defined before they are used. 
- */ -#ifndef PRISM_DEFINES_H -#define PRISM_DEFINES_H - -#include "prism/attribute/align.h" -#include "prism/attribute/exported.h" -#include "prism/attribute/fallthrough.h" -#include "prism/attribute/flex_array.h" -#include "prism/attribute/format.h" -#include "prism/attribute/inline.h" -#include "prism/attribute/unused.h" - -#include "prism/internal/bit.h" -#include "prism/internal/isinf.h" - -#include "prism/accel.h" -#include "prism/allocator.h" -#include "prism/excludes.h" -#include "prism/files.h" - -#include -#include -#include -#include -#include -#include -#include -#include - -/** - * We want to be able to use the PRI* macros for printing out integers, but on - * some platforms they aren't included unless this is already defined. - */ -#define __STDC_FORMAT_MACROS -// Include sys/types.h before inttypes.h to work around issue with -// certain versions of GCC and newlib which causes omission of PRIx64 -#include -#include - -/** - * When we are parsing using recursive descent, we want to protect against - * malicious payloads that could attempt to crash our parser. We do this by - * specifying a maximum depth to which we are allowed to recurse. - */ -#ifndef PRISM_DEPTH_MAXIMUM - #define PRISM_DEPTH_MAXIMUM 10000 -#endif - -/** - * A simple utility macro to concatenate two tokens together, necessary when one - * of the tokens is itself a macro. - */ -#define PM_CONCATENATE(left, right) left ## right - -/** - * We want to be able to use static assertions, but they weren't standardized - * until C11. As such, we polyfill it here by making a hacky typedef that will - * fail to compile due to a negative array size if the condition is false. - */ -#if defined(_Static_assert) -# define PM_STATIC_ASSERT(line, condition, message) _Static_assert(condition, message) -#else -# define PM_STATIC_ASSERT(line, condition, message) typedef char PM_CONCATENATE(static_assert_, line)[(condition) ? 
1 : -1] -#endif - -/** - * isinf on POSIX systems it accepts a float, a double, or a long double. - * But mingw didn't provide an isinf macro, only an isinf function that only - * accepts floats, so we need to use _finite instead. - */ -#ifdef __MINGW64__ - #include - #define PRISM_ISINF(x) (!_finite(x)) -#else - #define PRISM_ISINF(x) isinf(x) -#endif - -/** - * Support PRISM_LIKELY and PRISM_UNLIKELY to help the compiler optimize its - * branch predication. - */ -#if defined(__GNUC__) || defined(__clang__) - /** The compiler should predicate that this branch will be taken. */ - #define PRISM_LIKELY(x) __builtin_expect(!!(x), 1) - - /** The compiler should predicate that this branch will not be taken. */ - #define PRISM_UNLIKELY(x) __builtin_expect(!!(x), 0) -#else - /** Void because this platform does not support branch prediction hints. */ - #define PRISM_LIKELY(x) (x) - - /** Void because this platform does not support branch prediction hints. */ - #define PRISM_UNLIKELY(x) (x) -#endif - -#endif diff --git a/prism.gemspec b/prism.gemspec index fd216e13a7..e2dc185e81 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -61,7 +61,6 @@ Gem::Specification.new do |spec| "include/prism/buffer.h", "include/prism/constant_pool.h", "include/prism/debug_allocator.h", - "include/prism/defines.h", "include/prism/diagnostic.h", "include/prism/encoding.h", "include/prism/excludes.h", diff --git a/rust/ruby-prism-sys/build/main.rs b/rust/ruby-prism-sys/build/main.rs index 0de2b3caba..18441ed623 100644 --- a/rust/ruby-prism-sys/build/main.rs +++ b/rust/ruby-prism-sys/build/main.rs @@ -115,7 +115,6 @@ fn generate_bindings(ruby_include_path: &Path) -> bindgen::Bindings { .derive_default(true) .generate_block(true) .generate_comments(true) - .header(ruby_include_path.join("prism/defines.h").to_str().unwrap()) .header(ruby_include_path.join("prism.h").to_str().unwrap()) .clang_arg(format!("-I{}", ruby_include_path.to_str().unwrap())) .clang_arg("-fparse-all-comments") diff --git 
a/src/prism.c b/src/prism.c index eefd24e711..0e96ee7cff 100644 --- a/src/prism.c +++ b/src/prism.c @@ -1,5 +1,8 @@ #include "prism.h" +#include "prism/attribute/fallthrough.h" +#include "prism/attribute/unused.h" + #include "prism/internal/arena.h" #include "prism/internal/bit.h" #include "prism/internal/buffer.h" @@ -8,6 +11,7 @@ #include "prism/internal/diagnostic.h" #include "prism/internal/encoding.h" #include "prism/internal/integer.h" +#include "prism/internal/isinf.h" #include "prism/internal/line_offset_list.h" #include "prism/internal/list.h" #include "prism/internal/memchr.h" @@ -17,10 +21,58 @@ #include "prism/internal/strings.h" #include "prism/internal/strncasecmp.h" #include "prism/internal/strpbrk.h" +#include "prism/allocator.h" #include "prism/accel.h" #include "prism/node_new.h" +#include +#include + +/** + * When we are parsing using recursive descent, we want to protect against + * malicious payloads that could attempt to crash our parser. We do this by + * specifying a maximum depth to which we are allowed to recurse. + */ +#ifndef PRISM_DEPTH_MAXIMUM + #define PRISM_DEPTH_MAXIMUM 10000 +#endif + +/** + * A simple utility macro to concatenate two tokens together, necessary when one + * of the tokens is itself a macro. + */ +#define PM_CONCATENATE(left, right) left ## right + +/** + * We want to be able to use static assertions, but they weren't standardized + * until C11. As such, we polyfill it here by making a hacky typedef that will + * fail to compile due to a negative array size if the condition is false. + */ +#if defined(_Static_assert) +# define PM_STATIC_ASSERT(line, condition, message) _Static_assert(condition, message) +#else +# define PM_STATIC_ASSERT(line, condition, message) typedef char PM_CONCATENATE(static_assert_, line)[(condition) ? 1 : -1] +#endif + +/** + * Support PRISM_LIKELY and PRISM_UNLIKELY to help the compiler optimize its + * branch predication. 
+ */ +#if defined(__GNUC__) || defined(__clang__) + /** The compiler should predicate that this branch will be taken. */ + #define PRISM_LIKELY(x) __builtin_expect(!!(x), 1) + + /** The compiler should predicate that this branch will not be taken. */ + #define PRISM_UNLIKELY(x) __builtin_expect(!!(x), 0) +#else + /** Void because this platform does not support branch prediction hints. */ + #define PRISM_LIKELY(x) (x) + + /** Void because this platform does not support branch prediction hints. */ + #define PRISM_UNLIKELY(x) (x) +#endif + /** * The prism version and the serialization format. */ diff --git a/templates/ext/prism/api_node.c.erb b/templates/ext/prism/api_node.c.erb index e3bcf116cc..ea9754bdfd 100644 --- a/templates/ext/prism/api_node.c.erb +++ b/templates/ext/prism/api_node.c.erb @@ -1,5 +1,6 @@ #line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>" #include "prism/extension.h" +#include "prism/allocator.h" extern VALUE rb_cPrism; extern VALUE rb_cPrismNode; From 01d575aa2fc5faec7f567265654c9bf1295b69a4 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 13:53:30 -0400 Subject: [PATCH 034/100] Move compiler detection stuff into include/prism/compiler --- include/prism/accel.h | 21 ------------- include/prism/arena.h | 6 ++-- include/prism/attribute/format.h | 28 ----------------- include/prism/attribute/inline.h | 31 ------------------- include/prism/buffer.h | 2 +- include/prism/compiler/accel.h | 19 ++++++++++++ include/prism/{attribute => compiler}/align.h | 8 ++--- .../prism/{attribute => compiler}/exported.h | 8 ++--- .../{attribute => compiler}/fallthrough.h | 9 ++---- .../{attribute => compiler}/flex_array.h | 10 +++--- .../{attribute => compiler}/force_inline.h | 12 +++---- include/prism/compiler/format.h | 25 +++++++++++++++ include/prism/compiler/inline.h | 17 ++++++++++ .../prism/{attribute => compiler}/unused.h | 9 ++---- include/prism/internal/arena.h | 4 +-- include/prism/internal/bit.h | 2 +- 
include/prism/internal/buffer.h | 2 +- include/prism/internal/char.h | 2 +- include/prism/internal/line_offset_list.h | 2 +- include/prism/line_offset_list.h | 2 +- include/prism/node.h | 2 +- include/prism/parser.h | 2 +- include/prism/strings.h | 2 +- prism.gemspec | 18 +++++------ src/buffer.c | 2 +- src/char.c | 2 +- src/constant_pool.c | 4 +-- src/encoding.c | 2 +- src/line_offset_list.c | 2 +- src/options.c | 2 +- src/prism.c | 6 ++-- src/regexp.c | 4 +-- src/static_literals.c | 4 +-- src/strncasecmp.c | 2 +- src/strpbrk.c | 7 +++-- templates/include/prism/ast.h.erb | 2 +- templates/include/prism/diagnostic.h.erb | 2 +- templates/include/prism/node_new.h.erb | 2 +- templates/src/diagnostic.c.erb | 2 +- templates/src/prettyprint.c.erb | 2 +- templates/src/serialize.c.erb | 2 +- 41 files changed, 130 insertions(+), 162 deletions(-) delete mode 100644 include/prism/accel.h delete mode 100644 include/prism/attribute/format.h delete mode 100644 include/prism/attribute/inline.h create mode 100644 include/prism/compiler/accel.h rename include/prism/{attribute => compiler}/align.h (89%) rename include/prism/{attribute => compiler}/exported.h (79%) rename include/prism/{attribute => compiler}/fallthrough.h (73%) rename include/prism/{attribute => compiler}/flex_array.h (62%) rename include/prism/{attribute => compiler}/force_inline.h (55%) create mode 100644 include/prism/compiler/format.h create mode 100644 include/prism/compiler/inline.h rename include/prism/{attribute => compiler}/unused.h (62%) diff --git a/include/prism/accel.h b/include/prism/accel.h deleted file mode 100644 index c8fc1433c6..0000000000 --- a/include/prism/accel.h +++ /dev/null @@ -1,21 +0,0 @@ -/** - * @file accel.h - * - * Platform detection for acceleration implementations. - */ -#ifndef PRISM_INTERNAL_ACCEL_H -#define PRISM_INTERNAL_ACCEL_H - -/** - * Platform detection for SIMD / fast-path implementations. 
At most one of - * these macros is defined, selecting the best available vectorization strategy. - */ -#if (defined(__aarch64__) && defined(__ARM_NEON)) || (defined(_MSC_VER) && defined(_M_ARM64)) - #define PRISM_HAS_NEON -#elif (defined(__x86_64__) && defined(__SSSE3__)) || (defined(_MSC_VER) && defined(_M_X64)) - #define PRISM_HAS_SSSE3 -#elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - #define PRISM_HAS_SWAR -#endif - -#endif diff --git a/include/prism/arena.h b/include/prism/arena.h index dd8ee09ceb..73f2fab900 100644 --- a/include/prism/arena.h +++ b/include/prism/arena.h @@ -6,9 +6,9 @@ #ifndef PRISM_ARENA_H #define PRISM_ARENA_H -#include "prism/attribute/exported.h" -#include "prism/attribute/flex_array.h" -#include "prism/attribute/force_inline.h" +#include "prism/compiler/exported.h" +#include "prism/compiler/flex_array.h" +#include "prism/compiler/force_inline.h" #include diff --git a/include/prism/attribute/format.h b/include/prism/attribute/format.h deleted file mode 100644 index 4ad99fe125..0000000000 --- a/include/prism/attribute/format.h +++ /dev/null @@ -1,28 +0,0 @@ -/** - * @file attribute/format.h - * - * Macro definition for specifying that a function accepts variadic parameters - * that look like printf format strings. - */ -#ifndef PRISM_FORMAT_H -#define PRISM_FORMAT_H - -/** - * Certain compilers support specifying that a function accepts variadic - * parameters that look like printf format strings to provide a better developer - * experience when someone is using the function. This macro does that in a - * compiler-agnostic way. 
- */ -#if defined(__GNUC__) -# if defined(__MINGW_PRINTF_FORMAT) -# define PRISM_ATTRIBUTE_FORMAT(string_index_, argument_index_) __attribute__((format(__MINGW_PRINTF_FORMAT, string_index_, argument_index_))) -# else -# define PRISM_ATTRIBUTE_FORMAT(string_index_, argument_index_) __attribute__((format(printf, string_index_, argument_index_))) -# endif -#elif defined(__clang__) -# define PRISM_ATTRIBUTE_FORMAT(string_index_, argument_index_) __attribute__((__format__(__printf__, string_index_, argument_index_))) -#else -# define PRISM_ATTRIBUTE_FORMAT(string_index_, argument_index_) -#endif - -#endif diff --git a/include/prism/attribute/inline.h b/include/prism/attribute/inline.h deleted file mode 100644 index 79a5dfcbee..0000000000 --- a/include/prism/attribute/inline.h +++ /dev/null @@ -1,31 +0,0 @@ -/** - * @file attribute/inline.h - * - * Macro definitions for forcing a function to be inlined at every call site. - */ -#ifndef PRISM_INLINE_H -#define PRISM_INLINE_H - -/** - * Old Visual Studio versions do not support the inline keyword, so we need to - * define it to be __inline. - */ -#if defined(_MSC_VER) && !defined(inline) -# define PRISM_INLINE __inline -#else -# define PRISM_INLINE inline -#endif - -/** - * Force a function to be inlined at every call site. Use sparingly — only for - * small, hot functions where the compiler's heuristics fail to inline. 
- */ -#if defined(_MSC_VER) -# define PRISM_FORCE_INLINE __forceinline -#elif defined(__GNUC__) || defined(__clang__) -# define PRISM_FORCE_INLINE inline __attribute__((always_inline)) -#else -# define PRISM_FORCE_INLINE PRISM_INLINE -#endif - -#endif diff --git a/include/prism/buffer.h b/include/prism/buffer.h index 90f7c9198e..41b0fd8dc0 100644 --- a/include/prism/buffer.h +++ b/include/prism/buffer.h @@ -6,7 +6,7 @@ #ifndef PRISM_BUFFER_H #define PRISM_BUFFER_H -#include "prism/attribute/exported.h" +#include "prism/compiler/exported.h" #include #include diff --git a/include/prism/compiler/accel.h b/include/prism/compiler/accel.h new file mode 100644 index 0000000000..be23236d1d --- /dev/null +++ b/include/prism/compiler/accel.h @@ -0,0 +1,19 @@ +/** + * @file compiler/accel.h + */ +#ifndef PRISM_COMPILER_ACCEL_H +#define PRISM_COMPILER_ACCEL_H + +/** + * Platform detection for SIMD/fast-path implementations. At most one of these + * macros is defined, selecting the best available vectorization strategy. + */ +#if (defined(__aarch64__) && defined(__ARM_NEON)) || (defined(_MSC_VER) && defined(_M_ARM64)) +# define PRISM_HAS_NEON +#elif (defined(__x86_64__) && defined(__SSSE3__)) || (defined(_MSC_VER) && defined(_M_X64)) +# define PRISM_HAS_SSSE3 +#elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +# define PRISM_HAS_SWAR +#endif + +#endif diff --git a/include/prism/attribute/align.h b/include/prism/compiler/align.h similarity index 89% rename from include/prism/attribute/align.h rename to include/prism/compiler/align.h index 9a21b8d6ec..22cb49a48c 100644 --- a/include/prism/attribute/align.h +++ b/include/prism/compiler/align.h @@ -1,10 +1,8 @@ /** - * @file attribute/align.h - * - * Alignment macros used throughout the prism library. 
+ * @file compiler/align.h */ -#ifndef PRISM_ALIGN_H -#define PRISM_ALIGN_H +#ifndef PRISM_COMPILER_ALIGN_H +#define PRISM_COMPILER_ALIGN_H /** * Compiler-agnostic macros for specifying alignment of types and variables. diff --git a/include/prism/attribute/exported.h b/include/prism/compiler/exported.h similarity index 79% rename from include/prism/attribute/exported.h rename to include/prism/compiler/exported.h index 8cb24848d1..823773ecbb 100644 --- a/include/prism/attribute/exported.h +++ b/include/prism/compiler/exported.h @@ -1,10 +1,8 @@ /** - * @file attribute/exported.h - * - * Macro definitions for make functions publically visible. + * @file compiler/exported.h */ -#ifndef PRISM_EXPORTED_H -#define PRISM_EXPORTED_H +#ifndef PRISM_COMPILER_EXPORTED_H +#define PRISM_COMPILER_EXPORTED_H /** * By default, we compile with -fvisibility=hidden. When this is enabled, we diff --git a/include/prism/attribute/fallthrough.h b/include/prism/compiler/fallthrough.h similarity index 73% rename from include/prism/attribute/fallthrough.h rename to include/prism/compiler/fallthrough.h index aefe3e48d7..ce1b450e8a 100644 --- a/include/prism/attribute/fallthrough.h +++ b/include/prism/compiler/fallthrough.h @@ -1,11 +1,8 @@ /** - * @file attribute/fallthrough.h - * - * Define the PRISM_FALLTHROUGH macro, which is used to annotate intentional - * fallthroughs in switch statements. 
+ * @file compiler/fallthrough.h */ -#ifndef PRISM_FALLTHROUGH_H -#define PRISM_FALLTHROUGH_H +#ifndef PRISM_COMPILER_FALLTHROUGH_H +#define PRISM_COMPILER_FALLTHROUGH_H /** * We use -Wimplicit-fallthrough to guard potentially unintended fall-through diff --git a/include/prism/attribute/flex_array.h b/include/prism/compiler/flex_array.h similarity index 62% rename from include/prism/attribute/flex_array.h rename to include/prism/compiler/flex_array.h index 8daefba32b..7504b5fdd3 100644 --- a/include/prism/attribute/flex_array.h +++ b/include/prism/compiler/flex_array.h @@ -1,13 +1,11 @@ /** - * @file attribute/flex_array.h - * - * Macro definitions for working with flexible array members. + * @file compiler/flex_array.h */ -#ifndef PRISM_FLEX_ARRAY_H -#define PRISM_FLEX_ARRAY_H +#ifndef PRISM_COMPILER_FLEX_ARRAY_H +#define PRISM_COMPILER_FLEX_ARRAY_H /** - * A macro for defining a flexible array member. C99 supports `data[]`, GCC + * A macro to help define a flexible array member. C99 supports `data[]`, GCC * supports `data[0]` as an extension, and older compilers require `data[1]`. */ #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) diff --git a/include/prism/attribute/force_inline.h b/include/prism/compiler/force_inline.h similarity index 55% rename from include/prism/attribute/force_inline.h rename to include/prism/compiler/force_inline.h index 7f1e8c57f8..e189d592d6 100644 --- a/include/prism/attribute/force_inline.h +++ b/include/prism/compiler/force_inline.h @@ -1,12 +1,10 @@ /** - * @file attribute/force_inline.h - * - * Macro definitions for forcing a function to be inlined at every call site. + * @file compiler/force_inline.h */ -#ifndef PRISM_FORCE_INLINE_H -#define PRISM_FORCE_INLINE_H +#ifndef PRISM_COMPILER_FORCE_INLINE_H +#define PRISM_COMPILER_FORCE_INLINE_H -#include "prism/attribute/inline.h" +#include "prism/compiler/inline.h" /** * Force a function to be inlined at every call site. 
Use sparingly — only for @@ -15,7 +13,7 @@ #if defined(_MSC_VER) # define PRISM_FORCE_INLINE __forceinline #elif defined(__GNUC__) || defined(__clang__) -# define PRISM_FORCE_INLINE inline __attribute__((always_inline)) +# define PRISM_FORCE_INLINE PRISM_INLINE __attribute__((always_inline)) #else # define PRISM_FORCE_INLINE PRISM_INLINE #endif diff --git a/include/prism/compiler/format.h b/include/prism/compiler/format.h new file mode 100644 index 0000000000..32f4c3c6d7 --- /dev/null +++ b/include/prism/compiler/format.h @@ -0,0 +1,25 @@ +/** + * @file compiler/format.h + */ +#ifndef PRISM_COMPILER_FORMAT_H +#define PRISM_COMPILER_FORMAT_H + +/** + * Certain compilers support specifying that a function accepts variadic + * parameters that look like printf format strings to provide a better developer + * experience when someone is using the function. This macro does that in a + * compiler-agnostic way. + */ +#if defined(__GNUC__) +# if defined(__MINGW_PRINTF_FORMAT) +# define PRISM_ATTRIBUTE_FORMAT(fmt_idx_, arg_idx_) __attribute__((format(__MINGW_PRINTF_FORMAT, fmt_idx_, arg_idx_))) +# else +# define PRISM_ATTRIBUTE_FORMAT(fmt_idx_, arg_idx_) __attribute__((format(printf, fmt_idx_, arg_idx_))) +# endif +#elif defined(__clang__) +# define PRISM_ATTRIBUTE_FORMAT(fmt_idx_, arg_idx_) __attribute__((__format__(__printf__, fmt_idx_, arg_idx_))) +#else +# define PRISM_ATTRIBUTE_FORMAT(fmt_idx_, arg_idx_) +#endif + +#endif diff --git a/include/prism/compiler/inline.h b/include/prism/compiler/inline.h new file mode 100644 index 0000000000..856a375691 --- /dev/null +++ b/include/prism/compiler/inline.h @@ -0,0 +1,17 @@ +/** + * @file compiler/inline.h + */ +#ifndef PRISM_COMPILER_INLINE_H +#define PRISM_COMPILER_INLINE_H + +/** + * Old Visual Studio versions do not support the inline keyword, so we need to + * define it to be __inline. 
+ */ +#if defined(_MSC_VER) && !defined(inline) +# define PRISM_INLINE __inline +#else +# define PRISM_INLINE inline +#endif + +#endif diff --git a/include/prism/attribute/unused.h b/include/prism/compiler/unused.h similarity index 62% rename from include/prism/attribute/unused.h rename to include/prism/compiler/unused.h index 37a7b00f40..fced007f9b 100644 --- a/include/prism/attribute/unused.h +++ b/include/prism/compiler/unused.h @@ -1,11 +1,8 @@ /** - * @file attribute/unused.h - * - * Macro definitions for marking functions and parameters as unused to suppress - * compiler warnings. + * @file compiler/unused.h */ -#ifndef PRISM_ATTRIBUTE_UNUSED_H -#define PRISM_ATTRIBUTE_UNUSED_H +#ifndef PRISM_COMPILER_UNUSED_H +#define PRISM_COMPILER_UNUSED_H /** * GCC will warn if you specify a function or parameter that is unused at diff --git a/include/prism/internal/arena.h b/include/prism/internal/arena.h index 8d263d9e6e..8a2dd2a2b9 100644 --- a/include/prism/internal/arena.h +++ b/include/prism/internal/arena.h @@ -6,8 +6,8 @@ #ifndef PRISM_INTERNAL_ARENA_H #define PRISM_INTERNAL_ARENA_H -#include "prism/attribute/exported.h" -#include "prism/attribute/inline.h" +#include "prism/compiler/exported.h" +#include "prism/compiler/inline.h" #include "prism/arena.h" #include diff --git a/include/prism/internal/bit.h b/include/prism/internal/bit.h index 06ac3be2ee..b4249825a3 100644 --- a/include/prism/internal/bit.h +++ b/include/prism/internal/bit.h @@ -6,7 +6,7 @@ #ifndef PRISM_INTERNAL_BIT_H #define PRISM_INTERNAL_BIT_H -#include "prism/attribute/inline.h" +#include "prism/compiler/inline.h" /** * Count trailing zero bits in a 64-bit value. 
Used by SWAR identifier scanning diff --git a/include/prism/internal/buffer.h b/include/prism/internal/buffer.h index b1d360c91e..8066bd9a97 100644 --- a/include/prism/internal/buffer.h +++ b/include/prism/internal/buffer.h @@ -7,7 +7,7 @@ #define PRISM_INTERNAL_BUFFER_H #include "prism/buffer.h" -#include "prism/attribute/format.h" +#include "prism/compiler/format.h" #include diff --git a/include/prism/internal/char.h b/include/prism/internal/char.h index b3975862dd..6abc998c95 100644 --- a/include/prism/internal/char.h +++ b/include/prism/internal/char.h @@ -6,7 +6,7 @@ #ifndef PRISM_INTERNAL_CHAR_H #define PRISM_INTERNAL_CHAR_H -#include "prism/attribute/force_inline.h" +#include "prism/compiler/force_inline.h" #include "prism/arena.h" #include "prism/line_offset_list.h" diff --git a/include/prism/internal/line_offset_list.h b/include/prism/internal/line_offset_list.h index 97fa050924..5c8ae4763c 100644 --- a/include/prism/internal/line_offset_list.h +++ b/include/prism/internal/line_offset_list.h @@ -16,7 +16,7 @@ #include "prism/line_offset_list.h" -#include "prism/attribute/force_inline.h" +#include "prism/compiler/force_inline.h" #include "prism/arena.h" /** diff --git a/include/prism/line_offset_list.h b/include/prism/line_offset_list.h index 0211b990fe..9d0ddf3889 100644 --- a/include/prism/line_offset_list.h +++ b/include/prism/line_offset_list.h @@ -14,7 +14,7 @@ #ifndef PRISM_LINE_OFFSET_LIST_H #define PRISM_LINE_OFFSET_LIST_H -#include "prism/attribute/exported.h" +#include "prism/compiler/exported.h" #include #include diff --git a/include/prism/node.h b/include/prism/node.h index e59baedbd0..939dbb0c82 100644 --- a/include/prism/node.h +++ b/include/prism/node.h @@ -6,7 +6,7 @@ #ifndef PRISM_NODE_H #define PRISM_NODE_H -#include "prism/attribute/exported.h" +#include "prism/compiler/exported.h" #include "prism/arena.h" #include "prism/ast.h" diff --git a/include/prism/parser.h b/include/prism/parser.h index 9dc666661b..76f9a493d1 100644 --- 
a/include/prism/parser.h +++ b/include/prism/parser.h @@ -6,7 +6,7 @@ #ifndef PRISM_PARSER_H #define PRISM_PARSER_H -#include "prism/accel.h" +#include "prism/compiler/accel.h" #include "prism/arena.h" #include "prism/ast.h" #include "prism/encoding.h" diff --git a/include/prism/strings.h b/include/prism/strings.h index da28b76940..e3da5c5a9a 100644 --- a/include/prism/strings.h +++ b/include/prism/strings.h @@ -6,7 +6,7 @@ #ifndef PRISM_STRINGS_H #define PRISM_STRINGS_H -#include "prism/attribute/exported.h" +#include "prism/compiler/exported.h" #include "prism/files.h" #include diff --git a/prism.gemspec b/prism.gemspec index e2dc185e81..d0f8ff3b45 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -46,15 +46,15 @@ Gem::Specification.new do |spec| "ext/prism/extension.c", "ext/prism/extension.h", "include/prism.h", - "include/prism/attribute/align.h", - "include/prism/attribute/exported.h", - "include/prism/attribute/fallthrough.h", - "include/prism/attribute/flex_array.h", - "include/prism/attribute/force_inline.h", - "include/prism/attribute/format.h", - "include/prism/attribute/inline.h", - "include/prism/attribute/unused.h", - "include/prism/accel.h", + "include/prism/compiler/accel.h", + "include/prism/compiler/align.h", + "include/prism/compiler/exported.h", + "include/prism/compiler/fallthrough.h", + "include/prism/compiler/flex_array.h", + "include/prism/compiler/force_inline.h", + "include/prism/compiler/format.h", + "include/prism/compiler/inline.h", + "include/prism/compiler/unused.h", "include/prism/allocator.h", "include/prism/arena.h", "include/prism/ast.h", diff --git a/src/buffer.c b/src/buffer.c index a54c55182b..c5f6b72ef3 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -1,6 +1,6 @@ #include "prism/internal/buffer.h" -#include "prism/attribute/inline.h" +#include "prism/compiler/inline.h" #include "prism/internal/char.h" #include "prism/allocator.h" diff --git a/src/char.c b/src/char.c index 1b5438cd0c..08e457aa1f 100644 --- a/src/char.c +++ 
b/src/char.c @@ -1,6 +1,6 @@ #include "prism/internal/char.h" -#include "prism/attribute/inline.h" +#include "prism/compiler/inline.h" #include "prism/internal/line_offset_list.h" #define PRISM_CHAR_BIT_REGEXP_OPTION (1 << 2) diff --git a/src/constant_pool.c b/src/constant_pool.c index 7de70a47a4..0baab71997 100644 --- a/src/constant_pool.c +++ b/src/constant_pool.c @@ -1,7 +1,7 @@ #include "prism/constant_pool.h" -#include "prism/attribute/align.h" -#include "prism/attribute/inline.h" +#include "prism/compiler/align.h" +#include "prism/compiler/inline.h" #include "prism/internal/arena.h" #include diff --git a/src/encoding.c b/src/encoding.c index f43d0fd079..0425a2c5b8 100644 --- a/src/encoding.c +++ b/src/encoding.c @@ -1,6 +1,6 @@ #include "prism/internal/encoding.h" -#include "prism/attribute/unused.h" +#include "prism/compiler/unused.h" #include "prism/internal/strncasecmp.h" #include diff --git a/src/line_offset_list.c b/src/line_offset_list.c index 17946a224a..ce217ebd3f 100644 --- a/src/line_offset_list.c +++ b/src/line_offset_list.c @@ -1,4 +1,4 @@ -#include "prism/attribute/align.h" +#include "prism/compiler/align.h" #include "prism/internal/line_offset_list.h" #include "prism/internal/arena.h" diff --git a/src/options.c b/src/options.c index e38bd92ca9..b089071ec6 100644 --- a/src/options.c +++ b/src/options.c @@ -1,6 +1,6 @@ #include "prism/options.h" -#include "prism/attribute/inline.h" +#include "prism/compiler/inline.h" #include "prism/internal/char.h" #include "prism/allocator.h" diff --git a/src/prism.c b/src/prism.c index 0e96ee7cff..5fed8de93b 100644 --- a/src/prism.c +++ b/src/prism.c @@ -1,7 +1,8 @@ #include "prism.h" -#include "prism/attribute/fallthrough.h" -#include "prism/attribute/unused.h" +#include "prism/compiler/accel.h" +#include "prism/compiler/fallthrough.h" +#include "prism/compiler/unused.h" #include "prism/internal/arena.h" #include "prism/internal/bit.h" @@ -23,7 +24,6 @@ #include "prism/internal/strpbrk.h" #include 
"prism/allocator.h" -#include "prism/accel.h" #include "prism/node_new.h" #include diff --git a/src/regexp.c b/src/regexp.c index 0ed501092b..566d65a7fd 100644 --- a/src/regexp.c +++ b/src/regexp.c @@ -1,7 +1,7 @@ #include "prism/internal/regexp.h" -#include "prism/attribute/inline.h" -#include "prism/attribute/fallthrough.h" +#include "prism/compiler/inline.h" +#include "prism/compiler/fallthrough.h" #include "prism/internal/buffer.h" #include "prism/internal/char.h" #include "prism/internal/diagnostic.h" diff --git a/src/static_literals.c b/src/static_literals.c index 73e479f8d3..7fc872fe25 100644 --- a/src/static_literals.c +++ b/src/static_literals.c @@ -1,7 +1,7 @@ #include "prism/internal/static_literals.h" -#include "prism/attribute/inline.h" -#include "prism/attribute/unused.h" +#include "prism/compiler/inline.h" +#include "prism/compiler/unused.h" #include "prism/internal/buffer.h" #include "prism/internal/integer.h" #include "prism/internal/isinf.h" diff --git a/src/strncasecmp.c b/src/strncasecmp.c index 80eb399d04..a373cad6d7 100644 --- a/src/strncasecmp.c +++ b/src/strncasecmp.c @@ -1,6 +1,6 @@ #include "prism/internal/strncasecmp.h" -#include "prism/attribute/inline.h" +#include "prism/compiler/inline.h" /** * A locale-insensitive version of `tolower(3)` diff --git a/src/strpbrk.c b/src/strpbrk.c index 59330fc172..6db4fd31bf 100644 --- a/src/strpbrk.c +++ b/src/strpbrk.c @@ -1,11 +1,12 @@ #include "prism/internal/strpbrk.h" -#include "prism/attribute/inline.h" -#include "prism/attribute/unused.h" +#include "prism/compiler/accel.h" +#include "prism/compiler/inline.h" +#include "prism/compiler/unused.h" + #include "prism/internal/bit.h" #include "prism/internal/diagnostic.h" #include "prism/internal/encoding.h" -#include "prism/accel.h" #include #include diff --git a/templates/include/prism/ast.h.erb b/templates/include/prism/ast.h.erb index af3403c293..670efc7045 100644 --- a/templates/include/prism/ast.h.erb +++ b/templates/include/prism/ast.h.erb @@ 
-8,7 +8,7 @@ #ifndef PRISM_AST_H #define PRISM_AST_H -#include "prism/attribute/align.h" +#include "prism/compiler/align.h" #include "prism/constant_pool.h" #include "prism/integer.h" #include "prism/strings.h" diff --git a/templates/include/prism/diagnostic.h.erb b/templates/include/prism/diagnostic.h.erb index a3964cc40c..1443617811 100644 --- a/templates/include/prism/diagnostic.h.erb +++ b/templates/include/prism/diagnostic.h.erb @@ -6,7 +6,7 @@ #ifndef PRISM_DIAGNOSTIC_H #define PRISM_DIAGNOSTIC_H -#include "prism/attribute/exported.h" +#include "prism/compiler/exported.h" #include "prism/ast.h" #include "prism/list.h" diff --git a/templates/include/prism/node_new.h.erb b/templates/include/prism/node_new.h.erb index 27a7ed22c6..cda6174705 100644 --- a/templates/include/prism/node_new.h.erb +++ b/templates/include/prism/node_new.h.erb @@ -10,7 +10,7 @@ #include "prism/node.h" -#include "prism/attribute/inline.h" +#include "prism/compiler/inline.h" <%- nodes.each do |node| -%> <%- params = node.fields.map(&:c_param) -%> diff --git a/templates/src/diagnostic.c.erb b/templates/src/diagnostic.c.erb index 8a4abc14eb..15b2a776a7 100644 --- a/templates/src/diagnostic.c.erb +++ b/templates/src/diagnostic.c.erb @@ -1,6 +1,6 @@ #include "prism/diagnostic.h" -#include "prism/attribute/inline.h" +#include "prism/compiler/inline.h" #include "prism/internal/arena.h" #include "prism/internal/list.h" diff --git a/templates/src/prettyprint.c.erb b/templates/src/prettyprint.c.erb index db2d52ddcf..b702712742 100644 --- a/templates/src/prettyprint.c.erb +++ b/templates/src/prettyprint.c.erb @@ -1,7 +1,7 @@ <%# encoding: ASCII -%> #include "prism/prettyprint.h" -#include "prism/attribute/inline.h" +#include "prism/compiler/inline.h" #include "prism/internal/buffer.h" #include "prism/internal/constant_pool.h" #include "prism/internal/integer.h" diff --git a/templates/src/serialize.c.erb b/templates/src/serialize.c.erb index 39eb2b521a..a69e38f010 100644 --- 
a/templates/src/serialize.c.erb +++ b/templates/src/serialize.c.erb @@ -1,6 +1,6 @@ #include "prism.h" -#include "prism/attribute/inline.h" +#include "prism/compiler/inline.h" #include "prism/internal/buffer.h" #include "prism/internal/encoding.h" #include "prism/internal/list.h" From 1e3ec12adce5e93ed1ae1f4612b359d359204e8d Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 13:58:37 -0400 Subject: [PATCH 035/100] Move allocator to internal headers --- Doxyfile | 2 +- docs/build_system.md | 2 +- include/prism/{ => internal}/allocator.h | 8 ++-- .../allocator_debug.h} | 44 ++++++++----------- prism.gemspec | 40 ++++++++--------- src/arena.c | 3 +- src/buffer.c | 3 +- src/integer.c | 5 ++- src/options.c | 3 +- src/prism.c | 2 +- src/static_literals.c | 3 +- src/strings.c | 2 +- templates/ext/prism/api_node.c.erb | 2 +- 13 files changed, 59 insertions(+), 60 deletions(-) rename include/prism/{ => internal}/allocator.h (93%) rename include/prism/{debug_allocator.h => internal/allocator_debug.h} (62%) diff --git a/Doxyfile b/Doxyfile index fca1e8c314..00bb3537ab 100644 --- a/Doxyfile +++ b/Doxyfile @@ -24,7 +24,7 @@ OUTPUT_DIRECTORY = doc JAVADOC_AUTOBRIEF = YES OPTIMIZE_OUTPUT_FOR_C = YES INPUT = src include include/prism -EXCLUDE = include/prism/debug_allocator.h +EXCLUDE = include/prism/internal/allocator_debug.h HTML_OUTPUT = c SORT_MEMBER_DOCS = NO GENERATE_LATEX = NO diff --git a/docs/build_system.md b/docs/build_system.md index 2e2749e62c..40db4a17fc 100644 --- a/docs/build_system.md +++ b/docs/build_system.md @@ -87,7 +87,7 @@ If you need to use memory allocation functions implemented outside of the standa * Additionally, include `-I [path/to/custom_allocator]` where your `prism_xallocator.h` is located * Link the implementation of `prism_xallocator.c` that contains functions declared in `prism_xallocator.h` -For further clarity, refer to `include/prism/allocator.h`. +For further clarity, refer to `include/prism/internal/allocator.h`. 
### Building prism from source as a C library diff --git a/include/prism/allocator.h b/include/prism/internal/allocator.h similarity index 93% rename from include/prism/allocator.h rename to include/prism/internal/allocator.h index 6b1c26e84e..bd46257e44 100644 --- a/include/prism/allocator.h +++ b/include/prism/internal/allocator.h @@ -1,10 +1,10 @@ /** - * @file allocator.h + * @file internal/allocator.h * * Macro definitions for defining the main and a custom allocator for Prism. */ -#ifndef PRISM_ALLOCATOR_H -#define PRISM_ALLOCATOR_H +#ifndef PRISM_INTERNAL_ALLOCATOR_H +#define PRISM_INTERNAL_ALLOCATOR_H /** * If you build Prism with a custom allocator, configure it with @@ -79,7 +79,7 @@ #endif #ifdef PRISM_BUILD_DEBUG - #include "prism/debug_allocator.h" + #include "prism/internal/allocator_debug.h" #endif #endif diff --git a/include/prism/debug_allocator.h b/include/prism/internal/allocator_debug.h similarity index 62% rename from include/prism/debug_allocator.h rename to include/prism/internal/allocator_debug.h index 3e28a95efb..40f2a7b4cf 100644 --- a/include/prism/debug_allocator.h +++ b/include/prism/internal/allocator_debug.h @@ -1,37 +1,34 @@ /** - * @file debug_allocator.h + * @file internal/allocator_debug.h * * Decorate allocation function to ensure sizes are correct. 
*/ -#ifndef PRISM_DEBUG_ALLOCATOR_H -#define PRISM_DEBUG_ALLOCATOR_H +#ifndef PRISM_INTERNAL_ALLOCATOR_DEBUG_H +#define PRISM_INTERNAL_ALLOCATOR_DEBUG_H #include #include #include static inline void * -pm_debug_malloc(size_t size) -{ +pm_allocator_debug_malloc(size_t size) { size_t *memory = xmalloc(size + sizeof(size_t)); memory[0] = size; return memory + 1; } static inline void * -pm_debug_calloc(size_t nmemb, size_t size) -{ +pm_allocator_debug_calloc(size_t nmemb, size_t size) { size_t total_size = nmemb * size; - void *ptr = pm_debug_malloc(total_size); + void *ptr = pm_allocator_debug_malloc(total_size); memset(ptr, 0, total_size); return ptr; } static inline void * -pm_debug_realloc(void *ptr, size_t size) -{ +pm_allocator_debug_realloc(void *ptr, size_t size) { if (ptr == NULL) { - return pm_debug_malloc(size); + return pm_allocator_debug_malloc(size); } size_t *memory = (size_t *)ptr; @@ -42,8 +39,7 @@ pm_debug_realloc(void *ptr, size_t size) } static inline void -pm_debug_free(void *ptr) -{ +pm_allocator_debug_free(void *ptr) { if (ptr != NULL) { size_t *memory = (size_t *)ptr; xfree(memory - 1); @@ -51,8 +47,7 @@ pm_debug_free(void *ptr) } static inline void -pm_debug_free_sized(void *ptr, size_t old_size) -{ +pm_allocator_debug_free_sized(void *ptr, size_t old_size) { if (ptr != NULL) { size_t *memory = (size_t *)ptr; if (old_size != memory[-1]) { @@ -64,14 +59,13 @@ pm_debug_free_sized(void *ptr, size_t old_size) } static inline void * -pm_debug_realloc_sized(void *ptr, size_t size, size_t old_size) -{ +pm_allocator_debug_realloc_sized(void *ptr, size_t size, size_t old_size) { if (ptr == NULL) { if (old_size != 0) { fprintf(stderr, "[BUG] realloc_sized called with NULL pointer and old size %lu\n", old_size); abort(); } - return pm_debug_malloc(size); + return pm_allocator_debug_malloc(size); } size_t *memory = (size_t *)ptr; @@ -79,7 +73,7 @@ pm_debug_realloc_sized(void *ptr, size_t size, size_t old_size) fprintf(stderr, "[BUG] buffer %p was allocated 
with size %lu but realloced with size %lu\n", ptr, memory[-1], old_size); abort(); } - return pm_debug_realloc(ptr, size); + return pm_allocator_debug_realloc(ptr, size); } #undef xmalloc @@ -89,11 +83,11 @@ pm_debug_realloc_sized(void *ptr, size_t size, size_t old_size) #undef xrealloc_sized #undef xfree_sized -#define xmalloc pm_debug_malloc -#define xrealloc pm_debug_realloc -#define xcalloc pm_debug_calloc -#define xfree pm_debug_free -#define xrealloc_sized pm_debug_realloc_sized -#define xfree_sized pm_debug_free_sized +#define xmalloc pm_allocator_debug_malloc +#define xrealloc pm_allocator_debug_realloc +#define xcalloc pm_allocator_debug_calloc +#define xfree pm_allocator_debug_free +#define xrealloc_sized pm_allocator_debug_realloc_sized +#define xfree_sized pm_allocator_debug_free_sized #endif diff --git a/prism.gemspec b/prism.gemspec index d0f8ff3b45..a5bde09328 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -55,12 +55,30 @@ Gem::Specification.new do |spec| "include/prism/compiler/format.h", "include/prism/compiler/inline.h", "include/prism/compiler/unused.h", - "include/prism/allocator.h", + "include/prism/internal/allocator.h", + "include/prism/internal/allocator_debug.h", + "include/prism/internal/arena.h", + "include/prism/internal/bit.h", + "include/prism/internal/buffer.h", + "include/prism/internal/char.h", + "include/prism/internal/constant_pool.h", + "include/prism/internal/diagnostic.h", + "include/prism/internal/encoding.h", + "include/prism/internal/integer.h", + "include/prism/internal/isinf.h", + "include/prism/internal/line_offset_list.h", + "include/prism/internal/list.h", + "include/prism/internal/memchr.h", + "include/prism/internal/options.h", + "include/prism/internal/regexp.h", + "include/prism/internal/static_literals.h", + "include/prism/internal/strncasecmp.h", + "include/prism/internal/strings.h", + "include/prism/internal/strpbrk.h", "include/prism/arena.h", "include/prism/ast.h", "include/prism/buffer.h", 
"include/prism/constant_pool.h", - "include/prism/debug_allocator.h", "include/prism/diagnostic.h", "include/prism/encoding.h", "include/prism/excludes.h", @@ -76,24 +94,6 @@ Gem::Specification.new do |spec| "include/prism/static_literals.h", "include/prism/strings.h", "include/prism/version.h", - "include/prism/internal/arena.h", - "include/prism/internal/bit.h", - "include/prism/internal/buffer.h", - "include/prism/internal/char.h", - "include/prism/internal/constant_pool.h", - "include/prism/internal/diagnostic.h", - "include/prism/internal/encoding.h", - "include/prism/internal/integer.h", - "include/prism/internal/isinf.h", - "include/prism/internal/line_offset_list.h", - "include/prism/internal/list.h", - "include/prism/internal/memchr.h", - "include/prism/internal/options.h", - "include/prism/internal/regexp.h", - "include/prism/internal/static_literals.h", - "include/prism/internal/strncasecmp.h", - "include/prism/internal/strings.h", - "include/prism/internal/strpbrk.h", "lib/prism.rb", "lib/prism/compiler.rb", "lib/prism/desugar_compiler.rb", diff --git a/src/arena.c b/src/arena.c index d7ce9c043c..d0bd6f139c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1,5 +1,6 @@ #include "prism/arena.h" -#include "prism/allocator.h" + +#include "prism/internal/allocator.h" #include #include diff --git a/src/buffer.c b/src/buffer.c index c5f6b72ef3..d689a93203 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -1,8 +1,9 @@ #include "prism/internal/buffer.h" #include "prism/compiler/inline.h" + #include "prism/internal/char.h" -#include "prism/allocator.h" +#include "prism/internal/allocator.h" #include #include diff --git a/src/integer.c b/src/integer.c index 35dd5e0eea..1b69dbdceb 100644 --- a/src/integer.c +++ b/src/integer.c @@ -1,7 +1,8 @@ -#include "prism/allocator.h" -#include "prism/internal/buffer.h" #include "prism/internal/integer.h" +#include "prism/internal/allocator.h" +#include "prism/internal/buffer.h" + #include #include #include diff --git 
a/src/options.c b/src/options.c index b089071ec6..39f4f2fea5 100644 --- a/src/options.c +++ b/src/options.c @@ -1,8 +1,9 @@ #include "prism/options.h" #include "prism/compiler/inline.h" + +#include "prism/internal/allocator.h" #include "prism/internal/char.h" -#include "prism/allocator.h" #include #include diff --git a/src/prism.c b/src/prism.c index 5fed8de93b..68730a82e3 100644 --- a/src/prism.c +++ b/src/prism.c @@ -4,6 +4,7 @@ #include "prism/compiler/fallthrough.h" #include "prism/compiler/unused.h" +#include "prism/internal/allocator.h" #include "prism/internal/arena.h" #include "prism/internal/bit.h" #include "prism/internal/buffer.h" @@ -22,7 +23,6 @@ #include "prism/internal/strings.h" #include "prism/internal/strncasecmp.h" #include "prism/internal/strpbrk.h" -#include "prism/allocator.h" #include "prism/node_new.h" diff --git a/src/static_literals.c b/src/static_literals.c index 7fc872fe25..c66fa7724a 100644 --- a/src/static_literals.c +++ b/src/static_literals.c @@ -2,11 +2,12 @@ #include "prism/compiler/inline.h" #include "prism/compiler/unused.h" + +#include "prism/internal/allocator.h" #include "prism/internal/buffer.h" #include "prism/internal/integer.h" #include "prism/internal/isinf.h" #include "prism/internal/strings.h" -#include "prism/allocator.h" #include #include diff --git a/src/strings.c b/src/strings.c index da7548112b..37b48293ab 100644 --- a/src/strings.c +++ b/src/strings.c @@ -1,6 +1,6 @@ #include "prism/internal/strings.h" -#include "prism/allocator.h" +#include "prism/internal/allocator.h" #include #include diff --git a/templates/ext/prism/api_node.c.erb b/templates/ext/prism/api_node.c.erb index ea9754bdfd..be6bd113ad 100644 --- a/templates/ext/prism/api_node.c.erb +++ b/templates/ext/prism/api_node.c.erb @@ -1,6 +1,6 @@ #line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>" #include "prism/extension.h" -#include "prism/allocator.h" +#include "prism/internal/allocator.h" extern VALUE rb_cPrism; extern 
VALUE rb_cPrismNode; From 323f7f16f84a9ec2b2e5adb03937de51014c779d Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 14:01:59 -0400 Subject: [PATCH 036/100] Move file system into compiler headers --- include/prism/{files.h => compiler/filesystem.h} | 6 +++--- include/prism/node.h | 1 + include/prism/parser.h | 1 + include/prism/prettyprint.h | 2 ++ include/prism/strings.h | 2 +- prism.gemspec | 2 +- templates/include/prism/ast.h.erb | 1 + templates/include/prism/diagnostic.h.erb | 1 + templates/include/prism/node_new.h.erb | 4 ++-- 9 files changed, 13 insertions(+), 7 deletions(-) rename include/prism/{files.h => compiler/filesystem.h} (87%) diff --git a/include/prism/files.h b/include/prism/compiler/filesystem.h similarity index 87% rename from include/prism/files.h rename to include/prism/compiler/filesystem.h index 8a74e03be8..f988909db8 100644 --- a/include/prism/files.h +++ b/include/prism/compiler/filesystem.h @@ -1,10 +1,10 @@ /** - * @file files.h + * @file compiler/filesystem.h * * Platform detection for mmap and filesystem support. */ -#ifndef PRISM_FILES_H -#define PRISM_FILES_H +#ifndef PRISM_COMPILER_FILESYSTEM_H +#define PRISM_COMPILER_FILESYSTEM_H /** * In general, libc for embedded systems does not support memory-mapped files. 
diff --git a/include/prism/node.h b/include/prism/node.h index 939dbb0c82..6d4512bd37 100644 --- a/include/prism/node.h +++ b/include/prism/node.h @@ -7,6 +7,7 @@ #define PRISM_NODE_H #include "prism/compiler/exported.h" + #include "prism/arena.h" #include "prism/ast.h" diff --git a/include/prism/parser.h b/include/prism/parser.h index 76f9a493d1..53c9472171 100644 --- a/include/prism/parser.h +++ b/include/prism/parser.h @@ -7,6 +7,7 @@ #define PRISM_PARSER_H #include "prism/compiler/accel.h" + #include "prism/arena.h" #include "prism/ast.h" #include "prism/encoding.h" diff --git a/include/prism/prettyprint.h b/include/prism/prettyprint.h index 65c4302644..b59cfe1460 100644 --- a/include/prism/prettyprint.h +++ b/include/prism/prettyprint.h @@ -14,6 +14,8 @@ #else +#include "prism/compiler/exported.h" + #include "prism/ast.h" #include "prism/buffer.h" #include "prism/parser.h" diff --git a/include/prism/strings.h b/include/prism/strings.h index e3da5c5a9a..6f659fc0e7 100644 --- a/include/prism/strings.h +++ b/include/prism/strings.h @@ -7,7 +7,7 @@ #define PRISM_STRINGS_H #include "prism/compiler/exported.h" -#include "prism/files.h" +#include "prism/compiler/filesystem.h" #include #include diff --git a/prism.gemspec b/prism.gemspec index a5bde09328..6fc30c6f69 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -50,6 +50,7 @@ Gem::Specification.new do |spec| "include/prism/compiler/align.h", "include/prism/compiler/exported.h", "include/prism/compiler/fallthrough.h", + "include/prism/compiler/filesystem.h", "include/prism/compiler/flex_array.h", "include/prism/compiler/force_inline.h", "include/prism/compiler/format.h", @@ -82,7 +83,6 @@ Gem::Specification.new do |spec| "include/prism/diagnostic.h", "include/prism/encoding.h", "include/prism/excludes.h", - "include/prism/files.h", "include/prism/integer.h", "include/prism/line_offset_list.h", "include/prism/list.h", diff --git a/templates/include/prism/ast.h.erb b/templates/include/prism/ast.h.erb index 
670efc7045..754d05a216 100644 --- a/templates/include/prism/ast.h.erb +++ b/templates/include/prism/ast.h.erb @@ -9,6 +9,7 @@ #define PRISM_AST_H #include "prism/compiler/align.h" + #include "prism/constant_pool.h" #include "prism/integer.h" #include "prism/strings.h" diff --git a/templates/include/prism/diagnostic.h.erb b/templates/include/prism/diagnostic.h.erb index 1443617811..dceb21cf65 100644 --- a/templates/include/prism/diagnostic.h.erb +++ b/templates/include/prism/diagnostic.h.erb @@ -7,6 +7,7 @@ #define PRISM_DIAGNOSTIC_H #include "prism/compiler/exported.h" + #include "prism/ast.h" #include "prism/list.h" diff --git a/templates/include/prism/node_new.h.erb b/templates/include/prism/node_new.h.erb index cda6174705..4a253fc02e 100644 --- a/templates/include/prism/node_new.h.erb +++ b/templates/include/prism/node_new.h.erb @@ -8,10 +8,10 @@ #ifndef PRISM_NODE_NEW_H #define PRISM_NODE_NEW_H -#include "prism/node.h" - #include "prism/compiler/inline.h" +#include "prism/node.h" + <%- nodes.each do |node| -%> <%- params = node.fields.map(&:c_param) -%> /** From a5dfba8dbe3b1fb2e5dacc8339ffc92485e27f4c Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 14:03:35 -0400 Subject: [PATCH 037/100] Trim down even more of internal header includes --- include/prism/internal/arena.h | 1 + include/prism/internal/buffer.h | 3 ++- include/prism/internal/char.h | 1 + include/prism/internal/diagnostic.h | 2 +- include/prism/internal/line_offset_list.h | 4 ++-- include/prism/internal/static_literals.h | 3 +-- 6 files changed, 8 insertions(+), 6 deletions(-) diff --git a/include/prism/internal/arena.h b/include/prism/internal/arena.h index 8a2dd2a2b9..747484fa2f 100644 --- a/include/prism/internal/arena.h +++ b/include/prism/internal/arena.h @@ -8,6 +8,7 @@ #include "prism/compiler/exported.h" #include "prism/compiler/inline.h" + #include "prism/arena.h" #include diff --git a/include/prism/internal/buffer.h b/include/prism/internal/buffer.h index 
8066bd9a97..1d7ee93762 100644 --- a/include/prism/internal/buffer.h +++ b/include/prism/internal/buffer.h @@ -6,9 +6,10 @@ #ifndef PRISM_INTERNAL_BUFFER_H #define PRISM_INTERNAL_BUFFER_H -#include "prism/buffer.h" #include "prism/compiler/format.h" +#include "prism/buffer.h" + #include /** diff --git a/include/prism/internal/char.h b/include/prism/internal/char.h index 6abc998c95..8216a1828c 100644 --- a/include/prism/internal/char.h +++ b/include/prism/internal/char.h @@ -7,6 +7,7 @@ #define PRISM_INTERNAL_CHAR_H #include "prism/compiler/force_inline.h" + #include "prism/arena.h" #include "prism/line_offset_list.h" diff --git a/include/prism/internal/diagnostic.h b/include/prism/internal/diagnostic.h index edaaa33a14..3e58c2ad2e 100644 --- a/include/prism/internal/diagnostic.h +++ b/include/prism/internal/diagnostic.h @@ -6,8 +6,8 @@ #ifndef PRISM_INTERNAL_DIAGNOSTIC_H #define PRISM_INTERNAL_DIAGNOSTIC_H -#include "prism/diagnostic.h" #include "prism/arena.h" +#include "prism/diagnostic.h" /** * Append a diagnostic to the given list of diagnostics that is using shared diff --git a/include/prism/internal/line_offset_list.h b/include/prism/internal/line_offset_list.h index 5c8ae4763c..e18f7276e6 100644 --- a/include/prism/internal/line_offset_list.h +++ b/include/prism/internal/line_offset_list.h @@ -14,10 +14,10 @@ #ifndef PRISM_INTERNAL_LINE_OFFSET_LIST_H #define PRISM_INTERNAL_LINE_OFFSET_LIST_H -#include "prism/line_offset_list.h" - #include "prism/compiler/force_inline.h" + #include "prism/arena.h" +#include "prism/line_offset_list.h" /** * Initialize a new line offset list with the given capacity. 
diff --git a/include/prism/internal/static_literals.h b/include/prism/internal/static_literals.h index 0adde85780..f924dd9e6a 100644 --- a/include/prism/internal/static_literals.h +++ b/include/prism/internal/static_literals.h @@ -6,11 +6,10 @@ #ifndef PRISM_INTERNAL_STATIC_LITERALS_H #define PRISM_INTERNAL_STATIC_LITERALS_H -#include "prism/static_literals.h" - #include "prism/ast.h" #include "prism/buffer.h" #include "prism/line_offset_list.h" +#include "prism/static_literals.h" /** * An internal hash table for a set of nodes. From a4a44cb14a69ff5509e461b918d12a5b40f80cad Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 14:09:02 -0400 Subject: [PATCH 038/100] Split node.h headers --- include/prism/internal/node.h | 28 ++++++++++++++++++++++++++++ include/prism/node.h | 18 ------------------ prism.gemspec | 1 + src/prism.c | 1 + 4 files changed, 30 insertions(+), 18 deletions(-) create mode 100644 include/prism/internal/node.h diff --git a/include/prism/internal/node.h b/include/prism/internal/node.h new file mode 100644 index 0000000000..2399b5a72a --- /dev/null +++ b/include/prism/internal/node.h @@ -0,0 +1,28 @@ +/** + * @file internal/node.h + */ +#ifndef PRISM_INTERNAL_NODE_H +#define PRISM_INTERNAL_NODE_H + +#include "prism/arena.h" +#include "prism/ast.h" + +/** + * Prepend a new node onto the beginning of the node list. + * + * @param arena The arena to allocate from. + * @param list The list to prepend to. + * @param node The node to prepend. + */ +void pm_node_list_prepend(pm_arena_t *arena, pm_node_list_t *list, pm_node_t *node); + +/** + * Concatenate the given node list onto the end of the other node list. + * + * @param arena The arena to allocate from. + * @param list The list to concatenate onto. + * @param other The list to concatenate. 
+ */ +void pm_node_list_concat(pm_arena_t *arena, pm_node_list_t *list, pm_node_list_t *other); + +#endif diff --git a/include/prism/node.h b/include/prism/node.h index 6d4512bd37..44a3134bc4 100644 --- a/include/prism/node.h +++ b/include/prism/node.h @@ -44,24 +44,6 @@ pm_node_list_append(pm_arena_t *arena, pm_node_list_t *list, pm_node_t *node) { } } -/** - * Prepend a new node onto the beginning of the node list. - * - * @param arena The arena to allocate from. - * @param list The list to prepend to. - * @param node The node to prepend. - */ -void pm_node_list_prepend(pm_arena_t *arena, pm_node_list_t *list, pm_node_t *node); - -/** - * Concatenate the given node list onto the end of the other node list. - * - * @param arena The arena to allocate from. - * @param list The list to concatenate onto. - * @param other The list to concatenate. - */ -void pm_node_list_concat(pm_arena_t *arena, pm_node_list_t *list, pm_node_list_t *other); - /** * Returns a string representation of the given node type. 
* diff --git a/prism.gemspec b/prism.gemspec index 6fc30c6f69..f407e2f952 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -70,6 +70,7 @@ Gem::Specification.new do |spec| "include/prism/internal/line_offset_list.h", "include/prism/internal/list.h", "include/prism/internal/memchr.h", + "include/prism/internal/node.h", "include/prism/internal/options.h", "include/prism/internal/regexp.h", "include/prism/internal/static_literals.h", diff --git a/src/prism.c b/src/prism.c index 68730a82e3..9bb5005260 100644 --- a/src/prism.c +++ b/src/prism.c @@ -17,6 +17,7 @@ #include "prism/internal/line_offset_list.h" #include "prism/internal/list.h" #include "prism/internal/memchr.h" +#include "prism/internal/node.h" #include "prism/internal/options.h" #include "prism/internal/regexp.h" #include "prism/internal/static_literals.h" From 2b5298128ebdb871b49102fbf9d41f55f581ada0 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 14:16:14 -0400 Subject: [PATCH 039/100] pm_buffer_free -> pm_buffer_cleanup --- cpp/test.cpp | 2 +- docs/serialization.md | 6 +++--- ext/prism/extension.c | 4 ++-- fuzz/parse.c | 2 +- include/prism.h | 6 +++--- include/prism/buffer.h | 6 +++--- javascript/src/parsePrism.js | 2 +- lib/prism/ffi.rb | 4 ++-- src/buffer.c | 4 ++-- src/prism.c | 18 +++++++++--------- src/regexp.c | 6 +++--- templates/src/prettyprint.c.erb | 2 +- 12 files changed, 31 insertions(+), 31 deletions(-) diff --git a/cpp/test.cpp b/cpp/test.cpp index 3e41b270e9..76673ab87d 100644 --- a/cpp/test.cpp +++ b/cpp/test.cpp @@ -17,7 +17,7 @@ int main() { std::cout << buffer.value << std::endl; - pm_buffer_free(&buffer); + pm_buffer_cleanup(&buffer); pm_parser_free(&parser); pm_arena_free(&arena); diff --git a/docs/serialization.md b/docs/serialization.md index a7ba533566..c744e0d81a 100644 --- a/docs/serialization.md +++ b/docs/serialization.md @@ -159,8 +159,8 @@ typedef struct { size_t capacity; } pm_buffer_t; -// Free the memory associated with the buffer. 
-void pm_buffer_free(pm_buffer_t *); +// Free the memory held by the buffer. +void pm_buffer_cleanup(pm_buffer_t *); // Parse and serialize the AST represented by the given source to the given // buffer. @@ -177,7 +177,7 @@ serialize(const uint8_t *source, size_t length) { // Do something with the serialized string. - pm_buffer_free(&buffer); + pm_buffer_cleanup(&buffer); } ``` diff --git a/ext/prism/extension.c b/ext/prism/extension.c index 60f33f0cb1..965f630de0 100644 --- a/ext/prism/extension.c +++ b/ext/prism/extension.c @@ -382,7 +382,7 @@ dump_input(pm_string_t *input, const pm_options_t *options) { pm_serialize(&parser, node, &buffer); VALUE result = rb_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer)); - pm_buffer_free(&buffer); + pm_buffer_cleanup(&buffer); pm_parser_free(&parser); pm_arena_free(&arena); @@ -1081,7 +1081,7 @@ parse_stream(int argc, VALUE *argv, VALUE self) { VALUE value = pm_ast_new(&parser, node, encoding, source, options.freeze); VALUE result = parse_result_create(rb_cPrismParseResult, &parser, value, encoding, source, options.freeze); - pm_buffer_free(&buffer); + pm_buffer_cleanup(&buffer); pm_parser_free(&parser); pm_arena_free(&arena); diff --git a/fuzz/parse.c b/fuzz/parse.c index be9094d86f..0805c5489c 100644 --- a/fuzz/parse.c +++ b/fuzz/parse.c @@ -5,5 +5,5 @@ harness(const uint8_t *input, size_t size) { pm_buffer_t buffer; pm_buffer_init(&buffer); pm_serialize_parse(&buffer, input, size, NULL); - pm_buffer_free(&buffer); + pm_buffer_cleanup(&buffer); } diff --git a/include/prism.h b/include/prism.h index 55ce7e4745..c131d21aaa 100644 --- a/include/prism.h +++ b/include/prism.h @@ -365,7 +365,7 @@ PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_method_name(const uint * use and be aware of are: * * * `pm_buffer_t` - a small buffer object that will hold the serialized AST - * * `pm_buffer_free()` - free the memory associated with the buffer + * * `pm_buffer_cleanup()` - free the memory associated with the 
buffer * * `pm_serialize()` - serialize the AST into a buffer * * `pm_serialize_parse()` - parse and serialize the AST into a buffer * @@ -378,7 +378,7 @@ PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_method_name(const uint * pm_serialize_parse(&buffer, source, length, NULL); * printf("SERIALIZED!\n"); * - * pm_buffer_free(&buffer); + * pm_buffer_cleanup(&buffer); * } * ``` * @@ -399,7 +399,7 @@ PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_method_name(const uint * pm_prettyprint(&buffer, &parser, root); * printf("%*.s\n", (int) buffer.length, buffer.value); * - * pm_buffer_free(&buffer); + * pm_buffer_cleanup(&buffer); * pm_parser_free(&parser); * pm_arena_free(&arena); * } diff --git a/include/prism/buffer.h b/include/prism/buffer.h index 41b0fd8dc0..ec0571b28b 100644 --- a/include/prism/buffer.h +++ b/include/prism/buffer.h @@ -64,12 +64,12 @@ PRISM_EXPORTED_FUNCTION char * pm_buffer_value(const pm_buffer_t *buffer); PRISM_EXPORTED_FUNCTION size_t pm_buffer_length(const pm_buffer_t *buffer); /** - * Free the memory associated with the buffer. + * Free the memory held by the buffer. * - * @param buffer The buffer to free. + * @param buffer The buffer whose held memory should be freed. 
* * \public \memberof pm_buffer_t */ -PRISM_EXPORTED_FUNCTION void pm_buffer_free(pm_buffer_t *buffer); +PRISM_EXPORTED_FUNCTION void pm_buffer_cleanup(pm_buffer_t *buffer); #endif diff --git a/javascript/src/parsePrism.js b/javascript/src/parsePrism.js index 269f84c825..af6d70b841 100644 --- a/javascript/src/parsePrism.js +++ b/javascript/src/parsePrism.js @@ -45,7 +45,7 @@ export function parsePrism(prism, source, options = {}) { const serializedView = new Uint8Array(prism.memory.buffer, prism.pm_buffer_value(bufferPointer), prism.pm_buffer_length(bufferPointer)); const result = deserialize(serializedView); - prism.pm_buffer_free(bufferPointer); + prism.pm_buffer_cleanup(bufferPointer); prism.free(sourcePointer); prism.free(bufferPointer); prism.free(optionsPointer); diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb index eb8cf3f4ca..4419213162 100644 --- a/lib/prism/ffi.rb +++ b/lib/prism/ffi.rb @@ -111,7 +111,7 @@ def self.load_exported_functions_from(header, *functions, callbacks) "pm_buffer_init", "pm_buffer_value", "pm_buffer_length", - "pm_buffer_free", + "pm_buffer_cleanup", [] ) @@ -155,7 +155,7 @@ def self.with raise unless LibRubyParser.pm_buffer_init(pointer) return yield new(pointer) ensure - LibRubyParser.pm_buffer_free(pointer) + LibRubyParser.pm_buffer_cleanup(pointer) end end end diff --git a/src/buffer.c b/src/buffer.c index d689a93203..9612cb4802 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -361,9 +361,9 @@ pm_buffer_insert(pm_buffer_t *buffer, size_t index, const char *value, size_t le } /** - * Free the memory associated with the buffer. + * Free the memory held by the buffer. 
*/ void -pm_buffer_free(pm_buffer_t *buffer) { +pm_buffer_cleanup(pm_buffer_t *buffer) { xfree_sized(buffer->value, buffer->capacity); } diff --git a/src/prism.c b/src/prism.c index 9bb5005260..6f256b4a36 100644 --- a/src/prism.c +++ b/src/prism.c @@ -2246,7 +2246,7 @@ pm_regular_expression_flags_create(pm_parser_t *parser, const pm_token_t *closin const char *word = unknown_flags_length >= 2 ? "options" : "option"; PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_REGEXP_UNKNOWN_OPTIONS, word, unknown_flags_length, pm_buffer_value(&unknown_flags)); } - pm_buffer_free(&unknown_flags); + pm_buffer_cleanup(&unknown_flags); } return flags; @@ -9446,7 +9446,7 @@ lex_question_mark(pm_parser_t *parser) { // Copy buffer data into the arena and free the heap buffer. void *arena_data = pm_arena_memdup(parser->arena, buffer.value, buffer.length, PRISM_ALIGNOF(uint8_t)); pm_string_constant_init(&parser->current_string, (const char *) arena_data, buffer.length); - pm_buffer_free(&buffer); + pm_buffer_cleanup(&buffer); return PM_TOKEN_CHARACTER_LITERAL; } else { @@ -9770,13 +9770,13 @@ pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) { size_t len = pm_buffer_length(&token_buffer->buffer); void *arena_data = pm_arena_memdup(parser->arena, pm_buffer_value(&token_buffer->buffer), len, PRISM_ALIGNOF(uint8_t)); pm_string_constant_init(&parser->current_string, (const char *) arena_data, len); - pm_buffer_free(&token_buffer->buffer); + pm_buffer_cleanup(&token_buffer->buffer); } static PRISM_INLINE void pm_regexp_token_buffer_copy(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) { pm_token_buffer_copy(parser, &token_buffer->base); - pm_buffer_free(&token_buffer->regexp_buffer); + pm_buffer_cleanup(&token_buffer->regexp_buffer); } /** @@ -13523,7 +13523,7 @@ pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *liter pm_line_offset_list_line_column(&parser->line_offsets, PM_NODE_START(node), parser->start_line).line 
); - pm_buffer_free(&buffer); + pm_buffer_cleanup(&buffer); } } @@ -20779,7 +20779,7 @@ parse_regular_expression_named_capture(pm_parser_t *parser, const pm_string_t *c // If the name of the capture group isn't a valid identifier, we do // not add it to the local table. if (!pm_slice_is_valid_local(parser, source, source + length)) { - pm_buffer_free(&unescaped); + pm_buffer_cleanup(&unescaped); return; } @@ -20810,7 +20810,7 @@ parse_regular_expression_named_capture(pm_parser_t *parser, const pm_string_t *c // If the local is not already a local but it is a keyword, then we // do not want to add a capture for this. if (pm_local_is_keyword((const char *) source, length)) { - pm_buffer_free(&unescaped); + pm_buffer_cleanup(&unescaped); return; } @@ -20831,7 +20831,7 @@ parse_regular_expression_named_capture(pm_parser_t *parser, const pm_string_t *c pm_node_list_append(parser->arena, &callback_data->match->targets, target); } - pm_buffer_free(&unescaped); + pm_buffer_cleanup(&unescaped); } /** @@ -22845,7 +22845,7 @@ pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fge pm_serialize_content(&parser, node, buffer); pm_buffer_append_byte(buffer, '\0'); - pm_buffer_free(&parser_buffer); + pm_buffer_cleanup(&parser_buffer); pm_parser_free(&parser); pm_arena_free(&arena); pm_options_free(&options); diff --git a/src/regexp.c b/src/regexp.c index 566d65a7fd..3013974367 100644 --- a/src/regexp.c +++ b/src/regexp.c @@ -1464,7 +1464,7 @@ pm_regexp_validate_encoding_modifier(pm_regexp_parser_t *parser, bool ascii_only pm_buffer_t formatted = { 0 }; pm_regexp_format_for_error(&formatted, parser->encoding, (const uint8_t *) source_start, (size_t) source_length); PM_REGEXP_ENCODING_ERROR(parser, PM_ERR_REGEXP_NON_ESCAPED_MBC, (int) formatted.length, (const char *) formatted.value); - pm_buffer_free(&formatted); + pm_buffer_cleanup(&formatted); } } @@ -1658,7 +1658,7 @@ pm_regexp_parse(pm_parser_t *parser, pm_regular_expression_node_t *node, pm_rege const 
char *error_source = (const char *) pm_string_source(&node->unescaped); int error_source_length = (int) pm_string_length(&node->unescaped); pm_node_flags_t encoding_flags = pm_regexp_validate_encoding(&regexp_parser, ascii_only, flags, error_source, error_source_length); - pm_buffer_free(&regexp_parser.hex_escape_buffer); + pm_buffer_cleanup(&regexp_parser.hex_escape_buffer); // Second pass: walk unescaped content for named capture extraction. if (name_callback != NULL) { @@ -1712,5 +1712,5 @@ pm_regexp_parse_named_captures(pm_parser_t *parser, const uint8_t *source, size_ }; pm_regexp_parse_pattern(&regexp_parser); - pm_buffer_free(&regexp_parser.hex_escape_buffer); + pm_buffer_cleanup(&regexp_parser.hex_escape_buffer); } diff --git a/templates/src/prettyprint.c.erb b/templates/src/prettyprint.c.erb index b702712742..4af8155c47 100644 --- a/templates/src/prettyprint.c.erb +++ b/templates/src/prettyprint.c.erb @@ -167,7 +167,7 @@ void pm_prettyprint(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm_node_t *node) { pm_buffer_t prefix_buffer = { 0 }; prettyprint_node(output_buffer, parser, node, &prefix_buffer); - pm_buffer_free(&prefix_buffer); + pm_buffer_cleanup(&prefix_buffer); } #endif From 1b594e10da4807db251af2f919195ba14fcd3c47 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 14:32:46 -0400 Subject: [PATCH 040/100] Make buffer an opaque pointer --- cpp/test.cpp | 10 ++++---- docs/serialization.md | 6 ++--- ext/prism/extension.c | 16 ++++++------- fuzz/parse.c | 7 +++--- include/prism.h | 19 +++++++-------- include/prism/buffer.h | 41 ++++++++++----------------------- include/prism/internal/buffer.h | 25 ++++++++++++++++++-- javascript/src/parsePrism.js | 7 ++---- lib/prism/ffi.rb | 17 +++++++------- src/buffer.c | 35 ++++++++++++++++------------ src/prism.c | 16 ++++++------- 11 files changed, 101 insertions(+), 98 deletions(-) diff --git a/cpp/test.cpp b/cpp/test.cpp index 76673ab87d..f41ac83c61 100644 --- a/cpp/test.cpp +++ b/cpp/test.cpp @@ 
-10,14 +10,14 @@ int main() { pm_parser_init(&arena, &parser, reinterpret_cast("1 + 2"), 5, NULL); pm_node_t *root = pm_parse(&parser); - pm_buffer_t buffer = { 0 }; + pm_buffer_t *buffer = pm_buffer_new(); - pm_prettyprint(&buffer, &parser, root); - pm_buffer_append_byte(&buffer, '\0'); + pm_prettyprint(buffer, &parser, root); - std::cout << buffer.value << std::endl; + std::string_view view(pm_buffer_value(buffer), pm_buffer_length(buffer)); + std::cout << view << std::endl; - pm_buffer_cleanup(&buffer); + pm_buffer_free(buffer); pm_parser_free(&parser); pm_arena_free(&arena); diff --git a/docs/serialization.md b/docs/serialization.md index c744e0d81a..d087698baa 100644 --- a/docs/serialization.md +++ b/docs/serialization.md @@ -172,12 +172,12 @@ Typically you would use a stack-allocated `pm_buffer_t` and call `pm_serialize_p ```c void serialize(const uint8_t *source, size_t length) { - pm_buffer_t buffer = { 0 }; - pm_serialize_parse(&buffer, source, length, NULL); + pm_buffer_t *buffer = pm_buffer_new(); + pm_serialize_parse(buffer, source, length, NULL); // Do something with the serialized string. 
- pm_buffer_cleanup(&buffer); + pm_buffer_free(buffer); } ``` diff --git a/ext/prism/extension.c b/ext/prism/extension.c index 965f630de0..d39f8abe7a 100644 --- a/ext/prism/extension.c +++ b/ext/prism/extension.c @@ -369,8 +369,8 @@ file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options, V */ static VALUE dump_input(pm_string_t *input, const pm_options_t *options) { - pm_buffer_t buffer; - if (!pm_buffer_init(&buffer)) { + pm_buffer_t *buffer = pm_buffer_new(); + if (!buffer) { rb_raise(rb_eNoMemError, "failed to allocate memory"); } @@ -379,10 +379,10 @@ dump_input(pm_string_t *input, const pm_options_t *options) { pm_parser_init(&arena, &parser, pm_string_source(input), pm_string_length(input), options); pm_node_t *node = pm_parse(&parser); - pm_serialize(&parser, node, &buffer); + pm_serialize(&parser, node, buffer); - VALUE result = rb_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer)); - pm_buffer_cleanup(&buffer); + VALUE result = rb_str_new(pm_buffer_value(buffer), pm_buffer_length(buffer)); + pm_buffer_free(buffer); pm_parser_free(&parser); pm_arena_free(&arena); @@ -1072,16 +1072,16 @@ parse_stream(int argc, VALUE *argv, VALUE self) { pm_arena_t arena = { 0 }; pm_parser_t parser; - pm_buffer_t buffer; - pm_node_t *node = pm_parse_stream(&arena, &parser, &buffer, (void *) stream, parse_stream_fgets, parse_stream_eof, &options); + pm_buffer_t *buffer = pm_buffer_new(); + pm_node_t *node = pm_parse_stream(&arena, &parser, buffer, (void *) stream, parse_stream_fgets, parse_stream_eof, &options); rb_encoding *encoding = rb_enc_find(pm_parser_encoding_name(&parser)); VALUE source = pm_source_new(&parser, encoding, options.freeze); VALUE value = pm_ast_new(&parser, node, encoding, source, options.freeze); VALUE result = parse_result_create(rb_cPrismParseResult, &parser, value, encoding, source, options.freeze); - pm_buffer_cleanup(&buffer); + pm_buffer_free(buffer); pm_parser_free(&parser); pm_arena_free(&arena); diff --git 
a/fuzz/parse.c b/fuzz/parse.c index 0805c5489c..c04553909e 100644 --- a/fuzz/parse.c +++ b/fuzz/parse.c @@ -2,8 +2,7 @@ void harness(const uint8_t *input, size_t size) { - pm_buffer_t buffer; - pm_buffer_init(&buffer); - pm_serialize_parse(&buffer, input, size, NULL); - pm_buffer_cleanup(&buffer); + pm_buffer_t *buffer = pm_buffer_new(); + pm_serialize_parse(buffer, input, size, NULL); + pm_buffer_free(buffer); } diff --git a/include/prism.h b/include/prism.h index c131d21aaa..80dd579829 100644 --- a/include/prism.h +++ b/include/prism.h @@ -364,8 +364,9 @@ PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_method_name(const uint * parse Ruby code. The structures and functions that you're going to want to * use and be aware of are: * - * * `pm_buffer_t` - a small buffer object that will hold the serialized AST - * * `pm_buffer_cleanup()` - free the memory associated with the buffer + * * `pm_buffer_t` - an opaque buffer object that will hold the serialized AST + * * `pm_buffer_new()` - create a new buffer + * * `pm_buffer_free()` - free the buffer and its internal memory * * `pm_serialize()` - serialize the AST into a buffer * * `pm_serialize_parse()` - parse and serialize the AST into a buffer * @@ -373,12 +374,12 @@ PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_method_name(const uint * * ```c * void serialize(const uint8_t *source, size_t length) { - * pm_buffer_t buffer = { 0 }; + * pm_buffer_t *buffer = pm_buffer_new(); * - * pm_serialize_parse(&buffer, source, length, NULL); + * pm_serialize_parse(buffer, source, length, NULL); * printf("SERIALIZED!\n"); * - * pm_buffer_cleanup(&buffer); + * pm_buffer_free(buffer); * } * ``` * @@ -394,12 +395,12 @@ PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_method_name(const uint * pm_parser_init(&arena, &parser, source, length, NULL); * * pm_node_t *root = pm_parse(&parser); - * pm_buffer_t buffer = { 0 }; + * pm_buffer_t *buffer = pm_buffer_new(); * - * pm_prettyprint(&buffer, &parser, 
root); - * printf("%*.s\n", (int) buffer.length, buffer.value); + * pm_prettyprint(buffer, &parser, root); + * printf("%*.s\n", (int) pm_buffer_length(buffer), pm_buffer_value(buffer)); * - * pm_buffer_cleanup(&buffer); + * pm_buffer_free(buffer); * pm_parser_free(&parser); * pm_arena_free(&arena); * } diff --git a/include/prism/buffer.h b/include/prism/buffer.h index ec0571b28b..b6e8feea31 100644 --- a/include/prism/buffer.h +++ b/include/prism/buffer.h @@ -8,40 +8,32 @@ #include "prism/compiler/exported.h" -#include #include /** - * A pm_buffer_t is a simple memory buffer that stores data in a contiguous - * block of memory. + * A wrapper around a contiguous block of allocated memory. */ -typedef struct { - /** The length of the buffer in bytes. */ - size_t length; - - /** The capacity of the buffer in bytes that has been allocated. */ - size_t capacity; - - /** A pointer to the start of the buffer. */ - char *value; -} pm_buffer_t; +typedef struct pm_buffer_t pm_buffer_t; /** - * Return the size of the pm_buffer_t struct. + * Allocate and initialize a new buffer. If the buffer cannot be allocated, this + * function will abort the process. * - * @returns The size of the pm_buffer_t struct. + * @returns A pointer to the initialized buffer. The caller is responsible for + * freeing the buffer with pm_buffer_free. + * + * \public \memberof pm_buffer_t */ -PRISM_EXPORTED_FUNCTION size_t pm_buffer_sizeof(void); +PRISM_EXPORTED_FUNCTION pm_buffer_t * pm_buffer_new(void); /** - * Initialize a pm_buffer_t with its default values. + * Free both the memory held by the buffer and the buffer itself. * - * @param buffer The buffer to initialize. - * @returns True if the buffer was initialized successfully, false otherwise. + * @param buffer The buffer to free. * * \public \memberof pm_buffer_t */ -PRISM_EXPORTED_FUNCTION bool pm_buffer_init(pm_buffer_t *buffer); +PRISM_EXPORTED_FUNCTION void pm_buffer_free(pm_buffer_t *buffer); /** * Return the value of the buffer. 
@@ -63,13 +55,4 @@ PRISM_EXPORTED_FUNCTION char * pm_buffer_value(const pm_buffer_t *buffer); */ PRISM_EXPORTED_FUNCTION size_t pm_buffer_length(const pm_buffer_t *buffer); -/** - * Free the memory held by the buffer. - * - * @param buffer The buffer whose held memory should be freed. - * - * \public \memberof pm_buffer_t - */ -PRISM_EXPORTED_FUNCTION void pm_buffer_cleanup(pm_buffer_t *buffer); - #endif diff --git a/include/prism/internal/buffer.h b/include/prism/internal/buffer.h index 1d7ee93762..8eb0c7e243 100644 --- a/include/prism/internal/buffer.h +++ b/include/prism/internal/buffer.h @@ -10,16 +10,37 @@ #include "prism/buffer.h" +#include #include +/** + * A simple memory buffer that stores data in a contiguous block of memory. + */ +struct pm_buffer_t { + /** The length of the buffer in bytes. */ + size_t length; + + /** The capacity of the buffer in bytes that has been allocated. */ + size_t capacity; + + /** A pointer to the start of the buffer. */ + char *value; +}; + /** * Initialize a pm_buffer_t with the given capacity. * * @param buffer The buffer to initialize. * @param capacity The capacity of the buffer. - * @returns True if the buffer was initialized successfully, false otherwise. */ -bool pm_buffer_init_capacity(pm_buffer_t *buffer, size_t capacity); +void pm_buffer_init(pm_buffer_t *buffer, size_t capacity); + +/** + * Free the memory held by the buffer. + * + * @param buffer The buffer whose held memory should be freed. + */ +void pm_buffer_cleanup(pm_buffer_t *buffer); /** * Append the given amount of space as zeroes to the buffer. 
diff --git a/javascript/src/parsePrism.js b/javascript/src/parsePrism.js index af6d70b841..615cc83d13 100644 --- a/javascript/src/parsePrism.js +++ b/javascript/src/parsePrism.js @@ -31,9 +31,7 @@ export function parsePrism(prism, source, options = {}) { const packedOptions = dumpOptions(options); const optionsPointer = prism.calloc(1, packedOptions.length); - - const bufferPointer = prism.calloc(prism.pm_buffer_sizeof(), 1); - prism.pm_buffer_init(bufferPointer); + const bufferPointer = prism.pm_buffer_new(); const sourceView = new Uint8Array(prism.memory.buffer, sourcePointer, sourceArray.length); sourceView.set(sourceArray); @@ -45,9 +43,8 @@ export function parsePrism(prism, source, options = {}) { const serializedView = new Uint8Array(prism.memory.buffer, prism.pm_buffer_value(bufferPointer), prism.pm_buffer_length(bufferPointer)); const result = deserialize(serializedView); - prism.pm_buffer_cleanup(bufferPointer); + prism.pm_buffer_free(bufferPointer); prism.free(sourcePointer); - prism.free(bufferPointer); prism.free(optionsPointer); return result; } diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb index 4419213162..e9682c527c 100644 --- a/lib/prism/ffi.rb +++ b/lib/prism/ffi.rb @@ -107,11 +107,10 @@ def self.load_exported_functions_from(header, *functions, callbacks) load_exported_functions_from( "prism/buffer.h", - "pm_buffer_sizeof", - "pm_buffer_init", + "pm_buffer_new", "pm_buffer_value", "pm_buffer_length", - "pm_buffer_cleanup", + "pm_buffer_free", [] ) @@ -128,8 +127,6 @@ def self.load_exported_functions_from(header, *functions, callbacks) # This object represents a pm_buffer_t. We only use it as an opaque pointer, # so it doesn't need to know the fields of pm_buffer_t. class PrismBuffer # :nodoc: - SIZEOF = LibRubyParser.pm_buffer_sizeof - attr_reader :pointer def initialize(pointer) @@ -151,11 +148,13 @@ def read # Initialize a new buffer and yield it to the block. The buffer will be # automatically freed when the block returns. 
def self.with - FFI::MemoryPointer.new(SIZEOF) do |pointer| - raise unless LibRubyParser.pm_buffer_init(pointer) - return yield new(pointer) + buffer = LibRubyParser.pm_buffer_new + raise unless buffer + + begin + yield new(buffer) ensure - LibRubyParser.pm_buffer_cleanup(pointer) + LibRubyParser.pm_buffer_free(buffer) end end end diff --git a/src/buffer.c b/src/buffer.c index 9612cb4802..cb3b9a4fe8 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -11,32 +11,28 @@ #include #include -/** - * Return the size of the pm_buffer_t struct. - */ -size_t -pm_buffer_sizeof(void) { - return sizeof(pm_buffer_t); -} - /** * Initialize a pm_buffer_t with the given capacity. */ -bool -pm_buffer_init_capacity(pm_buffer_t *buffer, size_t capacity) { +void +pm_buffer_init(pm_buffer_t *buffer, size_t capacity) { buffer->length = 0; buffer->capacity = capacity; buffer->value = (char *) xmalloc(capacity); - return buffer->value != NULL; + if (buffer->value == NULL) abort(); } /** - * Initialize a pm_buffer_t with its default values. + * Allocate and initialize a new buffer. */ -bool -pm_buffer_init(pm_buffer_t *buffer) { - return pm_buffer_init_capacity(buffer, 1024); +pm_buffer_t * +pm_buffer_new(void) { + pm_buffer_t *buffer = (pm_buffer_t *) xmalloc(sizeof(pm_buffer_t)); + if (buffer == NULL) abort(); + + pm_buffer_init(buffer, 1024); + return buffer; } /** @@ -367,3 +363,12 @@ void pm_buffer_cleanup(pm_buffer_t *buffer) { xfree_sized(buffer->value, buffer->capacity); } + +/** + * Free both the memory held by the buffer and the buffer itself. 
+ */ +void +pm_buffer_free(pm_buffer_t *buffer) { + pm_buffer_cleanup(buffer); + xfree_sized(buffer, sizeof(pm_buffer_t)); +} diff --git a/src/prism.c b/src/prism.c index 6f256b4a36..db1890a5be 100644 --- a/src/prism.c +++ b/src/prism.c @@ -9439,7 +9439,7 @@ lex_question_mark(pm_parser_t *parser) { lex_state_set(parser, PM_LEX_STATE_END); pm_buffer_t buffer; - pm_buffer_init_capacity(&buffer, 3); + pm_buffer_init(&buffer, 3); escape_read(parser, &buffer, NULL, PM_ESCAPE_FLAG_SINGLE); @@ -9825,7 +9825,7 @@ static void pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) { const uint8_t *start; if (token_buffer->cursor == NULL) { - pm_buffer_init_capacity(&token_buffer->buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE); + pm_buffer_init(&token_buffer->buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE); start = parser->current.start; } else { start = token_buffer->cursor; @@ -9842,8 +9842,8 @@ static void pm_regexp_token_buffer_escape(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) { const uint8_t *start; if (token_buffer->base.cursor == NULL) { - pm_buffer_init_capacity(&token_buffer->base.buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE); - pm_buffer_init_capacity(&token_buffer->regexp_buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE); + pm_buffer_init(&token_buffer->base.buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE); + pm_buffer_init(&token_buffer->regexp_buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE); start = parser->current.start; } else { start = token_buffer->base.cursor; @@ -22734,8 +22734,6 @@ pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t */ pm_node_t * pm_parse_stream(pm_arena_t *arena, pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options) { - pm_buffer_init(buffer); - bool eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof); pm_parser_init(arena, parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), 
options); @@ -22839,13 +22837,13 @@ pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fge pm_options_t options = { 0 }; pm_options_read(&options, data); - pm_buffer_t parser_buffer; - pm_node_t *node = pm_parse_stream(&arena, &parser, &parser_buffer, stream, stream_fgets, stream_feof, &options); + pm_buffer_t *parser_buffer = pm_buffer_new(); + pm_node_t *node = pm_parse_stream(&arena, &parser, parser_buffer, stream, stream_fgets, stream_feof, &options); pm_serialize_header(buffer); pm_serialize_content(&parser, node, buffer); pm_buffer_append_byte(buffer, '\0'); - pm_buffer_cleanup(&parser_buffer); + pm_buffer_free(parser_buffer); pm_parser_free(&parser); pm_arena_free(&arena); pm_options_free(&options); From b1be4b451dea9f5cd6ec8462d535a3aa819b602a Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 15:01:52 -0400 Subject: [PATCH 041/100] pm_parser_free -> pm_parser_cleanup --- cpp/test.cpp | 2 +- ext/prism/extension.c | 14 +++++++------- include/prism.h | 12 ++++++------ include/prism/node.h | 2 +- rust/ruby-prism-sys/build/main.rs | 2 +- rust/ruby-prism-sys/tests/node_tests.rs | 4 ++-- rust/ruby-prism-sys/tests/parser_tests.rs | 8 ++++---- rust/ruby-prism/src/parse_result/mod.rs | 4 ++-- src/prism.c | 12 ++++++------ templates/src/serialize.c.erb | 4 ++-- 10 files changed, 32 insertions(+), 32 deletions(-) diff --git a/cpp/test.cpp b/cpp/test.cpp index f41ac83c61..19a5731de9 100644 --- a/cpp/test.cpp +++ b/cpp/test.cpp @@ -18,7 +18,7 @@ int main() { std::cout << view << std::endl; pm_buffer_free(buffer); - pm_parser_free(&parser); + pm_parser_cleanup(&parser); pm_arena_free(&arena); return 0; diff --git a/ext/prism/extension.c b/ext/prism/extension.c index d39f8abe7a..ad80a35804 100644 --- a/ext/prism/extension.c +++ b/ext/prism/extension.c @@ -383,7 +383,7 @@ dump_input(pm_string_t *input, const pm_options_t *options) { VALUE result = rb_str_new(pm_buffer_value(buffer), pm_buffer_length(buffer)); 
pm_buffer_free(buffer); - pm_parser_free(&parser); + pm_parser_cleanup(&parser); pm_arena_free(&arena); return result; @@ -792,7 +792,7 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod result = parse_result_create(rb_cPrismLexResult, &parser, parse_lex_data.tokens, parse_lex_data.encoding, source, options->freeze); } - pm_parser_free(&parser); + pm_parser_cleanup(&parser); pm_arena_free(&arena); return result; @@ -866,7 +866,7 @@ parse_input(pm_string_t *input, const pm_options_t *options) { rb_obj_freeze(source); } - pm_parser_free(&parser); + pm_parser_cleanup(&parser); pm_arena_free(&arena); return result; @@ -974,7 +974,7 @@ profile_input(pm_string_t *input, const pm_options_t *options) { pm_parser_init(&arena, &parser, pm_string_source(input), pm_string_length(input), options); pm_parse(&parser); - pm_parser_free(&parser); + pm_parser_cleanup(&parser); pm_arena_free(&arena); } @@ -1082,7 +1082,7 @@ parse_stream(int argc, VALUE *argv, VALUE self) { VALUE result = parse_result_create(rb_cPrismParseResult, &parser, value, encoding, source, options.freeze); pm_buffer_free(buffer); - pm_parser_free(&parser); + pm_parser_cleanup(&parser); pm_arena_free(&arena); return result; @@ -1103,7 +1103,7 @@ parse_input_comments(pm_string_t *input, const pm_options_t *options) { VALUE source = pm_source_new(&parser, encoding, options->freeze); VALUE comments = parser_comments(&parser, source, options->freeze); - pm_parser_free(&parser); + pm_parser_cleanup(&parser); pm_arena_free(&arena); return comments; @@ -1223,7 +1223,7 @@ parse_input_success_p(pm_string_t *input, const pm_options_t *options) { pm_parse(&parser); VALUE result = parser.error_list.size == 0 ? 
Qtrue : Qfalse; - pm_parser_free(&parser); + pm_parser_cleanup(&parser); pm_arena_free(&arena); return result; diff --git a/include/prism.h b/include/prism.h index 80dd579829..2005c57a77 100644 --- a/include/prism.h +++ b/include/prism.h @@ -44,8 +44,8 @@ PRISM_EXPORTED_FUNCTION const char * pm_version(void); /** * Initialize a parser with the given start and end pointers. * - * The resulting parser must eventually be freed with `pm_parser_free()`. The - * arena is caller-owned and must outlive the parser — `pm_parser_free()` does + * The resulting parser must eventually be freed with `pm_parser_cleanup()`. The + * arena is caller-owned and must outlive the parser — `pm_parser_cleanup()` does * not free the arena. * * @param arena The arena to use for all AST-lifetime allocations. @@ -80,7 +80,7 @@ PRISM_EXPORTED_FUNCTION void pm_parser_register_encoding_changed_callback(pm_par * * \public \memberof pm_parser */ -PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser); +PRISM_EXPORTED_FUNCTION void pm_parser_cleanup(pm_parser_t *parser); /** * Initiate the parser with the given parser. 
@@ -333,7 +333,7 @@ PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_method_name(const uint * * `pm_parser_t` - the main parser structure * * `pm_parser_init()` - initialize a parser * * `pm_parse()` - parse and return the root node - * * `pm_parser_free()` - free the internal memory of the parser + * * `pm_parser_cleanup()` - free the internal memory of the parser * * `pm_arena_free()` - free all AST-lifetime memory * * Putting all of this together would look something like: @@ -347,7 +347,7 @@ PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_method_name(const uint * pm_node_t *root = pm_parse(&parser); * printf("PARSED!\n"); * - * pm_parser_free(&parser); + * pm_parser_cleanup(&parser); * pm_arena_free(&arena); * } * ``` @@ -401,7 +401,7 @@ PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_method_name(const uint * printf("%*.s\n", (int) pm_buffer_length(buffer), pm_buffer_value(buffer)); * * pm_buffer_free(buffer); - * pm_parser_free(&parser); + * pm_parser_cleanup(&parser); * pm_arena_free(&arena); * } * ``` diff --git a/include/prism/node.h b/include/prism/node.h index 44a3134bc4..2390dc7290 100644 --- a/include/prism/node.h +++ b/include/prism/node.h @@ -92,7 +92,7 @@ PRISM_EXPORTED_FUNCTION const char * pm_node_type_to_str(pm_node_type_t node_typ * size_t *data = &indent; * pm_visit_node(node, visit, data); * - * pm_parser_free(&parser); + * pm_parser_cleanup(&parser); * pm_arena_free(&arena); * return EXIT_SUCCESS; * } diff --git a/rust/ruby-prism-sys/build/main.rs b/rust/ruby-prism-sys/build/main.rs index 18441ed623..3bbf4fbbeb 100644 --- a/rust/ruby-prism-sys/build/main.rs +++ b/rust/ruby-prism-sys/build/main.rs @@ -163,7 +163,7 @@ fn generate_bindings(ruby_include_path: &Path) -> bindgen::Bindings { .allowlist_function("pm_options_scope_local_get") .allowlist_function("pm_options_scopes_init") .allowlist_function("pm_parse") - .allowlist_function("pm_parser_free") + .allowlist_function("pm_parser_cleanup") 
.allowlist_function("pm_parser_init") .allowlist_function("pm_size_to_native") .allowlist_function("pm_string_constant_init") diff --git a/rust/ruby-prism-sys/tests/node_tests.rs b/rust/ruby-prism-sys/tests/node_tests.rs index 7c7c13629c..0de0eabe49 100644 --- a/rust/ruby-prism-sys/tests/node_tests.rs +++ b/rust/ruby-prism-sys/tests/node_tests.rs @@ -1,7 +1,7 @@ use std::{ffi::CString, mem::MaybeUninit}; use ruby_prism_sys::{pm_arena_free, pm_arena_t, pm_node_type}; -use ruby_prism_sys::{pm_parse, pm_parser_free, pm_parser_init, pm_parser_t}; +use ruby_prism_sys::{pm_parse, pm_parser_cleanup, pm_parser_init, pm_parser_t}; #[test] fn node_test() { @@ -23,7 +23,7 @@ fn node_test() { assert_eq!((*parsed_node).type_, pm_node_type::PM_PROGRAM_NODE as u16); - pm_parser_free(parser); + pm_parser_cleanup(parser); pm_arena_free(arena.as_mut_ptr()); } } diff --git a/rust/ruby-prism-sys/tests/parser_tests.rs b/rust/ruby-prism-sys/tests/parser_tests.rs index d6f827d144..3d85ef06de 100644 --- a/rust/ruby-prism-sys/tests/parser_tests.rs +++ b/rust/ruby-prism-sys/tests/parser_tests.rs @@ -5,7 +5,7 @@ use std::{ }; use ruby_prism_sys::{ - pm_arena_free, pm_arena_t, pm_comment_t, pm_comment_type_t, pm_diagnostic_t, pm_parse, pm_parser_free, + pm_arena_free, pm_arena_t, pm_comment_t, pm_comment_type_t, pm_diagnostic_t, pm_parse, pm_parser_cleanup, pm_parser_init, pm_parser_t, }; @@ -29,7 +29,7 @@ fn init_test() { pm_parser_init(arena.as_mut_ptr(), parser.as_mut_ptr(), source, len, std::ptr::null()); let parser = parser.assume_init_mut(); - pm_parser_free(parser); + pm_parser_cleanup(parser); pm_arena_free(arena.as_mut_ptr()); } } @@ -62,7 +62,7 @@ fn comments_test() { }; assert_eq!(location, 0..7); - pm_parser_free(parser); + pm_parser_cleanup(parser); pm_arena_free(arena.as_mut_ptr()); } } @@ -101,7 +101,7 @@ fn diagnostics_test() { }; assert_eq!(location, 10..10); - pm_parser_free(parser); + pm_parser_cleanup(parser); pm_arena_free(arena.as_mut_ptr()); } } diff --git 
a/rust/ruby-prism/src/parse_result/mod.rs b/rust/ruby-prism/src/parse_result/mod.rs index b0e7197707..4ac843661e 100644 --- a/rust/ruby-prism/src/parse_result/mod.rs +++ b/rust/ruby-prism/src/parse_result/mod.rs @@ -8,7 +8,7 @@ mod diagnostics; use std::ptr::NonNull; -use ruby_prism_sys::{pm_arena_free, pm_arena_t, pm_comment_t, pm_diagnostic_t, pm_line_offset_list_line_column, pm_location_t, pm_magic_comment_t, pm_node_t, pm_parser_free, pm_parser_t}; +use ruby_prism_sys::{pm_arena_free, pm_arena_t, pm_comment_t, pm_diagnostic_t, pm_line_offset_list_line_column, pm_location_t, pm_magic_comment_t, pm_node_t, pm_parser_cleanup, pm_parser_t}; pub use self::comments::{Comment, CommentType, Comments, MagicComment, MagicComments}; pub use self::diagnostics::{Diagnostic, Diagnostics}; @@ -260,7 +260,7 @@ impl<'pr> ParseResult<'pr> { impl Drop for ParseResult<'_> { fn drop(&mut self) { unsafe { - pm_parser_free(self.parser.as_ptr()); + pm_parser_cleanup(self.parser.as_ptr()); drop(Box::from_raw(self.parser.as_ptr())); pm_arena_free(self.arena.as_mut()); } diff --git a/src/prism.c b/src/prism.c index db1890a5be..ee5866a341 100644 --- a/src/prism.c +++ b/src/prism.c @@ -22483,7 +22483,7 @@ pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_ch * Free any memory associated with the given parser. 
*/ void -pm_parser_free(pm_parser_t *parser) { +pm_parser_cleanup(pm_parser_t *parser) { pm_string_free(&parser->filepath); pm_arena_free(&parser->metadata_arena); @@ -22742,7 +22742,7 @@ pm_parse_stream(pm_arena_t *arena, pm_parser_t *parser, pm_buffer_t *buffer, voi while (!eof && parser->error_list.size > 0) { eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof); - pm_parser_free(parser); + pm_parser_cleanup(parser); pm_arena_free(arena); pm_parser_init(arena, parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options); node = pm_parse(parser); @@ -22766,7 +22766,7 @@ pm_parse_success_p(const uint8_t *source, size_t size, const char *data) { pm_parse(&parser); bool result = parser.error_list.size == 0; - pm_parser_free(&parser); + pm_parser_cleanup(&parser); pm_arena_free(&arena); pm_options_free(&options); @@ -22821,7 +22821,7 @@ pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, cons pm_serialize_content(&parser, node, buffer); pm_buffer_append_byte(buffer, '\0'); - pm_parser_free(&parser); + pm_parser_cleanup(&parser); pm_arena_free(&arena); pm_options_free(&options); } @@ -22844,7 +22844,7 @@ pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fge pm_buffer_append_byte(buffer, '\0'); pm_buffer_free(parser_buffer); - pm_parser_free(&parser); + pm_parser_cleanup(&parser); pm_arena_free(&arena); pm_options_free(&options); } @@ -22867,7 +22867,7 @@ pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t s pm_buffer_append_varsint(buffer, parser.start_line); pm_serialize_comment_list(&parser.comment_list, buffer); - pm_parser_free(&parser); + pm_parser_cleanup(&parser); pm_arena_free(&arena); pm_options_free(&options); } diff --git a/templates/src/serialize.c.erb b/templates/src/serialize.c.erb index a69e38f010..69ee654242 100644 --- a/templates/src/serialize.c.erb +++ b/templates/src/serialize.c.erb @@ -338,7 +338,7 @@ pm_serialize_lex(pm_buffer_t 
*buffer, const uint8_t *source, size_t size, const pm_serialize_metadata(&parser, buffer); - pm_parser_free(&parser); + pm_parser_cleanup(&parser); pm_arena_free(&arena); pm_options_free(&options); } @@ -367,7 +367,7 @@ pm_serialize_parse_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, pm_buffer_append_byte(buffer, 0); pm_serialize(&parser, node, buffer); - pm_parser_free(&parser); + pm_parser_cleanup(&parser); pm_arena_free(&arena); pm_options_free(&options); } From 88a247a4863c39579ca02d3c5269f15e8946211f Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 15:13:08 -0400 Subject: [PATCH 042/100] Move string query into its own file --- include/prism.h | 48 +--------- include/prism/string_query.h | 62 +++++++++++++ prism.gemspec | 2 + src/prism.c | 163 ---------------------------------- src/string_query.c | 166 +++++++++++++++++++++++++++++++++++ 5 files changed, 231 insertions(+), 210 deletions(-) create mode 100644 include/prism/string_query.h create mode 100644 src/string_query.c diff --git a/include/prism.h b/include/prism.h index 2005c57a77..6e5b2dd083 100644 --- a/include/prism.h +++ b/include/prism.h @@ -17,6 +17,7 @@ extern "C" { #include "prism/options.h" #include "prism/parser.h" #include "prism/prettyprint.h" +#include "prism/string_query.h" #include "prism/version.h" #include @@ -257,53 +258,6 @@ PRISM_EXPORTED_FUNCTION void pm_dump_json(pm_buffer_t *buffer, const pm_parser_t #endif -/** - * Represents the results of a slice query. - */ -typedef enum { - /** Returned if the encoding given to a slice query was invalid. */ - PM_STRING_QUERY_ERROR = -1, - - /** Returned if the result of the slice query is false. */ - PM_STRING_QUERY_FALSE, - - /** Returned if the result of the slice query is true. */ - PM_STRING_QUERY_TRUE -} pm_string_query_t; - -/** - * Check that the slice is a valid local variable name. - * - * @param source The source to check. - * @param length The length of the source. 
- * @param encoding_name The name of the encoding of the source. - * @return PM_STRING_QUERY_TRUE if the query is true, PM_STRING_QUERY_FALSE if - * the query is false, and PM_STRING_QUERY_ERROR if the encoding was invalid. - */ -PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name); - -/** - * Check that the slice is a valid constant name. - * - * @param source The source to check. - * @param length The length of the source. - * @param encoding_name The name of the encoding of the source. - * @return PM_STRING_QUERY_TRUE if the query is true, PM_STRING_QUERY_FALSE if - * the query is false, and PM_STRING_QUERY_ERROR if the encoding was invalid. - */ -PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name); - -/** - * Check that the slice is a valid method name. - * - * @param source The source to check. - * @param length The length of the source. - * @param encoding_name The name of the encoding of the source. - * @return PM_STRING_QUERY_TRUE if the query is true, PM_STRING_QUERY_FALSE if - * the query is false, and PM_STRING_QUERY_ERROR if the encoding was invalid. - */ -PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name); - /** * @mainpage * diff --git a/include/prism/string_query.h b/include/prism/string_query.h new file mode 100644 index 0000000000..55c6c82697 --- /dev/null +++ b/include/prism/string_query.h @@ -0,0 +1,62 @@ +/** + * @file string_query.h + * + * Functions for querying properties of strings, such as whether they are valid + * local variable names, constant names, or method names. + */ +#ifndef PRISM_STRING_QUERY_H +#define PRISM_STRING_QUERY_H + +#include "prism/compiler/exported.h" + +#include +#include + +/** + * Represents the results of a slice query. 
+ */ +typedef enum { + /** Returned if the encoding given to a slice query was invalid. */ + PM_STRING_QUERY_ERROR = -1, + + /** Returned if the result of the slice query is false. */ + PM_STRING_QUERY_FALSE, + + /** Returned if the result of the slice query is true. */ + PM_STRING_QUERY_TRUE +} pm_string_query_t; + +/** + * Check that the slice is a valid local variable name. + * + * @param source The source to check. + * @param length The length of the source. + * @param encoding_name The name of the encoding of the source. + * @return PM_STRING_QUERY_TRUE if the query is true, PM_STRING_QUERY_FALSE if + * the query is false, and PM_STRING_QUERY_ERROR if the encoding was invalid. + */ +PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name); + +/** + * Check that the slice is a valid constant name. + * + * @param source The source to check. + * @param length The length of the source. + * @param encoding_name The name of the encoding of the source. + * @return PM_STRING_QUERY_TRUE if the query is true, PM_STRING_QUERY_FALSE if + * the query is false, and PM_STRING_QUERY_ERROR if the encoding was invalid. + */ +PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name); + +/** + * Check that the slice is a valid method name. + * + * @param source The source to check. + * @param length The length of the source. + * @param encoding_name The name of the encoding of the source. + * @return PM_STRING_QUERY_TRUE if the query is true, PM_STRING_QUERY_FALSE if + * the query is false, and PM_STRING_QUERY_ERROR if the encoding was invalid. 
+ */ +PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name); + +#endif diff --git a/prism.gemspec b/prism.gemspec index f407e2f952..762ab7b590 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -93,6 +93,7 @@ Gem::Specification.new do |spec| "include/prism/parser.h", "include/prism/prettyprint.h", "include/prism/static_literals.h", + "include/prism/string_query.h", "include/prism/strings.h", "include/prism/version.h", "lib/prism.rb", @@ -201,6 +202,7 @@ Gem::Specification.new do |spec| "src/regexp.c", "src/serialize.c", "src/static_literals.c", + "src/string_query.c", "src/strings.c", "src/strncasecmp.c", "src/strpbrk.c", diff --git a/src/prism.c b/src/prism.c index ee5866a341..aee3e63599 100644 --- a/src/prism.c +++ b/src/prism.c @@ -22873,166 +22873,3 @@ pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t s } #endif - -/******************************************************************************/ -/* Slice queries for the Ruby API */ -/******************************************************************************/ - -/** The category of slice returned from pm_slice_type. */ -typedef enum { - /** Returned when the given encoding name is invalid. */ - PM_SLICE_TYPE_ERROR = -1, - - /** Returned when no other types apply to the slice. */ - PM_SLICE_TYPE_NONE, - - /** Returned when the slice is a valid local variable name. */ - PM_SLICE_TYPE_LOCAL, - - /** Returned when the slice is a valid constant name. */ - PM_SLICE_TYPE_CONSTANT, - - /** Returned when the slice is a valid method name. */ - PM_SLICE_TYPE_METHOD_NAME -} pm_slice_type_t; - -/** - * Check that the slice is a valid local variable name or constant. 
- */ -pm_slice_type_t -pm_slice_type(const uint8_t *source, size_t length, const char *encoding_name) { - // first, get the right encoding object - const pm_encoding_t *encoding = pm_encoding_find((const uint8_t *) encoding_name, (const uint8_t *) (encoding_name + strlen(encoding_name))); - if (encoding == NULL) return PM_SLICE_TYPE_ERROR; - - // check that there is at least one character - if (length == 0) return PM_SLICE_TYPE_NONE; - - size_t width; - if ((width = encoding->alpha_char(source, (ptrdiff_t) length)) != 0) { - // valid because alphabetical - } else if (*source == '_') { - // valid because underscore - width = 1; - } else if ((*source >= 0x80) && ((width = encoding->char_width(source, (ptrdiff_t) length)) > 0)) { - // valid because multibyte - } else { - // invalid because no match - return PM_SLICE_TYPE_NONE; - } - - // determine the type of the slice based on the first character - const uint8_t *end = source + length; - pm_slice_type_t result = encoding->isupper_char(source, end - source) ? PM_SLICE_TYPE_CONSTANT : PM_SLICE_TYPE_LOCAL; - - // next, iterate through all of the bytes of the string to ensure that they - // are all valid identifier characters - source += width; - - while (source < end) { - if ((width = encoding->alnum_char(source, end - source)) != 0) { - // valid because alphanumeric - source += width; - } else if (*source == '_') { - // valid because underscore - source++; - } else if ((*source >= 0x80) && ((width = encoding->char_width(source, end - source)) > 0)) { - // valid because multibyte - source += width; - } else { - // invalid because no match - break; - } - } - - // accept a ! or ? at the end of the slice as a method name - if (*source == '!' || *source == '?' || *source == '=') { - source++; - result = PM_SLICE_TYPE_METHOD_NAME; - } - - // valid if we are at the end of the slice - return source == end ? result : PM_SLICE_TYPE_NONE; -} - -/** - * Check that the slice is a valid local variable name. 
- */ -pm_string_query_t -pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name) { - switch (pm_slice_type(source, length, encoding_name)) { - case PM_SLICE_TYPE_ERROR: - return PM_STRING_QUERY_ERROR; - case PM_SLICE_TYPE_NONE: - case PM_SLICE_TYPE_CONSTANT: - case PM_SLICE_TYPE_METHOD_NAME: - return PM_STRING_QUERY_FALSE; - case PM_SLICE_TYPE_LOCAL: - return PM_STRING_QUERY_TRUE; - } - - assert(false && "unreachable"); - return PM_STRING_QUERY_FALSE; -} - -/** - * Check that the slice is a valid constant name. - */ -pm_string_query_t -pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name) { - switch (pm_slice_type(source, length, encoding_name)) { - case PM_SLICE_TYPE_ERROR: - return PM_STRING_QUERY_ERROR; - case PM_SLICE_TYPE_NONE: - case PM_SLICE_TYPE_LOCAL: - case PM_SLICE_TYPE_METHOD_NAME: - return PM_STRING_QUERY_FALSE; - case PM_SLICE_TYPE_CONSTANT: - return PM_STRING_QUERY_TRUE; - } - - assert(false && "unreachable"); - return PM_STRING_QUERY_FALSE; -} - -/** - * Check that the slice is a valid method name. - */ -pm_string_query_t -pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name) { -#define B(p) ((p) ? 
PM_STRING_QUERY_TRUE : PM_STRING_QUERY_FALSE) -#define C1(c) (*source == c) -#define C2(s) (memcmp(source, s, 2) == 0) -#define C3(s) (memcmp(source, s, 3) == 0) - - switch (pm_slice_type(source, length, encoding_name)) { - case PM_SLICE_TYPE_ERROR: - return PM_STRING_QUERY_ERROR; - case PM_SLICE_TYPE_NONE: - break; - case PM_SLICE_TYPE_LOCAL: - // numbered parameters are not valid method names - return B((length != 2) || (source[0] != '_') || (source[1] == '0') || !pm_char_is_decimal_digit(source[1])); - case PM_SLICE_TYPE_CONSTANT: - // all constants are valid method names - case PM_SLICE_TYPE_METHOD_NAME: - // all method names are valid method names - return PM_STRING_QUERY_TRUE; - } - - switch (length) { - case 1: - return B(C1('&') || C1('`') || C1('!') || C1('^') || C1('>') || C1('<') || C1('-') || C1('%') || C1('|') || C1('+') || C1('/') || C1('*') || C1('~')); - case 2: - return B(C2("!=") || C2("!~") || C2("[]") || C2("==") || C2("=~") || C2(">=") || C2(">>") || C2("<=") || C2("<<") || C2("**")); - case 3: - return B(C3("===") || C3("<=>") || C3("[]=")); - default: - return PM_STRING_QUERY_FALSE; - } - -#undef B -#undef C1 -#undef C2 -#undef C3 -} diff --git a/src/string_query.c b/src/string_query.c new file mode 100644 index 0000000000..a3be418b75 --- /dev/null +++ b/src/string_query.c @@ -0,0 +1,166 @@ +#include "prism/string_query.h" + +#include "prism/internal/char.h" +#include "prism/internal/encoding.h" + +#include +#include + +/** The category of slice returned from pm_slice_type. */ +typedef enum { + /** Returned when the given encoding name is invalid. */ + PM_SLICE_TYPE_ERROR = -1, + + /** Returned when no other types apply to the slice. */ + PM_SLICE_TYPE_NONE, + + /** Returned when the slice is a valid local variable name. */ + PM_SLICE_TYPE_LOCAL, + + /** Returned when the slice is a valid constant name. */ + PM_SLICE_TYPE_CONSTANT, + + /** Returned when the slice is a valid method name. 
*/ + PM_SLICE_TYPE_METHOD_NAME +} pm_slice_type_t; + +/** + * Check that the slice is a valid local variable name or constant. + */ +pm_slice_type_t +pm_slice_type(const uint8_t *source, size_t length, const char *encoding_name) { + // first, get the right encoding object + const pm_encoding_t *encoding = pm_encoding_find((const uint8_t *) encoding_name, (const uint8_t *) (encoding_name + strlen(encoding_name))); + if (encoding == NULL) return PM_SLICE_TYPE_ERROR; + + // check that there is at least one character + if (length == 0) return PM_SLICE_TYPE_NONE; + + size_t width; + if ((width = encoding->alpha_char(source, (ptrdiff_t) length)) != 0) { + // valid because alphabetical + } else if (*source == '_') { + // valid because underscore + width = 1; + } else if ((*source >= 0x80) && ((width = encoding->char_width(source, (ptrdiff_t) length)) > 0)) { + // valid because multibyte + } else { + // invalid because no match + return PM_SLICE_TYPE_NONE; + } + + // determine the type of the slice based on the first character + const uint8_t *end = source + length; + pm_slice_type_t result = encoding->isupper_char(source, end - source) ? PM_SLICE_TYPE_CONSTANT : PM_SLICE_TYPE_LOCAL; + + // next, iterate through all of the bytes of the string to ensure that they + // are all valid identifier characters + source += width; + + while (source < end) { + if ((width = encoding->alnum_char(source, end - source)) != 0) { + // valid because alphanumeric + source += width; + } else if (*source == '_') { + // valid because underscore + source++; + } else if ((*source >= 0x80) && ((width = encoding->char_width(source, end - source)) > 0)) { + // valid because multibyte + source += width; + } else { + // invalid because no match + break; + } + } + + // accept a ! or ? at the end of the slice as a method name + if (*source == '!' || *source == '?' 
|| *source == '=') { + source++; + result = PM_SLICE_TYPE_METHOD_NAME; + } + + // valid if we are at the end of the slice + return source == end ? result : PM_SLICE_TYPE_NONE; +} + +/** + * Check that the slice is a valid local variable name. + */ +pm_string_query_t +pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name) { + switch (pm_slice_type(source, length, encoding_name)) { + case PM_SLICE_TYPE_ERROR: + return PM_STRING_QUERY_ERROR; + case PM_SLICE_TYPE_NONE: + case PM_SLICE_TYPE_CONSTANT: + case PM_SLICE_TYPE_METHOD_NAME: + return PM_STRING_QUERY_FALSE; + case PM_SLICE_TYPE_LOCAL: + return PM_STRING_QUERY_TRUE; + } + + assert(false && "unreachable"); + return PM_STRING_QUERY_FALSE; +} + +/** + * Check that the slice is a valid constant name. + */ +pm_string_query_t +pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name) { + switch (pm_slice_type(source, length, encoding_name)) { + case PM_SLICE_TYPE_ERROR: + return PM_STRING_QUERY_ERROR; + case PM_SLICE_TYPE_NONE: + case PM_SLICE_TYPE_LOCAL: + case PM_SLICE_TYPE_METHOD_NAME: + return PM_STRING_QUERY_FALSE; + case PM_SLICE_TYPE_CONSTANT: + return PM_STRING_QUERY_TRUE; + } + + assert(false && "unreachable"); + return PM_STRING_QUERY_FALSE; +} + +/** + * Check that the slice is a valid method name. + */ +pm_string_query_t +pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name) { +#define B(p) ((p) ? 
PM_STRING_QUERY_TRUE : PM_STRING_QUERY_FALSE) +#define C1(c) (*source == c) +#define C2(s) (memcmp(source, s, 2) == 0) +#define C3(s) (memcmp(source, s, 3) == 0) + + switch (pm_slice_type(source, length, encoding_name)) { + case PM_SLICE_TYPE_ERROR: + return PM_STRING_QUERY_ERROR; + case PM_SLICE_TYPE_NONE: + break; + case PM_SLICE_TYPE_LOCAL: + // numbered parameters are not valid method names + return B((length != 2) || (source[0] != '_') || (source[1] == '0') || !pm_char_is_decimal_digit(source[1])); + case PM_SLICE_TYPE_CONSTANT: + // all constants are valid method names + case PM_SLICE_TYPE_METHOD_NAME: + // all method names are valid method names + return PM_STRING_QUERY_TRUE; + } + + switch (length) { + case 1: + return B(C1('&') || C1('`') || C1('!') || C1('^') || C1('>') || C1('<') || C1('-') || C1('%') || C1('|') || C1('+') || C1('/') || C1('*') || C1('~')); + case 2: + return B(C2("!=") || C2("!~") || C2("[]") || C2("==") || C2("=~") || C2(">=") || C2(">>") || C2("<=") || C2("<<") || C2("**")); + case 3: + return B(C3("===") || C3("<=>") || C3("[]=")); + default: + return PM_STRING_QUERY_FALSE; + } + +#undef B +#undef C1 +#undef C2 +#undef C3 +} From 6901e85f937643d39195499eee82cb845acba1fc Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 15:23:52 -0400 Subject: [PATCH 043/100] Trim down prism.h --- include/prism.h | 16 +--------------- src/prism.c | 5 +++++ templates/src/serialize.c.erb | 18 +++++++++++++----- 3 files changed, 19 insertions(+), 20 deletions(-) diff --git a/include/prism.h b/include/prism.h index 6e5b2dd083..68b21122d3 100644 --- a/include/prism.h +++ b/include/prism.h @@ -13,6 +13,7 @@ extern "C" { #include "prism/arena.h" #include "prism/ast.h" #include "prism/diagnostic.h" +#include "prism/excludes.h" #include "prism/node.h" #include "prism/options.h" #include "prism/parser.h" @@ -20,21 +21,6 @@ extern "C" { #include "prism/string_query.h" #include "prism/version.h" -#include -#include -#include -#include -#include 
-#include -#include -#include -#include -#include - -#ifndef _WIN32 -#include -#endif - /** * The prism version and the serialization format. * diff --git a/src/prism.c b/src/prism.c index aee3e63599..ffc9aad8aa 100644 --- a/src/prism.c +++ b/src/prism.c @@ -27,7 +27,12 @@ #include "prism/node_new.h" +#include +#include #include +#include +#include +#include #include /** diff --git a/templates/src/serialize.c.erb b/templates/src/serialize.c.erb index 69ee654242..b62d0bf066 100644 --- a/templates/src/serialize.c.erb +++ b/templates/src/serialize.c.erb @@ -1,17 +1,25 @@ -#include "prism.h" +#include "prism/excludes.h" + +// We optionally support serializing to a binary string. For systems that don't +// want or need this functionality, it can be turned off with the +// PRISM_EXCLUDE_SERIALIZATION define. +#ifndef PRISM_EXCLUDE_SERIALIZATION #include "prism/compiler/inline.h" + #include "prism/internal/buffer.h" #include "prism/internal/encoding.h" #include "prism/internal/list.h" #include "prism/internal/options.h" -// We optionally support serializing to a binary string. For systems that don't -// want or need this functionality, it can be turned off with the -// PRISM_EXCLUDE_SERIALIZATION define. 
-#ifndef PRISM_EXCLUDE_SERIALIZATION +#include "prism.h" +#include "prism/ast.h" +#include "prism/diagnostic.h" +#include "prism/parser.h" +#include #include +#include static PRISM_INLINE uint32_t pm_ptrdifft_to_u32(ptrdiff_t value) { From 92b48ce940d79f1ac58d8d497c41377644ca11b6 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 15:27:04 -0400 Subject: [PATCH 044/100] pm_string_free -> pm_string_cleanup --- ext/prism/extension.c | 28 ++++++++++++------------ include/prism/strings.h | 10 ++++----- lib/prism/ffi.rb | 4 ++-- rust/ruby-prism-sys/build/main.rs | 2 +- rust/ruby-prism-sys/tests/utils_tests.rs | 6 ++--- src/options.c | 6 ++--- src/prism.c | 6 ++--- src/regexp.c | 2 +- src/strings.c | 6 ++--- 9 files changed, 35 insertions(+), 35 deletions(-) diff --git a/ext/prism/extension.c b/ext/prism/extension.c index ad80a35804..77a0d9dc9b 100644 --- a/ext/prism/extension.c +++ b/ext/prism/extension.c @@ -417,7 +417,7 @@ dump(int argc, VALUE *argv, VALUE self) { xfree_sized(dup, length); #endif - pm_string_free(&input); + pm_string_cleanup(&input); pm_options_free(&options); return value; @@ -440,7 +440,7 @@ dump_file(int argc, VALUE *argv, VALUE self) { file_options(argc, argv, &input, &options, &encoded_filepath); VALUE value = dump_input(&input, &options); - pm_string_free(&input); + pm_string_cleanup(&input); pm_options_free(&options); return value; @@ -813,7 +813,7 @@ lex(int argc, VALUE *argv, VALUE self) { string_options(argc, argv, &input, &options); VALUE result = parse_lex_input(&input, &options, false); - pm_string_free(&input); + pm_string_cleanup(&input); pm_options_free(&options); return result; @@ -836,7 +836,7 @@ lex_file(int argc, VALUE *argv, VALUE self) { file_options(argc, argv, &input, &options, &encoded_filepath); VALUE value = parse_lex_input(&input, &options, false); - pm_string_free(&input); + pm_string_cleanup(&input); pm_options_free(&options); return value; @@ -936,7 +936,7 @@ parse(int argc, VALUE *argv, VALUE self) { 
xfree_sized(dup, length); #endif - pm_string_free(&input); + pm_string_cleanup(&input); pm_options_free(&options); return value; } @@ -958,7 +958,7 @@ parse_file(int argc, VALUE *argv, VALUE self) { file_options(argc, argv, &input, &options, &encoded_filepath); VALUE value = parse_input(&input, &options); - pm_string_free(&input); + pm_string_cleanup(&input); pm_options_free(&options); return value; @@ -994,7 +994,7 @@ profile(int argc, VALUE *argv, VALUE self) { string_options(argc, argv, &input, &options); profile_input(&input, &options); - pm_string_free(&input); + pm_string_cleanup(&input); pm_options_free(&options); return Qnil; @@ -1018,7 +1018,7 @@ profile_file(int argc, VALUE *argv, VALUE self) { file_options(argc, argv, &input, &options, &encoded_filepath); profile_input(&input, &options); - pm_string_free(&input); + pm_string_cleanup(&input); pm_options_free(&options); return Qnil; @@ -1124,7 +1124,7 @@ parse_comments(int argc, VALUE *argv, VALUE self) { string_options(argc, argv, &input, &options); VALUE result = parse_input_comments(&input, &options); - pm_string_free(&input); + pm_string_cleanup(&input); pm_options_free(&options); return result; @@ -1147,7 +1147,7 @@ parse_file_comments(int argc, VALUE *argv, VALUE self) { file_options(argc, argv, &input, &options, &encoded_filepath); VALUE value = parse_input_comments(&input, &options); - pm_string_free(&input); + pm_string_cleanup(&input); pm_options_free(&options); return value; @@ -1175,7 +1175,7 @@ parse_lex(int argc, VALUE *argv, VALUE self) { string_options(argc, argv, &input, &options); VALUE value = parse_lex_input(&input, &options, true); - pm_string_free(&input); + pm_string_cleanup(&input); pm_options_free(&options); return value; @@ -1205,7 +1205,7 @@ parse_lex_file(int argc, VALUE *argv, VALUE self) { file_options(argc, argv, &input, &options, &encoded_filepath); VALUE value = parse_lex_input(&input, &options, true); - pm_string_free(&input); + pm_string_cleanup(&input); 
pm_options_free(&options); return value; @@ -1244,7 +1244,7 @@ parse_success_p(int argc, VALUE *argv, VALUE self) { string_options(argc, argv, &input, &options); VALUE result = parse_input_success_p(&input, &options); - pm_string_free(&input); + pm_string_cleanup(&input); pm_options_free(&options); return result; @@ -1280,7 +1280,7 @@ parse_file_success_p(int argc, VALUE *argv, VALUE self) { file_options(argc, argv, &input, &options, &encoded_filepath); VALUE result = parse_input_success_p(&input, &options); - pm_string_free(&input); + pm_string_cleanup(&input); pm_options_free(&options); return result; diff --git a/include/prism/strings.h b/include/prism/strings.h index 6f659fc0e7..e4de068abc 100644 --- a/include/prism/strings.h +++ b/include/prism/strings.h @@ -30,11 +30,11 @@ typedef struct { /** This is a slice of another string, and should not be freed. */ PM_STRING_SHARED, - /** This string owns its memory, and should be freed using `pm_string_free()`. */ + /** This string owns its memory, and should be freed using `pm_string_cleanup()`. */ PM_STRING_OWNED, #ifdef PRISM_HAS_MMAP - /** This string is a memory-mapped file, and should be freed using `pm_string_free()`. */ + /** This string is a memory-mapped file, and should be freed using `pm_string_cleanup()`. */ PM_STRING_MAPPED #endif } type; @@ -82,7 +82,7 @@ typedef enum { /** * Read the file indicated by the filepath parameter into source and load its * contents and size into the given `pm_string_t`. The given `pm_string_t` - * should be freed using `pm_string_free` when it is no longer used. + * should be freed using `pm_string_cleanup` when it is no longer used. 
* * We want to use demand paging as much as possible in order to avoid having to * read the entire file into memory (which could be detrimental to performance @@ -101,7 +101,7 @@ PRISM_EXPORTED_FUNCTION pm_string_init_result_t pm_string_mapped_init(pm_string_ /** * Read the file indicated by the filepath parameter into source and load its * contents and size into the given `pm_string_t`. The given `pm_string_t` - * should be freed using `pm_string_free` when it is no longer used. + * should be freed using `pm_string_cleanup` when it is no longer used. * * @param string The string to initialize. * @param filepath The filepath to read. @@ -138,6 +138,6 @@ PRISM_EXPORTED_FUNCTION const uint8_t * pm_string_source(const pm_string_t *stri * * \public \memberof pm_string_t */ -PRISM_EXPORTED_FUNCTION void pm_string_free(pm_string_t *string); +PRISM_EXPORTED_FUNCTION void pm_string_cleanup(pm_string_t *string); #endif diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb index e9682c527c..5f5c55378a 100644 --- a/lib/prism/ffi.rb +++ b/lib/prism/ffi.rb @@ -117,7 +117,7 @@ def self.load_exported_functions_from(header, *functions, callbacks) load_exported_functions_from( "prism/strings.h", "pm_string_mapped_init", - "pm_string_free", + "pm_string_cleanup", "pm_string_source", "pm_string_length", "pm_string_sizeof", @@ -219,7 +219,7 @@ def self.with_file(filepath) raise "Unknown error initializing pm_string_t: #{result.inspect}" end ensure - LibRubyParser.pm_string_free(pm_string) + LibRubyParser.pm_string_cleanup(pm_string) end end end diff --git a/rust/ruby-prism-sys/build/main.rs b/rust/ruby-prism-sys/build/main.rs index 3bbf4fbbeb..76bbe603f1 100644 --- a/rust/ruby-prism-sys/build/main.rs +++ b/rust/ruby-prism-sys/build/main.rs @@ -167,7 +167,7 @@ fn generate_bindings(ruby_include_path: &Path) -> bindgen::Bindings { .allowlist_function("pm_parser_init") .allowlist_function("pm_size_to_native") .allowlist_function("pm_string_constant_init") - 
.allowlist_function("pm_string_free") + .allowlist_function("pm_string_cleanup") .allowlist_function("pm_string_length") .allowlist_function("pm_string_source") .allowlist_function("pm_version") diff --git a/rust/ruby-prism-sys/tests/utils_tests.rs b/rust/ruby-prism-sys/tests/utils_tests.rs index 2c42750dd9..20cce8ea2e 100644 --- a/rust/ruby-prism-sys/tests/utils_tests.rs +++ b/rust/ruby-prism-sys/tests/utils_tests.rs @@ -14,7 +14,7 @@ fn version_test() { mod string { use ruby_prism_sys::{ - pm_string_free, pm_string_length, pm_string_source, pm_string_t, pm_string_t__bindgen_ty_1, PM_STRING_CONSTANT, + pm_string_cleanup, pm_string_length, pm_string_source, pm_string_t, pm_string_t__bindgen_ty_1, PM_STRING_CONSTANT, PM_STRING_MAPPED, PM_STRING_OWNED, PM_STRING_SHARED, }; @@ -54,7 +54,7 @@ mod string { let result_start = pm_string_source(&raw const s.pm_string); assert_eq!(s.start_ptr(), result_start); - pm_string_free(&raw mut s.pm_string); + pm_string_cleanup(&raw mut s.pm_string); } } @@ -84,7 +84,7 @@ mod string { let result_start = pm_string_source(&raw const s.pm_string); assert_eq!(s.pm_string.source, result_start); - pm_string_free(&raw mut s.pm_string); + pm_string_cleanup(&raw mut s.pm_string); } } diff --git a/src/options.c b/src/options.c index 39f4f2fea5..4848fa0ac9 100644 --- a/src/options.c +++ b/src/options.c @@ -224,14 +224,14 @@ pm_options_scope_forwarding_set(pm_options_scope_t *scope, uint8_t forwarding) { */ void pm_options_free(pm_options_t *options) { - pm_string_free(&options->filepath); - pm_string_free(&options->encoding); + pm_string_cleanup(&options->filepath); + pm_string_cleanup(&options->encoding); for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) { pm_options_scope_t *scope = &options->scopes[scope_index]; for (size_t local_index = 0; local_index < scope->locals_count; local_index++) { - pm_string_free(&scope->locals[local_index]); + pm_string_cleanup(&scope->locals[local_index]); } 
xfree_sized(scope->locals, scope->locals_count * sizeof(pm_string_t)); diff --git a/src/prism.c b/src/prism.c index ffc9aad8aa..24d7a49038 100644 --- a/src/prism.c +++ b/src/prism.c @@ -7807,7 +7807,7 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) { // When we're done, we want to free the string in case we had to // allocate memory for it. - pm_string_free(&key); + pm_string_cleanup(&key); // Allocate a new magic comment node to append to the parser's list. pm_magic_comment_t *magic_comment = (pm_magic_comment_t *) pm_arena_alloc(&parser->metadata_arena, sizeof(pm_magic_comment_t), PRISM_ALIGNOF(pm_magic_comment_t)); @@ -21375,7 +21375,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t pm_string_owned_init(&owned, (uint8_t *) memory, total_length); result = parse_interpolated_regular_expression_named_captures(parser, &owned, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED)); - pm_string_free(&owned); + pm_string_cleanup(&owned); } } else if (PM_NODE_TYPE_P(node, PM_REGULAR_EXPRESSION_NODE)) { // If we have a regular expression node, then we can parse @@ -22489,7 +22489,7 @@ pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_ch */ void pm_parser_cleanup(pm_parser_t *parser) { - pm_string_free(&parser->filepath); + pm_string_cleanup(&parser->filepath); pm_arena_free(&parser->metadata_arena); while (parser->current_scope != NULL) { diff --git a/src/regexp.c b/src/regexp.c index 3013974367..2ee2555686 100644 --- a/src/regexp.c +++ b/src/regexp.c @@ -168,7 +168,7 @@ pm_regexp_parser_named_capture(pm_regexp_parser_t *parser, const uint8_t *start, pm_string_t string; pm_string_shared_init(&string, start, end); parser->name_callback(parser->parser, &string, parser->shared, parser->name_data); - pm_string_free(&string); + pm_string_cleanup(&string); } /** diff --git a/src/strings.c b/src/strings.c index 37b48293ab..88bb5ad24f 100644 --- a/src/strings.c +++ 
b/src/strings.c @@ -127,7 +127,7 @@ pm_string_file_handle_close(pm_string_file_handle_t *handle) { /** * Read the file indicated by the filepath parameter into source and load its * contents and size into the given `pm_string_t`. The given `pm_string_t` - * should be freed using `pm_string_free` when it is no longer used. + * should be freed using `pm_string_cleanup` when it is no longer used. * * We want to use demand paging as much as possible in order to avoid having to * read the entire file into memory (which could be detrimental to performance @@ -223,7 +223,7 @@ pm_string_mapped_init(pm_string_t *string, const char *filepath) { /** * Read the file indicated by the filepath parameter into source and load its * contents and size into the given `pm_string_t`. The given `pm_string_t` - * should be freed using `pm_string_free` when it is no longer used. + * should be freed using `pm_string_cleanup` when it is no longer used. */ pm_string_init_result_t pm_string_file_init(pm_string_t *string, const char *filepath) { @@ -383,7 +383,7 @@ pm_string_source(const pm_string_t *string) { * Free the associated memory of the given string. 
*/ void -pm_string_free(pm_string_t *string) { +pm_string_cleanup(pm_string_t *string) { void *memory = (void *) string->source; if (string->type == PM_STRING_OWNED) { From 0edaefbf2be84a39ca49a5cfb04eff83867d8a59 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 15:27:32 -0400 Subject: [PATCH 045/100] pm_options_free -> pm_options_cleanup --- ext/prism/extension.c | 38 +++++++++++++++---------------- include/prism/options.h | 2 +- rust/ruby-prism-sys/build/main.rs | 2 +- rust/ruby-prism/src/lib.rs | 4 ++-- src/options.c | 4 ++-- src/prism.c | 8 +++---- templates/src/serialize.c.erb | 4 ++-- 7 files changed, 31 insertions(+), 31 deletions(-) diff --git a/ext/prism/extension.c b/ext/prism/extension.c index 77a0d9dc9b..d6e446907b 100644 --- a/ext/prism/extension.c +++ b/ext/prism/extension.c @@ -288,14 +288,14 @@ extract_options(pm_options_t *options, VALUE filepath, VALUE keywords) { rb_protect(build_options, (VALUE) argument, &state); if (state != 0) { - pm_options_free(options); + pm_options_cleanup(options); rb_jump_tag(state); } } if (!NIL_P(filepath)) { if (!RB_TYPE_P(filepath, T_STRING)) { - pm_options_free(options); + pm_options_cleanup(options); rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(filepath)); } @@ -336,7 +336,7 @@ file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options, V case PM_STRING_INIT_SUCCESS: break; case PM_STRING_INIT_ERROR_GENERIC: { - pm_options_free(options); + pm_options_cleanup(options); #ifdef _WIN32 int e = rb_w32_map_errno(GetLastError()); @@ -348,11 +348,11 @@ file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options, V break; } case PM_STRING_INIT_ERROR_DIRECTORY: - pm_options_free(options); + pm_options_cleanup(options); rb_syserr_fail(EISDIR, source); break; default: - pm_options_free(options); + pm_options_cleanup(options); rb_raise(rb_eRuntimeError, "Unknown error (%d) initializing file: %s", result, source); break; } @@ 
-418,7 +418,7 @@ dump(int argc, VALUE *argv, VALUE self) { #endif pm_string_cleanup(&input); - pm_options_free(&options); + pm_options_cleanup(&options); return value; } @@ -441,7 +441,7 @@ dump_file(int argc, VALUE *argv, VALUE self) { VALUE value = dump_input(&input, &options); pm_string_cleanup(&input); - pm_options_free(&options); + pm_options_cleanup(&options); return value; } @@ -814,7 +814,7 @@ lex(int argc, VALUE *argv, VALUE self) { VALUE result = parse_lex_input(&input, &options, false); pm_string_cleanup(&input); - pm_options_free(&options); + pm_options_cleanup(&options); return result; } @@ -837,7 +837,7 @@ lex_file(int argc, VALUE *argv, VALUE self) { VALUE value = parse_lex_input(&input, &options, false); pm_string_cleanup(&input); - pm_options_free(&options); + pm_options_cleanup(&options); return value; } @@ -937,7 +937,7 @@ parse(int argc, VALUE *argv, VALUE self) { #endif pm_string_cleanup(&input); - pm_options_free(&options); + pm_options_cleanup(&options); return value; } @@ -959,7 +959,7 @@ parse_file(int argc, VALUE *argv, VALUE self) { VALUE value = parse_input(&input, &options); pm_string_cleanup(&input); - pm_options_free(&options); + pm_options_cleanup(&options); return value; } @@ -995,7 +995,7 @@ profile(int argc, VALUE *argv, VALUE self) { string_options(argc, argv, &input, &options); profile_input(&input, &options); pm_string_cleanup(&input); - pm_options_free(&options); + pm_options_cleanup(&options); return Qnil; } @@ -1019,7 +1019,7 @@ profile_file(int argc, VALUE *argv, VALUE self) { profile_input(&input, &options); pm_string_cleanup(&input); - pm_options_free(&options); + pm_options_cleanup(&options); return Qnil; } @@ -1125,7 +1125,7 @@ parse_comments(int argc, VALUE *argv, VALUE self) { VALUE result = parse_input_comments(&input, &options); pm_string_cleanup(&input); - pm_options_free(&options); + pm_options_cleanup(&options); return result; } @@ -1148,7 +1148,7 @@ parse_file_comments(int argc, VALUE *argv, VALUE self) { VALUE 
value = parse_input_comments(&input, &options); pm_string_cleanup(&input); - pm_options_free(&options); + pm_options_cleanup(&options); return value; } @@ -1176,7 +1176,7 @@ parse_lex(int argc, VALUE *argv, VALUE self) { VALUE value = parse_lex_input(&input, &options, true); pm_string_cleanup(&input); - pm_options_free(&options); + pm_options_cleanup(&options); return value; } @@ -1206,7 +1206,7 @@ parse_lex_file(int argc, VALUE *argv, VALUE self) { VALUE value = parse_lex_input(&input, &options, true); pm_string_cleanup(&input); - pm_options_free(&options); + pm_options_cleanup(&options); return value; } @@ -1245,7 +1245,7 @@ parse_success_p(int argc, VALUE *argv, VALUE self) { VALUE result = parse_input_success_p(&input, &options); pm_string_cleanup(&input); - pm_options_free(&options); + pm_options_cleanup(&options); return result; } @@ -1281,7 +1281,7 @@ parse_file_success_p(int argc, VALUE *argv, VALUE self) { VALUE result = parse_input_success_p(&input, &options); pm_string_cleanup(&input); - pm_options_free(&options); + pm_options_cleanup(&options); return result; } diff --git a/include/prism/options.h b/include/prism/options.h index 4c8728c69a..75fed285b8 100644 --- a/include/prism/options.h +++ b/include/prism/options.h @@ -414,6 +414,6 @@ PRISM_EXPORTED_FUNCTION void pm_options_scope_forwarding_set(pm_options_scope_t * * \public \memberof pm_options */ -PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options); +PRISM_EXPORTED_FUNCTION void pm_options_cleanup(pm_options_t *options); #endif diff --git a/rust/ruby-prism-sys/build/main.rs b/rust/ruby-prism-sys/build/main.rs index 76bbe603f1..d239dbc51e 100644 --- a/rust/ruby-prism-sys/build/main.rs +++ b/rust/ruby-prism-sys/build/main.rs @@ -152,7 +152,7 @@ fn generate_bindings(ruby_include_path: &Path) -> bindgen::Bindings { .allowlist_function("pm_options_encoding_locked_set") .allowlist_function("pm_options_encoding_set") .allowlist_function("pm_options_filepath_set") - 
.allowlist_function("pm_options_free") + .allowlist_function("pm_options_cleanup") .allowlist_function("pm_options_frozen_string_literal_set") .allowlist_function("pm_options_line_set") .allowlist_function("pm_options_main_script_set") diff --git a/rust/ruby-prism/src/lib.rs b/rust/ruby-prism/src/lib.rs index 3725633378..540f140555 100644 --- a/rust/ruby-prism/src/lib.rs +++ b/rust/ruby-prism/src/lib.rs @@ -27,7 +27,7 @@ pub use self::node_ext::{ConstantPathError, FullName}; pub use self::parse_result::{Comment, CommentType, Comments, Diagnostic, Diagnostics, Location, MagicComment, MagicComments, ParseResult}; use ruby_prism_sys::{ - pm_arena_t, pm_options_command_line_set, pm_options_encoding_locked_set, pm_options_encoding_set, pm_options_filepath_set, pm_options_free, pm_options_frozen_string_literal_set, pm_options_line_set, pm_options_main_script_set, pm_options_partial_script_set, pm_options_scope_forwarding_set, + pm_arena_t, pm_options_command_line_set, pm_options_encoding_locked_set, pm_options_encoding_set, pm_options_filepath_set, pm_options_cleanup, pm_options_frozen_string_literal_set, pm_options_line_set, pm_options_main_script_set, pm_options_partial_script_set, pm_options_scope_forwarding_set, pm_options_scope_get, pm_options_scope_init, pm_options_scope_local_get, pm_options_scopes_init, pm_options_t, pm_parse, pm_parser_init, pm_parser_t, pm_string_constant_init, }; @@ -324,7 +324,7 @@ pub struct ParseOptions { impl Drop for ParseOptions { fn drop(&mut self) { - unsafe { pm_options_free(&raw mut self.options) }; + unsafe { pm_options_cleanup(&raw mut self.options) }; } } diff --git a/src/options.c b/src/options.c index 4848fa0ac9..9817691e73 100644 --- a/src/options.c +++ b/src/options.c @@ -223,7 +223,7 @@ pm_options_scope_forwarding_set(pm_options_scope_t *scope, uint8_t forwarding) { * Free the internal memory associated with the options. 
*/ void -pm_options_free(pm_options_t *options) { +pm_options_cleanup(pm_options_t *options) { pm_string_cleanup(&options->filepath); pm_string_cleanup(&options->encoding); @@ -323,7 +323,7 @@ pm_options_read(pm_options_t *options, const char *data) { pm_options_scope_t *scope = &options->scopes[scope_index]; if (!pm_options_scope_init(scope, locals_count)) { - pm_options_free(options); + pm_options_cleanup(options); return; } diff --git a/src/prism.c b/src/prism.c index 24d7a49038..06cfc14b9a 100644 --- a/src/prism.c +++ b/src/prism.c @@ -22773,7 +22773,7 @@ pm_parse_success_p(const uint8_t *source, size_t size, const char *data) { bool result = parser.error_list.size == 0; pm_parser_cleanup(&parser); pm_arena_free(&arena); - pm_options_free(&options); + pm_options_cleanup(&options); return result; } @@ -22828,7 +22828,7 @@ pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, cons pm_parser_cleanup(&parser); pm_arena_free(&arena); - pm_options_free(&options); + pm_options_cleanup(&options); } /** @@ -22851,7 +22851,7 @@ pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fge pm_buffer_free(parser_buffer); pm_parser_cleanup(&parser); pm_arena_free(&arena); - pm_options_free(&options); + pm_options_cleanup(&options); } /** @@ -22874,7 +22874,7 @@ pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t s pm_parser_cleanup(&parser); pm_arena_free(&arena); - pm_options_free(&options); + pm_options_cleanup(&options); } #endif diff --git a/templates/src/serialize.c.erb b/templates/src/serialize.c.erb index b62d0bf066..b305a95d7e 100644 --- a/templates/src/serialize.c.erb +++ b/templates/src/serialize.c.erb @@ -348,7 +348,7 @@ pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const pm_parser_cleanup(&parser); pm_arena_free(&arena); - pm_options_free(&options); + pm_options_cleanup(&options); } /** @@ -377,7 +377,7 @@ pm_serialize_parse_lex(pm_buffer_t *buffer, const uint8_t 
*source, size_t size, pm_parser_cleanup(&parser); pm_arena_free(&arena); - pm_options_free(&options); + pm_options_cleanup(&options); } #endif From 20de0e4c9c7aff85fc686acb084a2d301ce9437c Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 15:35:06 -0400 Subject: [PATCH 046/100] Do not return bool from pm_options_scope_init --- ext/prism/extension.c | 4 +--- include/prism/options.h | 6 +++--- src/options.c | 9 +++------ 3 files changed, 7 insertions(+), 12 deletions(-) diff --git a/ext/prism/extension.c b/ext/prism/extension.c index d6e446907b..e352eeef4f 100644 --- a/ext/prism/extension.c +++ b/ext/prism/extension.c @@ -149,9 +149,7 @@ build_options_scopes(pm_options_t *options, VALUE scopes) { // Initialize the scope array. size_t locals_count = RARRAY_LEN(locals); pm_options_scope_t *options_scope = &options->scopes[scope_index]; - if (!pm_options_scope_init(options_scope, locals_count)) { - rb_raise(rb_eNoMemError, "failed to allocate memory"); - } + pm_options_scope_init(options_scope, locals_count); // Iterate over the locals and add them to the scope. for (size_t local_index = 0; local_index < locals_count; local_index++) { diff --git a/include/prism/options.h b/include/prism/options.h index 75fed285b8..607394d092 100644 --- a/include/prism/options.h +++ b/include/prism/options.h @@ -376,15 +376,15 @@ PRISM_EXPORTED_FUNCTION const pm_options_scope_t * pm_options_scope_get(const pm /** * Create a new options scope struct. This will hold a set of locals that are in - * scope surrounding the code that is being parsed. + * scope surrounding the code that is being parsed. If the scope was unable to + * allocate its locals, this function will abort the process. * * @param scope The scope struct to initialize. * @param locals_count The number of locals to allocate. - * @return Whether or not the scope was initialized successfully. 
* * \public \memberof pm_options */ -PRISM_EXPORTED_FUNCTION bool pm_options_scope_init(pm_options_scope_t *scope, size_t locals_count); +PRISM_EXPORTED_FUNCTION void pm_options_scope_init(pm_options_scope_t *scope, size_t locals_count); /** * Return a pointer to the local at the given index within the given scope. diff --git a/src/options.c b/src/options.c index 9817691e73..b99db37ab5 100644 --- a/src/options.c +++ b/src/options.c @@ -195,12 +195,12 @@ pm_options_scope_get(const pm_options_t *options, size_t index) { * Create a new options scope struct. This will hold a set of locals that are in * scope surrounding the code that is being parsed. */ -bool +void pm_options_scope_init(pm_options_scope_t *scope, size_t locals_count) { scope->locals_count = locals_count; scope->locals = xcalloc(locals_count, sizeof(pm_string_t)); scope->forwarding = PM_OPTIONS_SCOPE_FORWARDING_NONE; - return scope->locals != NULL; + if (scope->locals == NULL) abort(); } /** @@ -322,10 +322,7 @@ pm_options_read(pm_options_t *options, const char *data) { data += 4; pm_options_scope_t *scope = &options->scopes[scope_index]; - if (!pm_options_scope_init(scope, locals_count)) { - pm_options_cleanup(options); - return; - } + pm_options_scope_init(scope, locals_count); uint8_t forwarding = (uint8_t) *data++; pm_options_scope_forwarding_set(&options->scopes[scope_index], forwarding); From 01676454903c352056b51166605a1d2e179664aa Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 19:42:45 -0400 Subject: [PATCH 047/100] Make options fully opaque --- ext/prism/extension.c | 174 +++++++++++------------ include/prism/internal/options.h | 116 ++++++++++++++++ include/prism/node.h | 7 +- include/prism/options.h | 222 +++++++++++++----------------- rust/ruby-prism-sys/build/main.rs | 10 +- rust/ruby-prism/src/lib.rs | 55 ++++---- src/options.c | 122 ++++++++++++---- 7 files changed, 439 insertions(+), 267 deletions(-) diff --git a/ext/prism/extension.c b/ext/prism/extension.c index 
e352eeef4f..5c4e309d42 100644 --- a/ext/prism/extension.c +++ b/ext/prism/extension.c @@ -148,7 +148,7 @@ build_options_scopes(pm_options_t *options, VALUE scopes) { // Initialize the scope array. size_t locals_count = RARRAY_LEN(locals); - pm_options_scope_t *options_scope = &options->scopes[scope_index]; + pm_options_scope_t *options_scope = pm_options_scope_get_mut(options, scope_index); pm_options_scope_init(options_scope, locals_count); // Iterate over the locals and add them to the scope. @@ -162,7 +162,7 @@ build_options_scopes(pm_options_t *options, VALUE scopes) { } // Add the local to the scope. - pm_string_t *scope_local = &options_scope->locals[local_index]; + pm_string_t *scope_local = pm_options_scope_local_get_mut(options_scope, local_index); const char *name = rb_id2name(SYM2ID(local)); pm_string_constant_init(scope_local, name, strlen(name)); } @@ -206,10 +206,10 @@ build_options_i(VALUE key, VALUE value, VALUE argument) { if (!pm_options_version_set(options, ruby_version, 3)) { // Prism doesn't know this specific version. Is it lower? if (ruby_version[0] < '3' || (ruby_version[0] == '3' && ruby_version[2] < '3')) { - options->version = PM_OPTIONS_VERSION_CRUBY_3_3; + pm_options_version_set_lowest(options); } else { // Must be higher. 
- options->version = PM_OPTIONS_VERSION_LATEST; + pm_options_version_set_highest(options); } } } else if (!pm_options_version_set(options, version, RSTRING_LEN(value))) { @@ -276,7 +276,7 @@ build_options(VALUE argument) { */ static void extract_options(pm_options_t *options, VALUE filepath, VALUE keywords) { - options->line = 1; // default + pm_options_line_set(options, 1); /* default */ if (!NIL_P(keywords)) { struct build_options_data data = { .options = options, .keywords = keywords }; @@ -286,14 +286,14 @@ extract_options(pm_options_t *options, VALUE filepath, VALUE keywords) { rb_protect(build_options, (VALUE) argument, &state); if (state != 0) { - pm_options_cleanup(options); + pm_options_free(options); rb_jump_tag(state); } } if (!NIL_P(filepath)) { if (!RB_TYPE_P(filepath, T_STRING)) { - pm_options_cleanup(options); + pm_options_free(options); rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(filepath)); } @@ -327,14 +327,14 @@ file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options, V *encoded_filepath = rb_str_encode_ospath(filepath); extract_options(options, *encoded_filepath, keywords); - const char *source = (const char *) pm_string_source(&options->filepath); + const char *source = (const char *) pm_string_source(pm_options_filepath_get(options)); pm_string_init_result_t result; switch (result = pm_string_file_init(input, source)) { case PM_STRING_INIT_SUCCESS: break; case PM_STRING_INIT_ERROR_GENERIC: { - pm_options_cleanup(options); + pm_options_free(options); #ifdef _WIN32 int e = rb_w32_map_errno(GetLastError()); @@ -346,11 +346,11 @@ file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options, V break; } case PM_STRING_INIT_ERROR_DIRECTORY: - pm_options_cleanup(options); + pm_options_free(options); rb_syserr_fail(EISDIR, source); break; default: - pm_options_cleanup(options); + pm_options_free(options); rb_raise(rb_eRuntimeError, "Unknown error (%d) initializing 
file: %s", result, source); break; } @@ -398,8 +398,8 @@ dump_input(pm_string_t *input, const pm_options_t *options) { static VALUE dump(int argc, VALUE *argv, VALUE self) { pm_string_t input; - pm_options_t options = { 0 }; - string_options(argc, argv, &input, &options); + pm_options_t *options = pm_options_new(); + string_options(argc, argv, &input, options); #ifdef PRISM_BUILD_DEBUG size_t length = pm_string_length(&input); @@ -408,15 +408,15 @@ dump(int argc, VALUE *argv, VALUE self) { pm_string_constant_init(&input, dup, length); #endif - VALUE value = dump_input(&input, &options); - if (options.freeze) rb_obj_freeze(value); + VALUE value = dump_input(&input, options); + if (pm_options_freeze_get(options)) rb_obj_freeze(value); #ifdef PRISM_BUILD_DEBUG xfree_sized(dup, length); #endif pm_string_cleanup(&input); - pm_options_cleanup(&options); + pm_options_free(options); return value; } @@ -432,14 +432,14 @@ dump(int argc, VALUE *argv, VALUE self) { static VALUE dump_file(int argc, VALUE *argv, VALUE self) { pm_string_t input; - pm_options_t options = { 0 }; + pm_options_t *options = pm_options_new(); VALUE encoded_filepath; - file_options(argc, argv, &input, &options, &encoded_filepath); + file_options(argc, argv, &input, options, &encoded_filepath); - VALUE value = dump_input(&input, &options); + VALUE value = dump_input(&input, options); pm_string_cleanup(&input); - pm_options_cleanup(&options); + pm_options_free(options); return value; } @@ -749,7 +749,7 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod .source = source, .tokens = rb_ary_new(), .encoding = rb_utf8_encoding(), - .freeze = options->freeze, + .freeze = pm_options_freeze_get(options), }; parse_lex_data_t *data = &parse_lex_data; @@ -772,7 +772,7 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod rb_ary_push(offsets, ULONG2NUM(parser.line_offsets.offsets[index])); } - if (options->freeze) { + if (pm_options_freeze_get(options)) { 
rb_obj_freeze(source_string); rb_obj_freeze(offsets); rb_obj_freeze(source); @@ -782,12 +782,12 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod VALUE result; if (return_nodes) { VALUE value = rb_ary_new_capa(2); - rb_ary_push(value, pm_ast_new(&parser, node, parse_lex_data.encoding, source, options->freeze)); + rb_ary_push(value, pm_ast_new(&parser, node, parse_lex_data.encoding, source, pm_options_freeze_get(options))); rb_ary_push(value, parse_lex_data.tokens); - if (options->freeze) rb_obj_freeze(value); - result = parse_result_create(rb_cPrismParseLexResult, &parser, value, parse_lex_data.encoding, source, options->freeze); + if (pm_options_freeze_get(options)) rb_obj_freeze(value); + result = parse_result_create(rb_cPrismParseLexResult, &parser, value, parse_lex_data.encoding, source, pm_options_freeze_get(options)); } else { - result = parse_result_create(rb_cPrismLexResult, &parser, parse_lex_data.tokens, parse_lex_data.encoding, source, options->freeze); + result = parse_result_create(rb_cPrismLexResult, &parser, parse_lex_data.tokens, parse_lex_data.encoding, source, pm_options_freeze_get(options)); } pm_parser_cleanup(&parser); @@ -807,12 +807,12 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod static VALUE lex(int argc, VALUE *argv, VALUE self) { pm_string_t input; - pm_options_t options = { 0 }; - string_options(argc, argv, &input, &options); + pm_options_t *options = pm_options_new(); + string_options(argc, argv, &input, options); - VALUE result = parse_lex_input(&input, &options, false); + VALUE result = parse_lex_input(&input, options, false); pm_string_cleanup(&input); - pm_options_cleanup(&options); + pm_options_free(options); return result; } @@ -828,14 +828,14 @@ lex(int argc, VALUE *argv, VALUE self) { static VALUE lex_file(int argc, VALUE *argv, VALUE self) { pm_string_t input; - pm_options_t options = { 0 }; + pm_options_t *options = pm_options_new(); VALUE 
encoded_filepath; - file_options(argc, argv, &input, &options, &encoded_filepath); + file_options(argc, argv, &input, options, &encoded_filepath); - VALUE value = parse_lex_input(&input, &options, false); + VALUE value = parse_lex_input(&input, options, false); pm_string_cleanup(&input); - pm_options_cleanup(&options); + pm_options_free(options); return value; } @@ -856,11 +856,12 @@ parse_input(pm_string_t *input, const pm_options_t *options) { pm_node_t *node = pm_parse(&parser); rb_encoding *encoding = rb_enc_find(pm_parser_encoding_name(&parser)); - VALUE source = pm_source_new(&parser, encoding, options->freeze); - VALUE value = pm_ast_new(&parser, node, encoding, source, options->freeze); - VALUE result = parse_result_create(rb_cPrismParseResult, &parser, value, encoding, source, options->freeze); + bool freeze = pm_options_freeze_get(options); + VALUE source = pm_source_new(&parser, encoding, freeze); + VALUE value = pm_ast_new(&parser, node, encoding, source, freeze); + VALUE result = parse_result_create(rb_cPrismParseResult, &parser, value, encoding, source, freeze); - if (options->freeze) { + if (freeze) { rb_obj_freeze(source); } @@ -918,8 +919,8 @@ parse_input(pm_string_t *input, const pm_options_t *options) { static VALUE parse(int argc, VALUE *argv, VALUE self) { pm_string_t input; - pm_options_t options = { 0 }; - string_options(argc, argv, &input, &options); + pm_options_t *options = pm_options_new(); + string_options(argc, argv, &input, options); #ifdef PRISM_BUILD_DEBUG size_t length = pm_string_length(&input); @@ -928,14 +929,14 @@ parse(int argc, VALUE *argv, VALUE self) { pm_string_constant_init(&input, dup, length); #endif - VALUE value = parse_input(&input, &options); + VALUE value = parse_input(&input, options); #ifdef PRISM_BUILD_DEBUG xfree_sized(dup, length); #endif pm_string_cleanup(&input); - pm_options_cleanup(&options); + pm_options_free(options); return value; } @@ -950,14 +951,14 @@ parse(int argc, VALUE *argv, VALUE self) { static 
VALUE parse_file(int argc, VALUE *argv, VALUE self) { pm_string_t input; - pm_options_t options = { 0 }; + pm_options_t *options = pm_options_new(); VALUE encoded_filepath; - file_options(argc, argv, &input, &options, &encoded_filepath); + file_options(argc, argv, &input, options, &encoded_filepath); - VALUE value = parse_input(&input, &options); + VALUE value = parse_input(&input, options); pm_string_cleanup(&input); - pm_options_cleanup(&options); + pm_options_free(options); return value; } @@ -988,12 +989,12 @@ profile_input(pm_string_t *input, const pm_options_t *options) { static VALUE profile(int argc, VALUE *argv, VALUE self) { pm_string_t input; - pm_options_t options = { 0 }; + pm_options_t *options = pm_options_new(); - string_options(argc, argv, &input, &options); - profile_input(&input, &options); + string_options(argc, argv, &input, options); + profile_input(&input, options); pm_string_cleanup(&input); - pm_options_cleanup(&options); + pm_options_free(options); return Qnil; } @@ -1010,14 +1011,14 @@ profile(int argc, VALUE *argv, VALUE self) { static VALUE profile_file(int argc, VALUE *argv, VALUE self) { pm_string_t input; - pm_options_t options = { 0 }; + pm_options_t *options = pm_options_new(); VALUE encoded_filepath; - file_options(argc, argv, &input, &options, &encoded_filepath); + file_options(argc, argv, &input, options, &encoded_filepath); - profile_input(&input, &options); + profile_input(&input, options); pm_string_cleanup(&input); - pm_options_cleanup(&options); + pm_options_free(options); return Qnil; } @@ -1065,23 +1066,24 @@ parse_stream(int argc, VALUE *argv, VALUE self) { VALUE keywords; rb_scan_args(argc, argv, "1:", &stream, &keywords); - pm_options_t options = { 0 }; - extract_options(&options, Qnil, keywords); + pm_options_t *options = pm_options_new(); + extract_options(options, Qnil, keywords); pm_arena_t arena = { 0 }; pm_parser_t parser; pm_buffer_t *buffer = pm_buffer_new(); - pm_node_t *node = pm_parse_stream(&arena, &parser, 
buffer, (void *) stream, parse_stream_fgets, parse_stream_eof, &options); + pm_node_t *node = pm_parse_stream(&arena, &parser, buffer, (void *) stream, parse_stream_fgets, parse_stream_eof, options); rb_encoding *encoding = rb_enc_find(pm_parser_encoding_name(&parser)); - VALUE source = pm_source_new(&parser, encoding, options.freeze); - VALUE value = pm_ast_new(&parser, node, encoding, source, options.freeze); - VALUE result = parse_result_create(rb_cPrismParseResult, &parser, value, encoding, source, options.freeze); + VALUE source = pm_source_new(&parser, encoding, pm_options_freeze_get(options)); + VALUE value = pm_ast_new(&parser, node, encoding, source, pm_options_freeze_get(options)); + VALUE result = parse_result_create(rb_cPrismParseResult, &parser, value, encoding, source, pm_options_freeze_get(options)); pm_buffer_free(buffer); pm_parser_cleanup(&parser); pm_arena_free(&arena); + pm_options_free(options); return result; } @@ -1098,8 +1100,8 @@ parse_input_comments(pm_string_t *input, const pm_options_t *options) { pm_parse(&parser); rb_encoding *encoding = rb_enc_find(pm_parser_encoding_name(&parser)); - VALUE source = pm_source_new(&parser, encoding, options->freeze); - VALUE comments = parser_comments(&parser, source, options->freeze); + VALUE source = pm_source_new(&parser, encoding, pm_options_freeze_get(options)); + VALUE comments = parser_comments(&parser, source, pm_options_freeze_get(options)); pm_parser_cleanup(&parser); pm_arena_free(&arena); @@ -1118,12 +1120,12 @@ parse_input_comments(pm_string_t *input, const pm_options_t *options) { static VALUE parse_comments(int argc, VALUE *argv, VALUE self) { pm_string_t input; - pm_options_t options = { 0 }; - string_options(argc, argv, &input, &options); + pm_options_t *options = pm_options_new(); + string_options(argc, argv, &input, options); - VALUE result = parse_input_comments(&input, &options); + VALUE result = parse_input_comments(&input, options); pm_string_cleanup(&input); - 
pm_options_cleanup(&options); + pm_options_free(options); return result; } @@ -1139,14 +1141,14 @@ parse_comments(int argc, VALUE *argv, VALUE self) { static VALUE parse_file_comments(int argc, VALUE *argv, VALUE self) { pm_string_t input; - pm_options_t options = { 0 }; + pm_options_t *options = pm_options_new(); VALUE encoded_filepath; - file_options(argc, argv, &input, &options, &encoded_filepath); + file_options(argc, argv, &input, options, &encoded_filepath); - VALUE value = parse_input_comments(&input, &options); + VALUE value = parse_input_comments(&input, options); pm_string_cleanup(&input); - pm_options_cleanup(&options); + pm_options_free(options); return value; } @@ -1169,12 +1171,12 @@ parse_file_comments(int argc, VALUE *argv, VALUE self) { static VALUE parse_lex(int argc, VALUE *argv, VALUE self) { pm_string_t input; - pm_options_t options = { 0 }; - string_options(argc, argv, &input, &options); + pm_options_t *options = pm_options_new(); + string_options(argc, argv, &input, options); - VALUE value = parse_lex_input(&input, &options, true); + VALUE value = parse_lex_input(&input, options, true); pm_string_cleanup(&input); - pm_options_cleanup(&options); + pm_options_free(options); return value; } @@ -1197,14 +1199,14 @@ parse_lex(int argc, VALUE *argv, VALUE self) { static VALUE parse_lex_file(int argc, VALUE *argv, VALUE self) { pm_string_t input; - pm_options_t options = { 0 }; + pm_options_t *options = pm_options_new(); VALUE encoded_filepath; - file_options(argc, argv, &input, &options, &encoded_filepath); + file_options(argc, argv, &input, options, &encoded_filepath); - VALUE value = parse_lex_input(&input, &options, true); + VALUE value = parse_lex_input(&input, options, true); pm_string_cleanup(&input); - pm_options_cleanup(&options); + pm_options_free(options); return value; } @@ -1238,12 +1240,12 @@ parse_input_success_p(pm_string_t *input, const pm_options_t *options) { static VALUE parse_success_p(int argc, VALUE *argv, VALUE self) { 
pm_string_t input; - pm_options_t options = { 0 }; - string_options(argc, argv, &input, &options); + pm_options_t *options = pm_options_new(); + string_options(argc, argv, &input, options); - VALUE result = parse_input_success_p(&input, &options); + VALUE result = parse_input_success_p(&input, options); pm_string_cleanup(&input); - pm_options_cleanup(&options); + pm_options_free(options); return result; } @@ -1272,14 +1274,14 @@ parse_failure_p(int argc, VALUE *argv, VALUE self) { static VALUE parse_file_success_p(int argc, VALUE *argv, VALUE self) { pm_string_t input; - pm_options_t options = { 0 }; + pm_options_t *options = pm_options_new(); VALUE encoded_filepath; - file_options(argc, argv, &input, &options, &encoded_filepath); + file_options(argc, argv, &input, options, &encoded_filepath); - VALUE result = parse_input_success_p(&input, &options); + VALUE result = parse_input_success_p(&input, options); pm_string_cleanup(&input); - pm_options_cleanup(&options); + pm_options_free(options); return result; } diff --git a/include/prism/internal/options.h b/include/prism/internal/options.h index e3fb0a9e5e..2dea2f9422 100644 --- a/include/prism/internal/options.h +++ b/include/prism/internal/options.h @@ -8,6 +8,122 @@ #include "prism/options.h" +/** + * A scope of locals surrounding the code that is being parsed. + */ +struct pm_options_scope_t { + /** The number of locals in the scope. */ + size_t locals_count; + + /** The names of the locals in the scope. */ + pm_string_t *locals; + + /** Flags for the set of forwarding parameters in this scope. */ + uint8_t forwarding; +}; + +/** + * The options that can be passed to the parser. + */ +struct pm_options_t { + /** + * The callback to call when additional switches are found in a shebang + * comment. + */ + pm_options_shebang_callback_t shebang_callback; + + /** + * Any additional data that should be passed along to the shebang callback + * if one was set. 
+ */ + void *shebang_callback_data; + + /** The name of the file that is currently being parsed. */ + pm_string_t filepath; + + /** + * The line within the file that the parse starts on. This value is + * 1-indexed. + */ + int32_t line; + + /** + * The name of the encoding that the source file is in. Note that this must + * correspond to a name that can be found with Encoding.find in Ruby. + */ + pm_string_t encoding; + + /** + * The number of scopes surrounding the code that is being parsed. + */ + size_t scopes_count; + + /** + * The scopes surrounding the code that is being parsed. For most parses + * this will be NULL, but for evals it will be the locals that are in scope + * surrounding the eval. Scopes are ordered from the outermost scope to the + * innermost one. + */ + pm_options_scope_t *scopes; + + /** + * The version of prism that we should be parsing with. This is used to + * allow consumers to specify which behavior they want in case they need to + * parse exactly as a specific version of CRuby. + */ + pm_options_version_t version; + + /** A bitset of the various options that were set on the command line. */ + uint8_t command_line; + + /** + * Whether or not the frozen string literal option has been set. + * May be: + * - PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED + * - PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED + * - PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET + */ + int8_t frozen_string_literal; + + /** + * Whether or not the encoding magic comments should be respected. This is a + * niche use-case where you want to parse a file with a specific encoding + * but ignore any encoding magic comments at the top of the file. + */ + bool encoding_locked; + + /** + * When the file being parsed is the main script, the shebang will be + * considered for command-line flags (or for implicit -x). The caller needs + * to pass this information to the parser so that it can behave correctly. 
+ */ + bool main_script; + + /** + * When the file being parsed is considered a "partial" script, jumps will + * not be marked as errors if they are not contained within loops/blocks. + * This is used in the case that you're parsing a script that you know will + * be embedded inside another script later, but you do not have that context + * yet. For example, when parsing an ERB template that will be evaluated + * inside another script. + */ + bool partial_script; + + /** + * Whether or not the parser should freeze the nodes that it creates. This + * makes it possible to have a deeply frozen AST that is safe to share + * between concurrency primitives. + */ + bool freeze; +}; + +/** + * Free the internal memory associated with the options. + * + * @param options The options struct whose internal memory should be freed. + */ +void pm_options_cleanup(pm_options_t *options); + /** * Deserialize an options struct from the given binary string. This is used to * pass options to the parser from an FFI call so that consumers of the library diff --git a/include/prism/node.h b/include/prism/node.h index 2390dc7290..576919047c 100644 --- a/include/prism/node.h +++ b/include/prism/node.h @@ -82,9 +82,10 @@ PRISM_EXPORTED_FUNCTION const char * pm_node_type_to_str(pm_node_type_t node_typ * size_t size = strlen(source); * * pm_arena_t arena = { 0 }; + * pm_options_t *options = pm_options_new(); + * * pm_parser_t parser; - * pm_options_t options = { 0 }; - * pm_parser_init(&arena, &parser, (const uint8_t *) source, size, &options); + * pm_parser_init(&arena, &parser, (const uint8_t *) source, size, options); * * size_t indent = 0; * pm_node_t *node = pm_parse(&parser); @@ -93,7 +94,9 @@ PRISM_EXPORTED_FUNCTION const char * pm_node_type_to_str(pm_node_type_t node_typ * pm_visit_node(node, visit, data); * * pm_parser_cleanup(&parser); + * pm_options_free(options); * pm_arena_free(&arena); + * * return EXIT_SUCCESS; * } * ``` diff --git a/include/prism/options.h 
b/include/prism/options.h index 607394d092..477b593b2a 100644 --- a/include/prism/options.h +++ b/include/prism/options.h @@ -11,6 +11,16 @@ #include #include +/** + * A scope of locals surrounding the code that is being parsed. + */ +typedef struct pm_options_scope_t pm_options_scope_t; + +/** + * The options that can be passed to the parser. + */ +typedef struct pm_options_t pm_options_t; + /** * String literals should be made frozen. */ @@ -27,20 +37,6 @@ */ #define PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED ((int8_t) 1) -/** - * A scope of locals surrounding the code that is being parsed. - */ -typedef struct pm_options_scope { - /** The number of locals in the scope. */ - size_t locals_count; - - /** The names of the locals in the scope. */ - pm_string_t *locals; - - /** Flags for the set of forwarding parameters in this scope. */ - uint8_t forwarding; -} pm_options_scope_t; - /** The default value for parameters. */ static const uint8_t PM_OPTIONS_SCOPE_FORWARDING_NONE = 0x0; @@ -56,9 +52,6 @@ static const uint8_t PM_OPTIONS_SCOPE_FORWARDING_BLOCK = 0x4; /** When the scope is fowarding with the ... parameter. */ static const uint8_t PM_OPTIONS_SCOPE_FORWARDING_ALL = 0x8; -/* Forward declaration needed by the callback typedef. */ -struct pm_options; - /** * The callback called when additional switches are found in a shebang comment * that need to be processed by the runtime. @@ -71,7 +64,7 @@ struct pm_options; * @param shebang_callback_data Any additional data that should be passed along * to the callback. */ -typedef void (*pm_options_shebang_callback_t)(struct pm_options *options, const uint8_t *source, size_t length, void *shebang_callback_data); +typedef void (*pm_options_shebang_callback_t)(pm_options_t *options, const uint8_t *source, size_t length, void *shebang_callback_data); /** * The version of Ruby syntax that we should be parsing with. 
This is used to @@ -104,101 +97,6 @@ typedef enum { PM_OPTIONS_VERSION_LATEST = PM_OPTIONS_VERSION_CRUBY_4_1 } pm_options_version_t; -/** - * The options that can be passed to the parser. - */ -typedef struct pm_options { - /** - * The callback to call when additional switches are found in a shebang - * comment. - */ - pm_options_shebang_callback_t shebang_callback; - - /** - * Any additional data that should be passed along to the shebang callback - * if one was set. - */ - void *shebang_callback_data; - - /** The name of the file that is currently being parsed. */ - pm_string_t filepath; - - /** - * The line within the file that the parse starts on. This value is - * 1-indexed. - */ - int32_t line; - - /** - * The name of the encoding that the source file is in. Note that this must - * correspond to a name that can be found with Encoding.find in Ruby. - */ - pm_string_t encoding; - - /** - * The number of scopes surrounding the code that is being parsed. - */ - size_t scopes_count; - - /** - * The scopes surrounding the code that is being parsed. For most parses - * this will be NULL, but for evals it will be the locals that are in scope - * surrounding the eval. Scopes are ordered from the outermost scope to the - * innermost one. - */ - pm_options_scope_t *scopes; - - /** - * The version of prism that we should be parsing with. This is used to - * allow consumers to specify which behavior they want in case they need to - * parse exactly as a specific version of CRuby. - */ - pm_options_version_t version; - - /** A bitset of the various options that were set on the command line. */ - uint8_t command_line; - - /** - * Whether or not the frozen string literal option has been set. - * May be: - * - PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED - * - PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED - * - PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET - */ - int8_t frozen_string_literal; - - /** - * Whether or not the encoding magic comments should be respected. 
This is a - * niche use-case where you want to parse a file with a specific encoding - * but ignore any encoding magic comments at the top of the file. - */ - bool encoding_locked; - - /** - * When the file being parsed is the main script, the shebang will be - * considered for command-line flags (or for implicit -x). The caller needs - * to pass this information to the parser so that it can behave correctly. - */ - bool main_script; - - /** - * When the file being parsed is considered a "partial" script, jumps will - * not be marked as errors if they are not contained within loops/blocks. - * This is used in the case that you're parsing a script that you know will - * be embedded inside another script later, but you do not have that context - * yet. For example, when parsing an ERB template that will be evaluated - * inside another script. - */ - bool partial_script; - - /** - * Whether or not the parser should freeze the nodes that it creates. This - * makes it possible to have a deeply frozen AST that is safe to share - * between concurrency primitives. - */ - bool freeze; -} pm_options_t; - /** * A bit representing whether or not the command line -a option was set. -a * splits the input line $_ into $F. @@ -236,6 +134,26 @@ static const uint8_t PM_OPTIONS_COMMAND_LINE_P = 0x10; */ static const uint8_t PM_OPTIONS_COMMAND_LINE_X = 0x20; +/** + * Allocate a new options struct. If the options struct cannot be allocated, + * this function aborts the process. + * + * @return A new options struct with default values. It is the responsibility of + * the caller to free this struct using pm_options_free(). + * + * \public \memberof pm_options + */ +PRISM_EXPORTED_FUNCTION pm_options_t * pm_options_new(void); + +/** + * Free both the held memory of the given options struct and the struct itself. + * + * @param options The options struct to free. 
+ * + * \public \memberof pm_options + */ +PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options); + /** * Set the shebang callback option on the given options struct. * @@ -248,6 +166,16 @@ static const uint8_t PM_OPTIONS_COMMAND_LINE_X = 0x20; */ PRISM_EXPORTED_FUNCTION void pm_options_shebang_callback_set(pm_options_t *options, pm_options_shebang_callback_t shebang_callback, void *shebang_callback_data); +/** + * Get the filepath option on the given options struct. + * + * @param options The options struct to get the filepath from. + * @return The filepath. + * + * \public \memberof pm_options + */ +PRISM_EXPORTED_FUNCTION pm_string_t * pm_options_filepath_get(pm_options_t *options); + /** * Set the filepath option on the given options struct. * @@ -322,6 +250,26 @@ PRISM_EXPORTED_FUNCTION void pm_options_command_line_set(pm_options_t *options, */ PRISM_EXPORTED_FUNCTION bool pm_options_version_set(pm_options_t *options, const char *version, size_t length); +/** + * Set the version option on the given options struct to the lowest version of + * Ruby that prism supports. + * + * @param options The options struct to set the version on. + * + * \public \memberof pm_options + */ +PRISM_EXPORTED_FUNCTION void pm_options_version_set_lowest(pm_options_t *options); + +/** + * Set the version option on the given options struct to the highest version of + * Ruby that prism supports. + * + * @param options The options struct to set the version on. + * + * \public \memberof pm_options + */ +PRISM_EXPORTED_FUNCTION void pm_options_version_set_highest(pm_options_t *options); + /** * Set the main script option on the given options struct. * @@ -342,6 +290,15 @@ PRISM_EXPORTED_FUNCTION void pm_options_main_script_set(pm_options_t *options, b */ PRISM_EXPORTED_FUNCTION void pm_options_partial_script_set(pm_options_t *options, bool partial_script); +/** + * Get the freeze option on the given options struct. 
+ * + * @param options The options struct to get the freeze value from. + * + * \public \memberof pm_options + */ +PRISM_EXPORTED_FUNCTION bool pm_options_freeze_get(const pm_options_t *options); + /** * Set the freeze option on the given options struct. * @@ -364,16 +321,29 @@ PRISM_EXPORTED_FUNCTION void pm_options_freeze_set(pm_options_t *options, bool f PRISM_EXPORTED_FUNCTION bool pm_options_scopes_init(pm_options_t *options, size_t scopes_count); /** - * Return a pointer to the scope at the given index within the given options. + * Return a constant pointer to the scope at the given index within the given + * options. * * @param options The options struct to get the scope from. * @param index The index of the scope to get. - * @return A pointer to the scope at the given index. + * @return A constant pointer to the scope at the given index. * * \public \memberof pm_options */ PRISM_EXPORTED_FUNCTION const pm_options_scope_t * pm_options_scope_get(const pm_options_t *options, size_t index); +/** + * Return a mutable pointer to the scope at the given index within the given + * options. + * + * @param options The options struct to get the scope from. + * @param index The index of the scope to get. + * @return A mutable pointer to the scope at the given index. + * + * \public \memberof pm_options + */ +PRISM_EXPORTED_FUNCTION pm_options_scope_t * pm_options_scope_get_mut(pm_options_t *options, size_t index); + /** * Create a new options scope struct. This will hold a set of locals that are in * scope surrounding the code that is being parsed. If the scope was unable to @@ -387,33 +357,37 @@ PRISM_EXPORTED_FUNCTION const pm_options_scope_t * pm_options_scope_get(const pm PRISM_EXPORTED_FUNCTION void pm_options_scope_init(pm_options_scope_t *scope, size_t locals_count); /** - * Return a pointer to the local at the given index within the given scope. + * Return a constant pointer to the local at the given index within the given + * scope. 
* * @param scope The scope struct to get the local from. * @param index The index of the local to get. - * @return A pointer to the local at the given index. + * @return A constant pointer to the local at the given index. * * \public \memberof pm_options */ PRISM_EXPORTED_FUNCTION const pm_string_t * pm_options_scope_local_get(const pm_options_scope_t *scope, size_t index); /** - * Set the forwarding option on the given scope struct. + * Return a mutable pointer to the local at the given index within the given + * scope. * - * @param scope The scope struct to set the forwarding on. - * @param forwarding The forwarding value to set. + * @param scope The scope struct to get the local from. + * @param index The index of the local to get. + * @return A mutable pointer to the local at the given index. * * \public \memberof pm_options */ -PRISM_EXPORTED_FUNCTION void pm_options_scope_forwarding_set(pm_options_scope_t *scope, uint8_t forwarding); +PRISM_EXPORTED_FUNCTION pm_string_t * pm_options_scope_local_get_mut(pm_options_scope_t *scope, size_t index); /** - * Free the internal memory associated with the options. + * Set the forwarding option on the given scope struct. * - * @param options The options struct whose internal memory should be freed. + * @param scope The scope struct to set the forwarding on. + * @param forwarding The forwarding value to set. 
* * \public \memberof pm_options */ -PRISM_EXPORTED_FUNCTION void pm_options_cleanup(pm_options_t *options); +PRISM_EXPORTED_FUNCTION void pm_options_scope_forwarding_set(pm_options_scope_t *scope, uint8_t forwarding); #endif diff --git a/rust/ruby-prism-sys/build/main.rs b/rust/ruby-prism-sys/build/main.rs index d239dbc51e..4cf5add131 100644 --- a/rust/ruby-prism-sys/build/main.rs +++ b/rust/ruby-prism-sys/build/main.rs @@ -135,7 +135,6 @@ fn generate_bindings(ruby_include_path: &Path) -> bindgen::Bindings { .allowlist_type("pm_node_type") .allowlist_type("pm_options_t") .allowlist_type("pm_options_scope_t") - .allowlist_type("pm_options_version_t") .allowlist_type("pm_parser_t") .allowlist_type("pm_string_t") .allowlist_type(r"^pm_\w+_node_t") @@ -144,7 +143,6 @@ fn generate_bindings(ruby_include_path: &Path) -> bindgen::Bindings { .rustified_non_exhaustive_enum("pm_comment_type_t") .rustified_non_exhaustive_enum(r"pm_\w+_flags") .rustified_non_exhaustive_enum("pm_node_type") - .rustified_non_exhaustive_enum("pm_options_version_t") // Functions .allowlist_function("pm_arena_free") .allowlist_function("pm_line_offset_list_line_column") @@ -152,16 +150,18 @@ fn generate_bindings(ruby_include_path: &Path) -> bindgen::Bindings { .allowlist_function("pm_options_encoding_locked_set") .allowlist_function("pm_options_encoding_set") .allowlist_function("pm_options_filepath_set") - .allowlist_function("pm_options_cleanup") + .allowlist_function("pm_options_free") .allowlist_function("pm_options_frozen_string_literal_set") .allowlist_function("pm_options_line_set") .allowlist_function("pm_options_main_script_set") + .allowlist_function("pm_options_new") .allowlist_function("pm_options_partial_script_set") .allowlist_function("pm_options_scope_forwarding_set") - .allowlist_function("pm_options_scope_get") + .allowlist_function("pm_options_scope_get_mut") .allowlist_function("pm_options_scope_init") - .allowlist_function("pm_options_scope_local_get") + 
.allowlist_function("pm_options_scope_local_get_mut") .allowlist_function("pm_options_scopes_init") + .allowlist_function("pm_options_version_set") .allowlist_function("pm_parse") .allowlist_function("pm_parser_cleanup") .allowlist_function("pm_parser_init") diff --git a/rust/ruby-prism/src/lib.rs b/rust/ruby-prism/src/lib.rs index 540f140555..7c678e1a67 100644 --- a/rust/ruby-prism/src/lib.rs +++ b/rust/ruby-prism/src/lib.rs @@ -27,8 +27,8 @@ pub use self::node_ext::{ConstantPathError, FullName}; pub use self::parse_result::{Comment, CommentType, Comments, Diagnostic, Diagnostics, Location, MagicComment, MagicComments, ParseResult}; use ruby_prism_sys::{ - pm_arena_t, pm_options_command_line_set, pm_options_encoding_locked_set, pm_options_encoding_set, pm_options_filepath_set, pm_options_cleanup, pm_options_frozen_string_literal_set, pm_options_line_set, pm_options_main_script_set, pm_options_partial_script_set, pm_options_scope_forwarding_set, - pm_options_scope_get, pm_options_scope_init, pm_options_scope_local_get, pm_options_scopes_init, pm_options_t, pm_parse, pm_parser_init, pm_parser_t, pm_string_constant_init, + pm_arena_t, pm_options_command_line_set, pm_options_encoding_locked_set, pm_options_encoding_set, pm_options_filepath_set, pm_options_free, pm_options_frozen_string_literal_set, pm_options_line_set, pm_options_main_script_set, pm_options_new, pm_options_partial_script_set, pm_options_scope_forwarding_set, + pm_options_scope_get_mut, pm_options_scope_init, pm_options_scope_local_get_mut, pm_options_scopes_init, pm_options_t, pm_options_version_set, pm_parse, pm_parser_init, pm_parser_t, pm_string_constant_init, }; /// The version of Ruby syntax to parse with. 
@@ -46,14 +46,16 @@ pub enum Version { CRuby4_1, } -impl From for ruby_prism_sys::pm_options_version_t { - fn from(version: Version) -> Self { - match version { - Version::Latest => Self::PM_OPTIONS_VERSION_LATEST, - Version::CRuby3_3 => Self::PM_OPTIONS_VERSION_CRUBY_3_3, - Version::CRuby3_4 => Self::PM_OPTIONS_VERSION_CRUBY_3_4, - Version::CRuby3_5 => Self::PM_OPTIONS_VERSION_CRUBY_3_5, - Version::CRuby4_1 => Self::PM_OPTIONS_VERSION_CRUBY_4_1, +impl Version { + /// Calls `pm_options_version_set` with the appropriate version string. + /// `Latest` passes `NULL` to get the default behavior. + unsafe fn set_on(self, opts: *mut pm_options_t) { + match self { + Version::Latest => { pm_options_version_set(opts, std::ptr::null(), 0); }, + Version::CRuby3_3 => { pm_options_version_set(opts, c"3.3".as_ptr(), 3); }, + Version::CRuby3_4 => { pm_options_version_set(opts, c"3.4".as_ptr(), 3); }, + Version::CRuby3_5 => { pm_options_version_set(opts, c"3.5".as_ptr(), 3); }, + Version::CRuby4_1 => { pm_options_version_set(opts, c"4.1".as_ptr(), 3); }, } } } @@ -238,58 +240,58 @@ impl Options { /// Panics if `filepath` or `encoding` contain interior null bytes. 
#[must_use] pub fn build(self) -> ParseOptions { - let mut opts = pm_options_t::default(); + let opts = unsafe { pm_options_new() }; let c_filepath = self.filepath.map(|filepath| { let cstring = CString::new(filepath).unwrap(); - unsafe { pm_options_filepath_set(&raw mut opts, cstring.as_ptr()) }; + unsafe { pm_options_filepath_set(opts, cstring.as_ptr()) }; cstring }); if let Some(line) = self.line { - unsafe { pm_options_line_set(&raw mut opts, line) }; + unsafe { pm_options_line_set(opts, line) }; } let c_encoding = self.encoding.map(|encoding| { let cstring = CString::new(encoding).unwrap(); - unsafe { pm_options_encoding_set(&raw mut opts, cstring.as_ptr()) }; + unsafe { pm_options_encoding_set(opts, cstring.as_ptr()) }; cstring }); if self.encoding_locked { - unsafe { pm_options_encoding_locked_set(&raw mut opts, true) }; + unsafe { pm_options_encoding_locked_set(opts, true) }; } if let Some(frozen) = self.frozen_string_literal { - unsafe { pm_options_frozen_string_literal_set(&raw mut opts, frozen) }; + unsafe { pm_options_frozen_string_literal_set(opts, frozen) }; } let command_line = self.command_line.iter().fold(0u8, |acc, &flag| acc | u8::from(flag)); if command_line != 0 { - unsafe { pm_options_command_line_set(&raw mut opts, command_line) }; + unsafe { pm_options_command_line_set(opts, command_line) }; } if let Some(version) = self.version { - opts.version = version.into(); + unsafe { version.set_on(opts) }; } if self.main_script { - unsafe { pm_options_main_script_set(&raw mut opts, true) }; + unsafe { pm_options_main_script_set(opts, true) }; } if self.partial_script { - unsafe { pm_options_partial_script_set(&raw mut opts, true) }; + unsafe { pm_options_partial_script_set(opts, true) }; } if !self.scopes.is_empty() { - unsafe { pm_options_scopes_init(&raw mut opts, self.scopes.len()) }; + unsafe { pm_options_scopes_init(opts, self.scopes.len()) }; for (scope_index, scope) in self.scopes.iter().enumerate() { - let pm_scope = unsafe { 
pm_options_scope_get(&raw const opts, scope_index).cast_mut() }; + let pm_scope = unsafe { pm_options_scope_get_mut(opts, scope_index) }; unsafe { pm_options_scope_init(pm_scope, scope.locals.len()) }; for (local_index, local) in scope.locals.iter().enumerate() { - let pm_local = unsafe { pm_options_scope_local_get(pm_scope, local_index).cast_mut() }; + let pm_local = unsafe { pm_options_scope_local_get_mut(pm_scope, local_index) }; unsafe { pm_string_constant_init(pm_local, local.as_ptr().cast::(), local.len()) }; } @@ -311,9 +313,8 @@ impl Options { /// The C-level parse options. Created from [`Options::build`]. Must outlive /// any [`ParseResult`] created with [`parse_with_options`]. -#[derive(Debug)] pub struct ParseOptions { - options: pm_options_t, + options: *mut pm_options_t, // These CStrings back the constant pm_string_t values inside `options`. // They must not be dropped before `options` is freed. _filepath: Option, @@ -324,7 +325,7 @@ pub struct ParseOptions { impl Drop for ParseOptions { fn drop(&mut self) { - unsafe { pm_options_cleanup(&raw mut self.options) }; + unsafe { pm_options_free(self.options) }; } } @@ -369,7 +370,7 @@ pub fn parse(source: &[u8]) -> ParseResult<'_> { /// #[must_use] pub fn parse_with_options<'a>(source: &'a [u8], options: &'a ParseOptions) -> ParseResult<'a> { - unsafe { parse_impl(source, &raw const options.options) } + unsafe { parse_impl(source, options.options) } } #[cfg(test)] diff --git a/src/options.c b/src/options.c index b99db37ab5..ecdeba58f3 100644 --- a/src/options.c +++ b/src/options.c @@ -1,4 +1,4 @@ -#include "prism/options.h" +#include "prism/internal/options.h" #include "prism/compiler/inline.h" @@ -8,6 +8,49 @@ #include #include +/** + * Allocate a new options struct. If the options struct cannot be allocated, + * this function aborts the process. 
+ */ +pm_options_t * +pm_options_new(void) { + pm_options_t *options = xcalloc(1, sizeof(pm_options_t)); + if (options == NULL) abort(); + return options; +} + +/** + * Free the internal memory associated with the options. + */ +void +pm_options_cleanup(pm_options_t *options) { + pm_string_cleanup(&options->filepath); + pm_string_cleanup(&options->encoding); + + for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) { + pm_options_scope_t *scope = &options->scopes[scope_index]; + + for (size_t local_index = 0; local_index < scope->locals_count; local_index++) { + pm_string_cleanup(&scope->locals[local_index]); + } + + xfree_sized(scope->locals, scope->locals_count * sizeof(pm_string_t)); + } + + xfree_sized(options->scopes, options->scopes_count * sizeof(pm_options_scope_t)); +} + +/** + * Free both the held memory of the given options struct and the struct itself. + * + * @param options The options struct to free. + */ +void +pm_options_free(pm_options_t *options) { + pm_options_cleanup(options); + xfree(options); +} + /** * Set the shebang callback option on the given options struct. */ @@ -17,6 +60,14 @@ pm_options_shebang_callback_set(pm_options_t *options, pm_options_shebang_callba options->shebang_callback_data = shebang_callback_data; } +/** + * Get the filepath option on the given options struct. + */ +pm_string_t * +pm_options_filepath_get(pm_options_t *options) { + return &options->filepath; +} + /** * Set the filepath option on the given options struct. */ @@ -141,6 +192,24 @@ pm_options_version_set(pm_options_t *options, const char *version, size_t length return false; } +/** + * Set the version option on the given options struct to the lowest version of + * Ruby that prism supports. + */ +void +pm_options_version_set_lowest(pm_options_t *options) { + options->version = PM_OPTIONS_VERSION_CRUBY_3_3; +} + +/** + * Set the version option on the given options struct to the highest version of + * Ruby that prism supports. 
+ */ +void +pm_options_version_set_highest(pm_options_t *options) { + options->version = PM_OPTIONS_VERSION_LATEST; +} + /** * Set the main script option on the given options struct. */ @@ -157,6 +226,14 @@ pm_options_partial_script_set(pm_options_t *options, bool partial_script) { options->partial_script = partial_script; } +/** + * Get the freeze option on the given options struct. + */ +bool +pm_options_freeze_get(const pm_options_t *options) { + return options->freeze; +} + /** * Set the freeze option on the given options struct. */ @@ -184,13 +261,23 @@ pm_options_scopes_init(pm_options_t *options, size_t scopes_count) { } /** - * Return a pointer to the scope at the given index within the given options. + * Return a constant pointer to the scope at the given index within the given + * options. */ const pm_options_scope_t * pm_options_scope_get(const pm_options_t *options, size_t index) { return &options->scopes[index]; } +/** + * Return a mutable pointer to the scope at the given index within the given + * options. + */ +pm_options_scope_t * +pm_options_scope_get_mut(pm_options_t *options, size_t index) { + return &options->scopes[index]; +} + /** * Create a new options scope struct. This will hold a set of locals that are in * scope surrounding the code that is being parsed. @@ -204,7 +291,8 @@ pm_options_scope_init(pm_options_scope_t *scope, size_t locals_count) { } /** - * Return a pointer to the local at the given index within the given scope. + * Return a constant pointer to the local at the given index within the given + * scope. */ const pm_string_t * pm_options_scope_local_get(const pm_options_scope_t *scope, size_t index) { @@ -212,32 +300,20 @@ pm_options_scope_local_get(const pm_options_scope_t *scope, size_t index) { } /** - * Set the forwarding option on the given scope struct. + * Return a mutable pointer to the local at the given index within the given + * scope. 
*/ -void -pm_options_scope_forwarding_set(pm_options_scope_t *scope, uint8_t forwarding) { - scope->forwarding = forwarding; +pm_string_t * +pm_options_scope_local_get_mut(pm_options_scope_t *scope, size_t index) { + return &scope->locals[index]; } /** - * Free the internal memory associated with the options. + * Set the forwarding option on the given scope struct. */ void -pm_options_cleanup(pm_options_t *options) { - pm_string_cleanup(&options->filepath); - pm_string_cleanup(&options->encoding); - - for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) { - pm_options_scope_t *scope = &options->scopes[scope_index]; - - for (size_t local_index = 0; local_index < scope->locals_count; local_index++) { - pm_string_cleanup(&scope->locals[local_index]); - } - - xfree_sized(scope->locals, scope->locals_count * sizeof(pm_string_t)); - } - - xfree_sized(options->scopes, options->scopes_count * sizeof(pm_options_scope_t)); +pm_options_scope_forwarding_set(pm_options_scope_t *scope, uint8_t forwarding) { + scope->forwarding = forwarding; } /** From d3153258af99d8db971445a3c083f3056d365f79 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 17 Mar 2026 20:04:20 -0400 Subject: [PATCH 048/100] Give full lifetime functions to parser --- cpp/test.cpp | 9 +++-- ext/prism/extension.c | 80 ++++++++++++++++++++----------------------- include/prism.h | 58 +++++++++++++++++++++---------- include/prism/node.h | 7 ++-- src/prism.c | 25 ++++++++++++++ 5 files changed, 108 insertions(+), 71 deletions(-) diff --git a/cpp/test.cpp b/cpp/test.cpp index 19a5731de9..8ef742d957 100644 --- a/cpp/test.cpp +++ b/cpp/test.cpp @@ -6,19 +6,18 @@ extern "C" { int main() { pm_arena_t arena = { 0 }; - pm_parser_t parser; - pm_parser_init(&arena, &parser, reinterpret_cast("1 + 2"), 5, NULL); + pm_parser_t *parser = pm_parser_new(&arena, reinterpret_cast("1 + 2"), 5, NULL); - pm_node_t *root = pm_parse(&parser); + pm_node_t *root = pm_parse(parser); pm_buffer_t *buffer = 
pm_buffer_new(); - pm_prettyprint(buffer, &parser, root); + pm_prettyprint(buffer, parser, root); std::string_view view(pm_buffer_value(buffer), pm_buffer_length(buffer)); std::cout << view << std::endl; pm_buffer_free(buffer); - pm_parser_cleanup(&parser); + pm_parser_free(parser); pm_arena_free(&arena); return 0; diff --git a/ext/prism/extension.c b/ext/prism/extension.c index 5c4e309d42..804e3f0728 100644 --- a/ext/prism/extension.c +++ b/ext/prism/extension.c @@ -373,15 +373,14 @@ dump_input(pm_string_t *input, const pm_options_t *options) { } pm_arena_t arena = { 0 }; - pm_parser_t parser; - pm_parser_init(&arena, &parser, pm_string_source(input), pm_string_length(input), options); + pm_parser_t *parser = pm_parser_new(&arena, pm_string_source(input), pm_string_length(input), options); - pm_node_t *node = pm_parse(&parser); - pm_serialize(&parser, node, buffer); + pm_node_t *node = pm_parse(parser); + pm_serialize(parser, node, buffer); VALUE result = rb_str_new(pm_buffer_value(buffer), pm_buffer_length(buffer)); pm_buffer_free(buffer); - pm_parser_cleanup(&parser); + pm_parser_free(parser); pm_arena_free(&arena); return result; @@ -737,13 +736,12 @@ parse_lex_encoding_changed_callback(pm_parser_t *parser) { static VALUE parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nodes) { pm_arena_t arena = { 0 }; - pm_parser_t parser; - pm_parser_init(&arena, &parser, pm_string_source(input), pm_string_length(input), options); - pm_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback); + pm_parser_t *parser = pm_parser_new(&arena, pm_string_source(input), pm_string_length(input), options); + pm_parser_register_encoding_changed_callback(parser, parse_lex_encoding_changed_callback); VALUE source_string = rb_str_new((const char *) pm_string_source(input), pm_string_length(input)); - VALUE offsets = rb_ary_new_capa(parser.line_offsets.size); - VALUE source = rb_funcall(rb_cPrismSource, rb_id_source_for, 3, 
source_string, LONG2NUM(parser.start_line), offsets); + VALUE offsets = rb_ary_new_capa(parser->line_offsets.size); + VALUE source = rb_funcall(rb_cPrismSource, rb_id_source_for, 3, source_string, LONG2NUM(parser->start_line), offsets); parse_lex_data_t parse_lex_data = { .source = source, @@ -758,18 +756,18 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod .callback = parse_lex_token, }; - parser.lex_callback = &lex_callback; - pm_node_t *node = pm_parse(&parser); + parser->lex_callback = &lex_callback; + pm_node_t *node = pm_parse(parser); // Here we need to update the Source object to have the correct // encoding for the source string and the correct newline offsets. // We do it here because we've already created the Source object and given // it over to all of the tokens, and both of these are only set after pm_parse(). - rb_encoding *encoding = rb_enc_find(pm_parser_encoding_name(&parser)); + rb_encoding *encoding = rb_enc_find(pm_parser_encoding_name(parser)); rb_enc_associate(source_string, encoding); - for (size_t index = 0; index < parser.line_offsets.size; index++) { - rb_ary_push(offsets, ULONG2NUM(parser.line_offsets.offsets[index])); + for (size_t index = 0; index < parser->line_offsets.size; index++) { + rb_ary_push(offsets, ULONG2NUM(parser->line_offsets.offsets[index])); } if (pm_options_freeze_get(options)) { @@ -782,15 +780,15 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod VALUE result; if (return_nodes) { VALUE value = rb_ary_new_capa(2); - rb_ary_push(value, pm_ast_new(&parser, node, parse_lex_data.encoding, source, pm_options_freeze_get(options))); + rb_ary_push(value, pm_ast_new(parser, node, parse_lex_data.encoding, source, pm_options_freeze_get(options))); rb_ary_push(value, parse_lex_data.tokens); if (pm_options_freeze_get(options)) rb_obj_freeze(value); - result = parse_result_create(rb_cPrismParseLexResult, &parser, value, parse_lex_data.encoding, source, 
pm_options_freeze_get(options)); + result = parse_result_create(rb_cPrismParseLexResult, parser, value, parse_lex_data.encoding, source, pm_options_freeze_get(options)); } else { - result = parse_result_create(rb_cPrismLexResult, &parser, parse_lex_data.tokens, parse_lex_data.encoding, source, pm_options_freeze_get(options)); + result = parse_result_create(rb_cPrismLexResult, parser, parse_lex_data.tokens, parse_lex_data.encoding, source, pm_options_freeze_get(options)); } - pm_parser_cleanup(&parser); + pm_parser_free(parser); pm_arena_free(&arena); return result; @@ -850,22 +848,21 @@ lex_file(int argc, VALUE *argv, VALUE self) { static VALUE parse_input(pm_string_t *input, const pm_options_t *options) { pm_arena_t arena = { 0 }; - pm_parser_t parser; - pm_parser_init(&arena, &parser, pm_string_source(input), pm_string_length(input), options); + pm_parser_t *parser = pm_parser_new(&arena, pm_string_source(input), pm_string_length(input), options); - pm_node_t *node = pm_parse(&parser); - rb_encoding *encoding = rb_enc_find(pm_parser_encoding_name(&parser)); + pm_node_t *node = pm_parse(parser); + rb_encoding *encoding = rb_enc_find(pm_parser_encoding_name(parser)); bool freeze = pm_options_freeze_get(options); - VALUE source = pm_source_new(&parser, encoding, freeze); - VALUE value = pm_ast_new(&parser, node, encoding, source, freeze); - VALUE result = parse_result_create(rb_cPrismParseResult, &parser, value, encoding, source, freeze); + VALUE source = pm_source_new(parser, encoding, freeze); + VALUE value = pm_ast_new(parser, node, encoding, source, freeze); + VALUE result = parse_result_create(rb_cPrismParseResult, parser, value, encoding, source, freeze); if (freeze) { rb_obj_freeze(source); } - pm_parser_cleanup(&parser); + pm_parser_free(parser); pm_arena_free(&arena); return result; @@ -969,11 +966,10 @@ parse_file(int argc, VALUE *argv, VALUE self) { static void profile_input(pm_string_t *input, const pm_options_t *options) { pm_arena_t arena = { 0 }; - 
pm_parser_t parser; - pm_parser_init(&arena, &parser, pm_string_source(input), pm_string_length(input), options); + pm_parser_t *parser = pm_parser_new(&arena, pm_string_source(input), pm_string_length(input), options); - pm_parse(&parser); - pm_parser_cleanup(&parser); + pm_parse(parser); + pm_parser_free(parser); pm_arena_free(&arena); } @@ -1094,16 +1090,15 @@ parse_stream(int argc, VALUE *argv, VALUE self) { static VALUE parse_input_comments(pm_string_t *input, const pm_options_t *options) { pm_arena_t arena = { 0 }; - pm_parser_t parser; - pm_parser_init(&arena, &parser, pm_string_source(input), pm_string_length(input), options); + pm_parser_t *parser = pm_parser_new(&arena, pm_string_source(input), pm_string_length(input), options); - pm_parse(&parser); - rb_encoding *encoding = rb_enc_find(pm_parser_encoding_name(&parser)); + pm_parse(parser); + rb_encoding *encoding = rb_enc_find(pm_parser_encoding_name(parser)); - VALUE source = pm_source_new(&parser, encoding, pm_options_freeze_get(options)); - VALUE comments = parser_comments(&parser, source, pm_options_freeze_get(options)); + VALUE source = pm_source_new(parser, encoding, pm_options_freeze_get(options)); + VALUE comments = parser_comments(parser, source, pm_options_freeze_get(options)); - pm_parser_cleanup(&parser); + pm_parser_free(parser); pm_arena_free(&arena); return comments; @@ -1217,13 +1212,12 @@ parse_lex_file(int argc, VALUE *argv, VALUE self) { static VALUE parse_input_success_p(pm_string_t *input, const pm_options_t *options) { pm_arena_t arena = { 0 }; - pm_parser_t parser; - pm_parser_init(&arena, &parser, pm_string_source(input), pm_string_length(input), options); + pm_parser_t *parser = pm_parser_new(&arena, pm_string_source(input), pm_string_length(input), options); - pm_parse(&parser); + pm_parse(parser); - VALUE result = parser.error_list.size == 0 ? Qtrue : Qfalse; - pm_parser_cleanup(&parser); + VALUE result = parser->error_list.size == 0 ? 
Qtrue : Qfalse; + pm_parser_free(parser); pm_arena_free(&arena); return result; diff --git a/include/prism.h b/include/prism.h index 68b21122d3..631563a996 100644 --- a/include/prism.h +++ b/include/prism.h @@ -31,21 +31,34 @@ PRISM_EXPORTED_FUNCTION const char * pm_version(void); /** * Initialize a parser with the given start and end pointers. * - * The resulting parser must eventually be freed with `pm_parser_cleanup()`. The - * arena is caller-owned and must outlive the parser — `pm_parser_cleanup()` does - * not free the arena. - * - * @param arena The arena to use for all AST-lifetime allocations. + * @param arena The arena to use for all AST-lifetime allocations. It is caller- + * owned and must outlive the parser. * @param parser The parser to initialize. * @param source The source to parse. * @param size The size of the source. * @param options The optional options to use when parsing. These options must - * live for the whole lifetime of this parser. + * live for the whole lifetime of this parser. * * \public \memberof pm_parser */ PRISM_EXPORTED_FUNCTION void pm_parser_init(pm_arena_t *arena, pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options); +/** + * Allocate and initialize a parser with the given start and end pointers. + * + * @param arena The arena to use for all AST-lifetime allocations. It is caller- + * owned and must outlive the parser. + * @param source The source to parse. + * @param size The size of the source. + * @param options The optional options to use when parsing. These options must + * live for the whole lifetime of this parser. + * @return The initialized parser. It is the responsibility of the caller to + * free the parser with `pm_parser_free()`. 
+ * + * \public \memberof pm_parser + */ +PRISM_EXPORTED_FUNCTION pm_parser_t * pm_parser_new(pm_arena_t *arena, const uint8_t *source, size_t size, const pm_options_t *options); + /** * Register a callback that will be called whenever prism changes the encoding * it is using to parse based on the magic comment. @@ -58,17 +71,26 @@ PRISM_EXPORTED_FUNCTION void pm_parser_init(pm_arena_t *arena, pm_parser_t *pars PRISM_EXPORTED_FUNCTION void pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback); /** - * Free any memory associated with the given parser. + * Free the memory held by the given parser. * * This does not free the `pm_options_t` object that was used to initialize the * parser. * - * @param parser The parser to free. + * @param parser The parser whose held memory should be freed. * * \public \memberof pm_parser */ PRISM_EXPORTED_FUNCTION void pm_parser_cleanup(pm_parser_t *parser); +/** + * Free both the memory held by the given parser and the parser itself. + * + * @param parser The parser to free. + * + * \public \memberof pm_parser + */ +PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser); + /** * Initiate the parser with the given parser. 
* @@ -271,9 +293,9 @@ PRISM_EXPORTED_FUNCTION void pm_dump_json(pm_buffer_t *buffer, const pm_parser_t * * * `pm_arena_t` - the arena allocator for AST-lifetime memory * * `pm_parser_t` - the main parser structure - * * `pm_parser_init()` - initialize a parser + * * `pm_parser_new()` - allocate and initialize a new parser * * `pm_parse()` - parse and return the root node - * * `pm_parser_cleanup()` - free the internal memory of the parser + * * `pm_parser_free()` - free the parser and its internal memory * * `pm_arena_free()` - free all AST-lifetime memory * * Putting all of this together would look something like: @@ -281,13 +303,12 @@ PRISM_EXPORTED_FUNCTION void pm_dump_json(pm_buffer_t *buffer, const pm_parser_t * ```c * void parse(const uint8_t *source, size_t length) { * pm_arena_t arena = { 0 }; - * pm_parser_t parser; - * pm_parser_init(&arena, &parser, source, length, NULL); + * pm_parser_t *parser = pm_parser_new(&arena, source, length, NULL); * - * pm_node_t *root = pm_parse(&parser); + * pm_node_t *root = pm_parse(parser); * printf("PARSED!\n"); * - * pm_parser_cleanup(&parser); + * pm_parser_free(parser); * pm_arena_free(&arena); * } * ``` @@ -331,17 +352,16 @@ PRISM_EXPORTED_FUNCTION void pm_dump_json(pm_buffer_t *buffer, const pm_parser_t * ```c * void prettyprint(const uint8_t *source, size_t length) { * pm_arena_t arena = { 0 }; - * pm_parser_t parser; - * pm_parser_init(&arena, &parser, source, length, NULL); + * pm_parser_t *parser = pm_parser_new(&arena, source, length, NULL); * - * pm_node_t *root = pm_parse(&parser); + * pm_node_t *root = pm_parse(parser); * pm_buffer_t *buffer = pm_buffer_new(); * - * pm_prettyprint(buffer, &parser, root); + * pm_prettyprint(buffer, parser, root); * printf("%*.s\n", (int) pm_buffer_length(buffer), pm_buffer_value(buffer)); * * pm_buffer_free(buffer); - * pm_parser_cleanup(&parser); + * pm_parser_free(parser); * pm_arena_free(&arena); * } * ``` diff --git a/include/prism/node.h b/include/prism/node.h index 
576919047c..ac7c337ad9 100644 --- a/include/prism/node.h +++ b/include/prism/node.h @@ -84,16 +84,15 @@ PRISM_EXPORTED_FUNCTION const char * pm_node_type_to_str(pm_node_type_t node_typ * pm_arena_t arena = { 0 }; * pm_options_t *options = pm_options_new(); * - * pm_parser_t parser; - * pm_parser_init(&arena, &parser, (const uint8_t *) source, size, options); + * pm_parser_t *parser = pm_parser_new(&arena, (const uint8_t *) source, size, options); * * size_t indent = 0; - * pm_node_t *node = pm_parse(&parser); + * pm_node_t *node = pm_parse(parser); * * size_t *data = &indent; * pm_visit_node(node, visit, data); * - * pm_parser_cleanup(&parser); + * pm_parser_free(parser); * pm_options_free(options); * pm_arena_free(&arena); * diff --git a/src/prism.c b/src/prism.c index 06cfc14b9a..9b9092f2a6 100644 --- a/src/prism.c +++ b/src/prism.c @@ -22475,6 +22475,22 @@ pm_parser_init(pm_arena_t *arena, pm_parser_t *parser, const uint8_t *source, si parser->encoding_comment_start += pm_strspn_inline_whitespace(parser->encoding_comment_start, parser->end - parser->encoding_comment_start); } +/** + * Allocate and initialize a parser with the given start and end pointers. + * + * The resulting parser must eventually be freed with `pm_parser_free()`. The + * arena is caller-owned and must outlive the parser — `pm_parser_cleanup()` + * does not free the arena. + */ +pm_parser_t * +pm_parser_new(pm_arena_t *arena, const uint8_t *source, size_t size, const pm_options_t *options) { + pm_parser_t *parser = (pm_parser_t *) xmalloc(sizeof(pm_parser_t)); + if (parser == NULL) abort(); + + pm_parser_init(arena, parser, source, size, options); + return parser; +} + /** * Register a callback that will be called whenever prism changes the encoding * it is using to parse based on the magic comment. @@ -22505,6 +22521,15 @@ pm_parser_cleanup(pm_parser_t *parser) { } } +/** + * Free both the memory held by the given parser and the parser itself. 
+ */ +void +pm_parser_free(pm_parser_t *parser) { + pm_parser_cleanup(parser); + xfree(parser); +} + /** * Returns true if the given diagnostic ID represents an error that cannot be * fixed by appending more input. These are errors where the existing source From 035061c5062181f33e69f5c08f48a54d042d3258 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 18 Mar 2026 08:25:57 -0400 Subject: [PATCH 049/100] Update pm_parse_stream API to make parser opaque --- ext/prism/extension.c | 14 +++++++------- include/prism.h | 4 ++-- lib/prism/ffi.rb | 7 ++++++- src/prism.c | 24 +++++++++++++----------- 4 files changed, 28 insertions(+), 21 deletions(-) diff --git a/ext/prism/extension.c b/ext/prism/extension.c index 804e3f0728..f2ac4e7766 100644 --- a/ext/prism/extension.c +++ b/ext/prism/extension.c @@ -1066,18 +1066,18 @@ parse_stream(int argc, VALUE *argv, VALUE self) { extract_options(options, Qnil, keywords); pm_arena_t arena = { 0 }; - pm_parser_t parser; + pm_parser_t *parser; pm_buffer_t *buffer = pm_buffer_new(); - pm_node_t *node = pm_parse_stream(&arena, &parser, buffer, (void *) stream, parse_stream_fgets, parse_stream_eof, options); - rb_encoding *encoding = rb_enc_find(pm_parser_encoding_name(&parser)); + pm_node_t *node = pm_parse_stream(&parser, &arena, buffer, (void *) stream, parse_stream_fgets, parse_stream_eof, options); + rb_encoding *encoding = rb_enc_find(pm_parser_encoding_name(parser)); - VALUE source = pm_source_new(&parser, encoding, pm_options_freeze_get(options)); - VALUE value = pm_ast_new(&parser, node, encoding, source, pm_options_freeze_get(options)); - VALUE result = parse_result_create(rb_cPrismParseResult, &parser, value, encoding, source, pm_options_freeze_get(options)); + VALUE source = pm_source_new(parser, encoding, pm_options_freeze_get(options)); + VALUE value = pm_ast_new(parser, node, encoding, source, pm_options_freeze_get(options)); + VALUE result = parse_result_create(rb_cPrismParseResult, parser, value, encoding, source, 
pm_options_freeze_get(options)); pm_buffer_free(buffer); - pm_parser_cleanup(&parser); + pm_parser_free(parser); pm_arena_free(&arena); pm_options_free(options); diff --git a/include/prism.h b/include/prism.h index 631563a996..768c7312a8 100644 --- a/include/prism.h +++ b/include/prism.h @@ -118,8 +118,8 @@ typedef int (pm_parse_stream_feof_t)(void *stream); /** * Parse a stream of Ruby source and return the tree. * + * @param parser The out parameter to write the parser to. * @param arena The arena to use for all AST-lifetime allocations. - * @param parser The parser to use. * @param buffer The buffer to use. * @param stream The stream to parse. * @param stream_fgets The function to use to read from the stream. @@ -129,7 +129,7 @@ typedef int (pm_parse_stream_feof_t)(void *stream); * * \public \memberof pm_parser */ -PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_arena_t *arena, pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options); +PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_parser_t **parser, pm_arena_t *arena, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options); // We optionally support serializing to a binary string. 
For systems that don't // want or need this functionality, it can be turned off with the diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb index 5f5c55378a..d4a9ad0302 100644 --- a/lib/prism/ffi.rb +++ b/lib/prism/ffi.rb @@ -99,10 +99,15 @@ def self.load_exported_functions_from(header, *functions, callbacks) "pm_serialize_lex", "pm_serialize_parse_lex", "pm_parse_success_p", + [:pm_parse_stream_fgets_t, :pm_parse_stream_feof_t] + ) + + load_exported_functions_from( + "prism/string_query.h", "pm_string_query_local", "pm_string_query_constant", "pm_string_query_method_name", - [:pm_parse_stream_fgets_t, :pm_parse_stream_feof_t] + [] ) load_exported_functions_from( diff --git a/src/prism.c b/src/prism.c index 9b9092f2a6..c0363afd9c 100644 --- a/src/prism.c +++ b/src/prism.c @@ -22763,21 +22763,23 @@ pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t * can stream stdin in to Ruby so we need to support a streaming API. */ pm_node_t * -pm_parse_stream(pm_arena_t *arena, pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options) { +pm_parse_stream(pm_parser_t **parser, pm_arena_t *arena, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options) { bool eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof); - pm_parser_init(arena, parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options); - pm_node_t *node = pm_parse(parser); + pm_parser_t *tmp = pm_parser_new(arena, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options); + pm_node_t *node = pm_parse(tmp); - while (!eof && parser->error_list.size > 0) { + while (!eof && tmp->error_list.size > 0) { eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof); - pm_parser_cleanup(parser); + pm_parser_free(tmp); pm_arena_free(arena); - 
pm_parser_init(arena, parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options); - node = pm_parse(parser); + + tmp = pm_parser_new(arena, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options); + node = pm_parse(tmp); } + *parser = tmp; return node; } @@ -22863,18 +22865,18 @@ pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, cons void pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const char *data) { pm_arena_t arena = { 0 }; - pm_parser_t parser; + pm_parser_t *parser; pm_options_t options = { 0 }; pm_options_read(&options, data); pm_buffer_t *parser_buffer = pm_buffer_new(); - pm_node_t *node = pm_parse_stream(&arena, &parser, parser_buffer, stream, stream_fgets, stream_feof, &options); + pm_node_t *node = pm_parse_stream(&parser, &arena, parser_buffer, stream, stream_fgets, stream_feof, &options); pm_serialize_header(buffer); - pm_serialize_content(&parser, node, buffer); + pm_serialize_content(parser, node, buffer); pm_buffer_append_byte(buffer, '\0'); pm_buffer_free(parser_buffer); - pm_parser_cleanup(&parser); + pm_parser_free(parser); pm_arena_free(&arena); pm_options_cleanup(&options); } From 3da08fd638649c2f327c58d33e54f9e6db0166ad Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 18 Mar 2026 09:55:47 -0400 Subject: [PATCH 050/100] Make parser an opaque pointer --- docs/encoding.md | 4 +- ext/prism/extension.c | 55 +- include/prism.h | 12 +- include/prism/internal/constant_pool.h | 2 + include/prism/internal/parser.h | 935 +++++++++++++++++++++ include/prism/parser.h | 1025 +++--------------------- src/parser.c | 135 +++- src/prism.c | 14 +- src/regexp.c | 1 + src/strpbrk.c | 1 + templates/ext/prism/api_node.c.erb | 22 +- templates/src/node.c.erb | 2 +- templates/src/prettyprint.c.erb | 2 + templates/src/serialize.c.erb | 19 +- 14 files changed, 1244 insertions(+), 985 deletions(-) 
create mode 100644 include/prism/internal/parser.h diff --git a/docs/encoding.md b/docs/encoding.md index a9090a9880..8f2fd18d0a 100644 --- a/docs/encoding.md +++ b/docs/encoding.md @@ -107,7 +107,7 @@ For each of these encodings, prism provides functions for checking if the subseq ## Getting notified when the encoding changes -You may want to get notified when the encoding changes based on the result of parsing an encoding comment. We use this internally for our `lex` function in order to provide the correct encodings for the tokens that are returned. For that you can register a callback with `pm_parser_register_encoding_changed_callback`. The callback will be called with a pointer to the parser. The encoding can be accessed through `parser->encoding`. +You may want to get notified when the encoding changes based on the result of parsing an encoding comment. We use this internally for our `lex` function in order to provide the correct encodings for the tokens that are returned. For that you can register a callback with `pm_parser_encoding_changed_callback_set`. The callback will be called with a pointer to the parser. The encoding can be accessed through `parser->encoding`. ```c // When the encoding that is being used to parse the source is changed by prism, @@ -117,5 +117,5 @@ typedef void (*pm_encoding_changed_callback_t)(pm_parser_t *parser); // Register a callback that will be called whenever prism changes the encoding // it is using to parse based on the magic comment. 
PRISM_EXPORTED_FUNCTION void -pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback); +pm_parser_encoding_changed_callback_set(pm_parser_t *parser, pm_encoding_changed_callback_t callback); ``` diff --git a/ext/prism/extension.c b/ext/prism/extension.c index f2ac4e7766..9b03387fc9 100644 --- a/ext/prism/extension.c +++ b/ext/prism/extension.c @@ -490,10 +490,11 @@ parser_comment(VALUE source, bool freeze, const pm_comment_t *comment) { */ static VALUE parser_comments(const pm_parser_t *parser, VALUE source, bool freeze) { - VALUE comments = rb_ary_new_capa(parser->comment_list.size); + const pm_list_t *comments_list = pm_parser_comments(parser); + VALUE comments = rb_ary_new_capa(comments_list->size); for ( - const pm_comment_t *comment = (const pm_comment_t *) parser->comment_list.head; + const pm_comment_t *comment = (const pm_comment_t *) comments_list->head; comment != NULL; comment = (const pm_comment_t *) comment->node.next ) { @@ -521,10 +522,11 @@ parser_magic_comment(VALUE source, bool freeze, const pm_magic_comment_t *magic_ */ static VALUE parser_magic_comments(const pm_parser_t *parser, VALUE source, bool freeze) { - VALUE magic_comments = rb_ary_new_capa(parser->magic_comment_list.size); + const pm_list_t *magic_comments_list = pm_parser_magic_comments(parser); + VALUE magic_comments = rb_ary_new_capa(magic_comments_list->size); for ( - const pm_magic_comment_t *magic_comment = (const pm_magic_comment_t *) parser->magic_comment_list.head; + const pm_magic_comment_t *magic_comment = (const pm_magic_comment_t *) magic_comments_list->head; magic_comment != NULL; magic_comment = (const pm_magic_comment_t *) magic_comment->node.next ) { @@ -542,10 +544,12 @@ parser_magic_comments(const pm_parser_t *parser, VALUE source, bool freeze) { */ static VALUE parser_data_loc(const pm_parser_t *parser, VALUE source, bool freeze) { - if (parser->data_loc.length == 0) { + const pm_location_t *data_loc = 
pm_parser_data_loc(parser); + + if (data_loc->length == 0) { return Qnil; } else { - return parser_location(source, freeze, parser->data_loc.start, parser->data_loc.length); + return parser_location(source, freeze, data_loc->start, data_loc->length); } } @@ -554,10 +558,11 @@ parser_data_loc(const pm_parser_t *parser, VALUE source, bool freeze) { */ static VALUE parser_errors(const pm_parser_t *parser, rb_encoding *encoding, VALUE source, bool freeze) { - VALUE errors = rb_ary_new_capa(parser->error_list.size); + const pm_list_t *error_list = pm_parser_errors(parser); + VALUE errors = rb_ary_new_capa(error_list->size); for ( - const pm_diagnostic_t *error = (const pm_diagnostic_t *) parser->error_list.head; + const pm_diagnostic_t *error = (const pm_diagnostic_t *) error_list->head; error != NULL; error = (const pm_diagnostic_t *) error->node.next ) { @@ -594,10 +599,11 @@ parser_errors(const pm_parser_t *parser, rb_encoding *encoding, VALUE source, bo */ static VALUE parser_warnings(const pm_parser_t *parser, rb_encoding *encoding, VALUE source, bool freeze) { - VALUE warnings = rb_ary_new_capa(parser->warning_list.size); + const pm_list_t *warning_list = pm_parser_warnings(parser); + VALUE warnings = rb_ary_new_capa(warning_list->size); for ( - const pm_diagnostic_t *warning = (const pm_diagnostic_t *) parser->warning_list.head; + const pm_diagnostic_t *warning = (const pm_diagnostic_t *) warning_list->head; warning != NULL; warning = (const pm_diagnostic_t *) warning->node.next ) { @@ -638,7 +644,7 @@ parse_result_create(VALUE class, const pm_parser_t *parser, VALUE value, rb_enco parser_data_loc(parser, source, freeze), parser_errors(parser, encoding, source, freeze), parser_warnings(parser, encoding, source, freeze), - parser->continuable ? Qtrue : Qfalse, + pm_parser_continuable(parser) ? Qtrue : Qfalse, source }; @@ -667,11 +673,11 @@ typedef struct { * onto the tokens array. 
*/ static void -parse_lex_token(void *data, pm_parser_t *parser, pm_token_t *token) { - parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data; +parse_lex_token(pm_parser_t *parser, pm_token_t *token, void *data) { + parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) data; VALUE value = pm_token_new(parser, token, parse_lex_data->encoding, parse_lex_data->source, parse_lex_data->freeze); - VALUE yields = rb_assoc_new(value, INT2FIX(parser->lex_state)); + VALUE yields = rb_assoc_new(value, INT2FIX(pm_parser_lex_state(parser))); if (parse_lex_data->freeze) { rb_obj_freeze(value); @@ -688,7 +694,7 @@ parse_lex_token(void *data, pm_parser_t *parser, pm_token_t *token) { */ static void parse_lex_encoding_changed_callback(pm_parser_t *parser) { - parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data; + parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) pm_parser_lex_callback_data(parser); parse_lex_data->encoding = rb_enc_find(pm_parser_encoding_name(parser)); // Since the encoding changed, we need to go back and change the encoding of @@ -737,11 +743,11 @@ static VALUE parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nodes) { pm_arena_t arena = { 0 }; pm_parser_t *parser = pm_parser_new(&arena, pm_string_source(input), pm_string_length(input), options); - pm_parser_register_encoding_changed_callback(parser, parse_lex_encoding_changed_callback); + pm_parser_encoding_changed_callback_set(parser, parse_lex_encoding_changed_callback); VALUE source_string = rb_str_new((const char *) pm_string_source(input), pm_string_length(input)); - VALUE offsets = rb_ary_new_capa(parser->line_offsets.size); - VALUE source = rb_funcall(rb_cPrismSource, rb_id_source_for, 3, source_string, LONG2NUM(parser->start_line), offsets); + VALUE offsets = rb_ary_new_capa(pm_parser_line_offsets(parser)->size); + VALUE source = rb_funcall(rb_cPrismSource, rb_id_source_for, 3, source_string, 
LONG2NUM(pm_parser_start_line(parser)), offsets); parse_lex_data_t parse_lex_data = { .source = source, @@ -751,12 +757,8 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod }; parse_lex_data_t *data = &parse_lex_data; - pm_lex_callback_t lex_callback = (pm_lex_callback_t) { - .data = (void *) data, - .callback = parse_lex_token, - }; + pm_parser_lex_callback_set(parser, parse_lex_token, data); - parser->lex_callback = &lex_callback; pm_node_t *node = pm_parse(parser); // Here we need to update the Source object to have the correct @@ -766,8 +768,9 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod rb_encoding *encoding = rb_enc_find(pm_parser_encoding_name(parser)); rb_enc_associate(source_string, encoding); - for (size_t index = 0; index < parser->line_offsets.size; index++) { - rb_ary_push(offsets, ULONG2NUM(parser->line_offsets.offsets[index])); + const pm_line_offset_list_t *line_offsets = pm_parser_line_offsets(parser); + for (size_t index = 0; index < line_offsets->size; index++) { + rb_ary_push(offsets, ULONG2NUM(line_offsets->offsets[index])); } if (pm_options_freeze_get(options)) { @@ -1216,7 +1219,7 @@ parse_input_success_p(pm_string_t *input, const pm_options_t *options) { pm_parse(parser); - VALUE result = parser->error_list.size == 0 ? Qtrue : Qfalse; + VALUE result = pm_parser_errors(parser)->size == 0 ? 
Qtrue : Qfalse; pm_parser_free(parser); pm_arena_free(&arena); diff --git a/include/prism.h b/include/prism.h index 768c7312a8..3a35b257d6 100644 --- a/include/prism.h +++ b/include/prism.h @@ -13,6 +13,7 @@ extern "C" { #include "prism/arena.h" #include "prism/ast.h" #include "prism/diagnostic.h" +#include "prism/encoding.h" #include "prism/excludes.h" #include "prism/node.h" #include "prism/options.h" @@ -59,17 +60,6 @@ PRISM_EXPORTED_FUNCTION void pm_parser_init(pm_arena_t *arena, pm_parser_t *pars */ PRISM_EXPORTED_FUNCTION pm_parser_t * pm_parser_new(pm_arena_t *arena, const uint8_t *source, size_t size, const pm_options_t *options); -/** - * Register a callback that will be called whenever prism changes the encoding - * it is using to parse based on the magic comment. - * - * @param parser The parser to register the callback with. - * @param callback The callback to register. - * - * \public \memberof pm_parser - */ -PRISM_EXPORTED_FUNCTION void pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback); - /** * Free the memory held by the given parser. * diff --git a/include/prism/internal/constant_pool.h b/include/prism/internal/constant_pool.h index b204dfe39f..03671938a6 100644 --- a/include/prism/internal/constant_pool.h +++ b/include/prism/internal/constant_pool.h @@ -12,6 +12,8 @@ #include "prism/constant_pool.h" +#include "prism/arena.h" + /** * Initialize a list of constant ids. * diff --git a/include/prism/internal/parser.h b/include/prism/internal/parser.h new file mode 100644 index 0000000000..f56dee8964 --- /dev/null +++ b/include/prism/internal/parser.h @@ -0,0 +1,935 @@ +/** + * @file internal/parser.h + * + * The parser used to parse Ruby source. 
+ */ +#ifndef PRISM_INTERNAL_PARSER_H +#define PRISM_INTERNAL_PARSER_H + +#include "prism/compiler/accel.h" + +#include "prism/arena.h" +#include "prism/ast.h" +#include "prism/encoding.h" +#include "prism/line_offset_list.h" +#include "prism/list.h" +#include "prism/options.h" +#include "prism/parser.h" +#include "prism/static_literals.h" + +#include +#include +#include + +/** + * This enum provides various bits that represent different kinds of states that + * the lexer can track. This is used to determine which kind of token to return + * based on the context of the parser. + */ +typedef enum { + PM_LEX_STATE_BIT_BEG, + PM_LEX_STATE_BIT_END, + PM_LEX_STATE_BIT_ENDARG, + PM_LEX_STATE_BIT_ENDFN, + PM_LEX_STATE_BIT_ARG, + PM_LEX_STATE_BIT_CMDARG, + PM_LEX_STATE_BIT_MID, + PM_LEX_STATE_BIT_FNAME, + PM_LEX_STATE_BIT_DOT, + PM_LEX_STATE_BIT_CLASS, + PM_LEX_STATE_BIT_LABEL, + PM_LEX_STATE_BIT_LABELED, + PM_LEX_STATE_BIT_FITEM +} pm_lex_state_bit_t; + +/** + * This enum combines the various bits from the above enum into individual + * values that represent the various states of the lexer. 
+ */ +typedef enum { + PM_LEX_STATE_NONE = 0, + PM_LEX_STATE_BEG = (1 << PM_LEX_STATE_BIT_BEG), + PM_LEX_STATE_END = (1 << PM_LEX_STATE_BIT_END), + PM_LEX_STATE_ENDARG = (1 << PM_LEX_STATE_BIT_ENDARG), + PM_LEX_STATE_ENDFN = (1 << PM_LEX_STATE_BIT_ENDFN), + PM_LEX_STATE_ARG = (1 << PM_LEX_STATE_BIT_ARG), + PM_LEX_STATE_CMDARG = (1 << PM_LEX_STATE_BIT_CMDARG), + PM_LEX_STATE_MID = (1 << PM_LEX_STATE_BIT_MID), + PM_LEX_STATE_FNAME = (1 << PM_LEX_STATE_BIT_FNAME), + PM_LEX_STATE_DOT = (1 << PM_LEX_STATE_BIT_DOT), + PM_LEX_STATE_CLASS = (1 << PM_LEX_STATE_BIT_CLASS), + PM_LEX_STATE_LABEL = (1 << PM_LEX_STATE_BIT_LABEL), + PM_LEX_STATE_LABELED = (1 << PM_LEX_STATE_BIT_LABELED), + PM_LEX_STATE_FITEM = (1 << PM_LEX_STATE_BIT_FITEM), + PM_LEX_STATE_BEG_ANY = PM_LEX_STATE_BEG | PM_LEX_STATE_MID | PM_LEX_STATE_CLASS, + PM_LEX_STATE_ARG_ANY = PM_LEX_STATE_ARG | PM_LEX_STATE_CMDARG, + PM_LEX_STATE_END_ANY = PM_LEX_STATE_END | PM_LEX_STATE_ENDARG | PM_LEX_STATE_ENDFN +} pm_lex_state_t; + +/** + * The type of quote that a heredoc uses. + */ +typedef enum { + PM_HEREDOC_QUOTE_NONE, + PM_HEREDOC_QUOTE_SINGLE = '\'', + PM_HEREDOC_QUOTE_DOUBLE = '"', + PM_HEREDOC_QUOTE_BACKTICK = '`', +} pm_heredoc_quote_t; + +/** + * The type of indentation that a heredoc uses. + */ +typedef enum { + PM_HEREDOC_INDENT_NONE, + PM_HEREDOC_INDENT_DASH, + PM_HEREDOC_INDENT_TILDE, +} pm_heredoc_indent_t; + +/** + * All of the information that needs to be stored in order to lex a heredoc. + */ +typedef struct { + /** A pointer to the start of the heredoc identifier. */ + const uint8_t *ident_start; + + /** The length of the heredoc identifier. */ + size_t ident_length; + + /** The type of quote that the heredoc uses. */ + pm_heredoc_quote_t quote; + + /** The type of indentation that the heredoc uses. */ + pm_heredoc_indent_t indent; +} pm_heredoc_lex_mode_t; + +/** + * When lexing Ruby source, the lexer has a small amount of state to tell which + * kind of token it is currently lexing.
For example, when we find the start of + * a string, the first token that we return is a TOKEN_STRING_BEGIN token. After + * that the lexer is now in the PM_LEX_STRING mode, and will return tokens that + * are found as part of a string. + */ +typedef struct pm_lex_mode { + /** The type of this lex mode. */ + enum { + /** This state is used when any given token is being lexed. */ + PM_LEX_DEFAULT, + + /** + * This state is used when we're lexing as normal but inside an embedded + * expression of a string. + */ + PM_LEX_EMBEXPR, + + /** + * This state is used when we're lexing a variable that is embedded + * directly inside of a string with the # shorthand. + */ + PM_LEX_EMBVAR, + + /** This state is used when you are inside the content of a heredoc. */ + PM_LEX_HEREDOC, + + /** + * This state is used when we are lexing a list of tokens, as in a %w + * word list literal or a %i symbol list literal. + */ + PM_LEX_LIST, + + /** + * This state is used when a regular expression has been begun and we + * are looking for the terminator. + */ + PM_LEX_REGEXP, + + /** + * This state is used when we are lexing a string or a string-like + * token, as in string content with either quote or an xstring. + */ + PM_LEX_STRING + } mode; + + /** The data associated with this type of lex mode. */ + union { + struct { + /** This keeps track of the nesting level of the list. */ + size_t nesting; + + /** Whether or not interpolation is allowed in this list. */ + bool interpolation; + + /** + * When lexing a list, it takes into account balancing the + * terminator if the terminator is one of (), [], {}, or <>. + */ + uint8_t incrementor; + + /** This is the terminator of the list literal. */ + uint8_t terminator; + + /** + * This is the character set that should be used to delimit the + * tokens within the list. + */ + uint8_t breakpoints[11]; + } list; + + struct { + /** + * This keeps track of the nesting level of the regular expression. 
+ */ + size_t nesting; + + /** + * When lexing a regular expression, it takes into account balancing + * the terminator if the terminator is one of (), [], {}, or <>. + */ + uint8_t incrementor; + + /** This is the terminator of the regular expression. */ + uint8_t terminator; + + /** + * This is the character set that should be used to delimit the + * tokens within the regular expression. + */ + uint8_t breakpoints[7]; + } regexp; + + struct { + /** This keeps track of the nesting level of the string. */ + size_t nesting; + + /** Whether or not interpolation is allowed in this string. */ + bool interpolation; + + /** + * Whether or not at the end of the string we should allow a :, + * which would indicate this was a dynamic symbol instead of a + * string. + */ + bool label_allowed; + + /** + * When lexing a string, it takes into account balancing the + * terminator if the terminator is one of (), [], {}, or <>. + */ + uint8_t incrementor; + + /** + * This is the terminator of the string. It is typically either a + * single or double quote. + */ + uint8_t terminator; + + /** + * This is the character set that should be used to delimit the + * tokens within the string. + */ + uint8_t breakpoints[7]; + } string; + + struct { + /** + * All of the data necessary to lex a heredoc. + */ + pm_heredoc_lex_mode_t base; + + /** + * This is the pointer to the character where lexing should resume + * once the heredoc has been completely processed. + */ + const uint8_t *next_start; + + /** + * This is used to track the amount of common whitespace on each + * line so that we know how much to dedent each line in the case of + * a tilde heredoc. + */ + size_t *common_whitespace; + + /** True if the previous token ended with a line continuation. */ + bool line_continuation; + } heredoc; + } as; + + /** The previous lex state so that it knows how to pop. 
*/ + struct pm_lex_mode *prev; +} pm_lex_mode_t; + +/** + * We pre-allocate a certain number of lex states in order to avoid having to + * call malloc too many times while parsing. You really shouldn't need more than + * this because you only really nest deeply when doing string interpolation. + */ +#define PM_LEX_STACK_SIZE 4 + +/** + * While parsing, we keep track of a stack of contexts. This is helpful for + * error recovery so that we can pop back to a previous context when we hit a + * token that is understood by a parent context but not by the current context. + */ +typedef enum { + /** a null context, used for returning a value from a function */ + PM_CONTEXT_NONE = 0, + + /** a begin statement */ + PM_CONTEXT_BEGIN, + + /** an ensure statement with an explicit begin */ + PM_CONTEXT_BEGIN_ENSURE, + + /** a rescue else statement with an explicit begin */ + PM_CONTEXT_BEGIN_ELSE, + + /** a rescue statement with an explicit begin */ + PM_CONTEXT_BEGIN_RESCUE, + + /** expressions in block arguments using braces */ + PM_CONTEXT_BLOCK_BRACES, + + /** expressions in block arguments using do..end */ + PM_CONTEXT_BLOCK_KEYWORDS, + + /** an ensure statement within a do..end block */ + PM_CONTEXT_BLOCK_ENSURE, + + /** a rescue else statement within a do..end block */ + PM_CONTEXT_BLOCK_ELSE, + + /** expressions in block parameters `foo do |...| end ` */ + PM_CONTEXT_BLOCK_PARAMETERS, + + /** a rescue statement within a do..end block */ + PM_CONTEXT_BLOCK_RESCUE, + + /** a case when statements */ + PM_CONTEXT_CASE_WHEN, + + /** a case in statements */ + PM_CONTEXT_CASE_IN, + + /** a class declaration */ + PM_CONTEXT_CLASS, + + /** an ensure statement within a class statement */ + PM_CONTEXT_CLASS_ENSURE, + + /** a rescue else statement within a class statement */ + PM_CONTEXT_CLASS_ELSE, + + /** a rescue statement within a class statement */ + PM_CONTEXT_CLASS_RESCUE, + + /** a method definition */ + PM_CONTEXT_DEF, + + /** an ensure statement within a method definition 
*/ + PM_CONTEXT_DEF_ENSURE, + + /** a rescue else statement within a method definition */ + PM_CONTEXT_DEF_ELSE, + + /** a rescue statement within a method definition */ + PM_CONTEXT_DEF_RESCUE, + + /** a method definition's parameters */ + PM_CONTEXT_DEF_PARAMS, + + /** a defined? expression */ + PM_CONTEXT_DEFINED, + + /** a method definition's default parameter */ + PM_CONTEXT_DEFAULT_PARAMS, + + /** an else clause */ + PM_CONTEXT_ELSE, + + /** an elsif clause */ + PM_CONTEXT_ELSIF, + + /** an interpolated expression */ + PM_CONTEXT_EMBEXPR, + + /** a for loop */ + PM_CONTEXT_FOR, + + /** a for loop's index */ + PM_CONTEXT_FOR_INDEX, + + /** an if statement */ + PM_CONTEXT_IF, + + /** a lambda expression with braces */ + PM_CONTEXT_LAMBDA_BRACES, + + /** a lambda expression with do..end */ + PM_CONTEXT_LAMBDA_DO_END, + + /** an ensure statement within a lambda expression */ + PM_CONTEXT_LAMBDA_ENSURE, + + /** a rescue else statement within a lambda expression */ + PM_CONTEXT_LAMBDA_ELSE, + + /** a rescue statement within a lambda expression */ + PM_CONTEXT_LAMBDA_RESCUE, + + /** the predicate clause of a loop statement */ + PM_CONTEXT_LOOP_PREDICATE, + + /** the top level context */ + PM_CONTEXT_MAIN, + + /** a module declaration */ + PM_CONTEXT_MODULE, + + /** an ensure statement within a module statement */ + PM_CONTEXT_MODULE_ENSURE, + + /** a rescue else statement within a module statement */ + PM_CONTEXT_MODULE_ELSE, + + /** a rescue statement within a module statement */ + PM_CONTEXT_MODULE_RESCUE, + + /** a multiple target expression */ + PM_CONTEXT_MULTI_TARGET, + + /** a parenthesized expression */ + PM_CONTEXT_PARENS, + + /** an END block */ + PM_CONTEXT_POSTEXE, + + /** a predicate inside an if/elsif/unless statement */ + PM_CONTEXT_PREDICATE, + + /** a BEGIN block */ + PM_CONTEXT_PREEXE, + + /** a modifier rescue clause */ + PM_CONTEXT_RESCUE_MODIFIER, + + /** a singleton class definition */ + PM_CONTEXT_SCLASS, + + /** an ensure statement with a 
singleton class */ + PM_CONTEXT_SCLASS_ENSURE, + + /** a rescue else statement with a singleton class */ + PM_CONTEXT_SCLASS_ELSE, + + /** a rescue statement with a singleton class */ + PM_CONTEXT_SCLASS_RESCUE, + + /** a ternary expression */ + PM_CONTEXT_TERNARY, + + /** an unless statement */ + PM_CONTEXT_UNLESS, + + /** an until statement */ + PM_CONTEXT_UNTIL, + + /** a while statement */ + PM_CONTEXT_WHILE, +} pm_context_t; + +/** This is a node in a linked list of contexts. */ +typedef struct pm_context_node { + /** The context that this node represents. */ + pm_context_t context; + + /** A pointer to the previous context in the linked list. */ + struct pm_context_node *prev; +} pm_context_node_t; + +/** The type of shareable constant value that can be set. */ +typedef uint8_t pm_shareable_constant_value_t; +static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_NONE = 0x0; +static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_LITERAL = PM_SHAREABLE_CONSTANT_NODE_FLAGS_LITERAL; +static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING = PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_EVERYTHING; +static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY = PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_COPY; + +/** + * This tracks an individual local variable in a certain lexical context, as + * well as the number of times it is read. + */ +typedef struct { + /** The name of the local variable. */ + pm_constant_id_t name; + + /** The location of the local variable in the source. */ + pm_location_t location; + + /** The index of the local variable in the local table. */ + uint32_t index; + + /** The number of times the local variable is read. */ + uint32_t reads; + + /** The hash of the local variable. */ + uint32_t hash; +} pm_local_t; + +/** + * This is a set of local variables in a certain lexical context (method, class, + * module, etc.).
We need to track how many times these variables are read in + * order to warn if they only get written. + */ +typedef struct pm_locals { + /** The number of local variables in the set. */ + uint32_t size; + + /** The capacity of the local variables set. */ + uint32_t capacity; + + /** + * A bloom filter over constant IDs stored in this set. Used to quickly + * reject lookups for names that are definitely not present, avoiding the + * cost of a linear scan or hash probe. + */ + uint32_t bloom; + + /** The nullable allocated memory for the local variables in the set. */ + pm_local_t *locals; +} pm_locals_t; + +/** The flags about scope parameters that can be set. */ +typedef uint8_t pm_scope_parameters_t; +static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NONE = 0x0; +static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS = 0x1; +static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS = 0x2; +static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_BLOCK = 0x4; +static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_ALL = 0x8; +static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED = 0x10; +static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NUMBERED_INNER = 0x20; +static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NUMBERED_FOUND = 0x40; + +/** + * This struct represents a node in a linked list of scopes. Some scopes can see + * into their parent scopes, while others cannot. + */ +typedef struct pm_scope { + /** A pointer to the previous scope in the linked list. */ + struct pm_scope *previous; + + /** The IDs of the locals in the given scope. */ + pm_locals_t locals; + + /** + * This is a list of the implicit parameters contained within the block. + * These will be processed after the block is parsed to determine the kind + * of parameters node that should be used and to check if any errors need to + * be added. 
+ */ + pm_node_list_t implicit_parameters; + + /** + * This is a bitfield that indicates the parameters that are being used in + * this scope. It is a combination of the PM_SCOPE_PARAMETERS_* constants. + * There are three different kinds of parameters that can be used in a + * scope: + * + * - Ordinary parameters (e.g., def foo(bar); end) + * - Numbered parameters (e.g., def foo; _1; end) + * - The it parameter (e.g., def foo; it; end) + * + * If ordinary parameters are being used, then certain parameters can be + * forwarded to another method/structure. Those are indicated by four + * additional bits in the params field. For example, some combinations of: + * + * - def foo(*); end + * - def foo(**); end + * - def foo(&); end + * - def foo(...); end + */ + pm_scope_parameters_t parameters; + + /** + * The current state of constant shareability for this scope. This is + * changed by magic shareable_constant_value comments. + */ + pm_shareable_constant_value_t shareable_constant; + + /** + * A boolean indicating whether or not this scope can see into its parent. + * If closed is true, then the scope cannot see into its parent. + */ + bool closed; +} pm_scope_t; + +/** + * A struct that represents a stack of boolean values. + */ +typedef uint32_t pm_state_stack_t; + +/** + * This struct represents the overall parser. It contains a reference to the + * source file, as well as pointers that indicate where in the source it's + * currently parsing. It also contains the most recent and current token that + * it's considering. + */ +struct pm_parser_t { + /** The arena used for all AST-lifetime allocations. Caller-owned. */ + pm_arena_t *arena; + + /** The arena used for parser metadata (comments, diagnostics, etc.). */ + pm_arena_t metadata_arena; + + /** + * The next node identifier that will be assigned. This is a unique + * identifier used to track nodes such that the syntax tree can be dropped + * but the node can be found through another parse. 
+ */ + uint32_t node_id; + + /** The current state of the lexer. */ + pm_lex_state_t lex_state; + + /** Tracks the current nesting of (), [], and {}. */ + int enclosure_nesting; + + /** + * Used to temporarily track the nesting of enclosures to determine if a { + * is the beginning of a lambda following the parameters of a lambda. + */ + int lambda_enclosure_nesting; + + /** + * Used to track the nesting of braces to ensure we get the correct value + * when we are interpolating blocks with braces. + */ + int brace_nesting; + + /** + * The stack used to determine if a do keyword belongs to the predicate of a + * while, until, or for loop. + */ + pm_state_stack_t do_loop_stack; + + /** + * The stack used to determine if a do keyword belongs to the beginning of a + * block. + */ + pm_state_stack_t accepts_block_stack; + + /** A stack of lex modes. */ + struct { + /** The current mode of the lexer. */ + pm_lex_mode_t *current; + + /** The stack of lexer modes. */ + pm_lex_mode_t stack[PM_LEX_STACK_SIZE]; + + /** The current index into the lexer mode stack. */ + size_t index; + } lex_modes; + + /** The pointer to the start of the source. */ + const uint8_t *start; + + /** The pointer to the end of the source. */ + const uint8_t *end; + + /** The previous token we were considering. */ + pm_token_t previous; + + /** The current token we're considering. */ + pm_token_t current; + + /** + * This is a special field set on the parser when we need the parser to jump + * to a specific location when lexing the next token, as opposed to just + * using the end of the previous token. Normally this is NULL. + */ + const uint8_t *next_start; + + /** + * This field indicates the end of a heredoc whose identifier was found on + * the current line. If another heredoc is found on the same line, then this + * will be moved forward to the end of that heredoc. If no heredocs are + * found on a line then this is NULL. 
+ */ + const uint8_t *heredoc_end; + + /** The list of comments that have been found while parsing. */ + pm_list_t comment_list; + + /** The list of magic comments that have been found while parsing. */ + pm_list_t magic_comment_list; + + /** + * An optional location that represents the location of the __END__ marker + * and the rest of the content of the file. This content is loaded into the + * DATA constant when the file being parsed is the main file being executed. + */ + pm_location_t data_loc; + + /** The list of warnings that have been found while parsing. */ + pm_list_t warning_list; + + /** The list of errors that have been found while parsing. */ + pm_list_t error_list; + + /** The current local scope. */ + pm_scope_t *current_scope; + + /** The current parsing context. */ + pm_context_node_t *current_context; + + /** + * The hash keys for the hash that is currently being parsed. This is not + * usually necessary because it can pass it down the various call chains, + * but in the event that you're parsing a hash that is being directly + * pushed into another hash with **, we need to share the hash keys so that + * we can warn for the nested hash as well. + */ + pm_static_literals_t *current_hash_keys; + + /** + * The encoding functions for the current file are attached to the parser as + * it's parsing so that they can change with a magic comment. + */ + const pm_encoding_t *encoding; + + /** + * When the encoding that is being used to parse the source is changed by + * prism, we provide the ability here to call out to a user-defined + * function. + */ + pm_encoding_changed_callback_t encoding_changed_callback; + + /** + * This pointer indicates where a comment must start if it is to be + * considered an encoding comment. + */ + const uint8_t *encoding_comment_start; + + /** + * When you are lexing through a file, the lexer needs all of the information + * that the parser additionally provides (for example, the local table).
So if + * you want to properly lex Ruby, you need to actually lex it in the context of + * the parser. In order to provide this functionality, we optionally allow a + * struct to be attached to the parser that calls back out to a user-provided + * callback when each token is lexed. + */ + struct { + /** + * This is the callback that is called when a token is lexed. It is + * passed the parser, the token that was lexed, and the opaque data + * pointer. + */ + pm_lex_callback_t callback; + + /** + * This opaque pointer is used to provide whatever information the user + * deemed necessary to the callback. In our case we use it to pass the + * array that the tokens get appended into. + */ + void *data; + } lex_callback; + + /** + * This is the path of the file being parsed. We use the filepath when + * constructing SourceFileNodes. + */ + pm_string_t filepath; + + /** + * This constant pool keeps all of the constants defined throughout the file + * so that we can reference them later. + */ + pm_constant_pool_t constant_pool; + + /** This is the list of line offsets in the source file. */ + pm_line_offset_list_t line_offsets; + + /** + * State communicated from the lexer to the parser for integer tokens. + */ + struct { + /** + * A flag indicating the base of the integer (binary, octal, decimal, + * hexadecimal). Set during lexing and read during node creation. + */ + pm_node_flags_t base; + + /** + * When lexing a decimal integer that fits in a uint32_t, we compute + * the value during lexing to avoid re-scanning the digits during + * parsing. If lexed is true, this holds the result and + * pm_integer_parse can be skipped. + */ + uint32_t value; + + /** Whether value holds a valid pre-computed integer. */ + bool lexed; + } integer; + + /** + * This string is used to pass information from the lexer to the parser. It + * is particularly necessary because of escape sequences. + */ + pm_string_t current_string; + + /** + * The line number at the start of the parse.
This will be used to offset + * the line numbers of all of the locations. + */ + int32_t start_line; + + /** + * When a string-like expression is being lexed, any byte or escape sequence + * that resolves to a value whose top bit is set (i.e., >= 0x80) will + * explicitly set the encoding to the same encoding as the source. + * Alternatively, if a unicode escape sequence is used (e.g., \\u{80}) that + * resolves to a value whose top bit is set, then the encoding will be + * explicitly set to UTF-8. + * + * The _next_ time this happens, if the encoding that is about to become the + * explicitly set encoding does not match the previously set explicit + * encoding, a mixed encoding error will be emitted. + * + * When the expression is finished being lexed, the explicit encoding + * controls the encoding of the expression. For the most part this means + * that the expression will either be encoded in the source encoding or + * UTF-8. This holds for all encodings except US-ASCII. If the source is + * US-ASCII and an explicit encoding was set that was _not_ UTF-8, then the + * expression will be encoded as ASCII-8BIT. + * + * Note that if the expression is a list, different elements within the same + * list can have different encodings, so this will get reset between each + * element. Furthermore all of this only applies to lists that support + * interpolation, because otherwise escapes that could change the encoding + * are ignored. + * + * At first glance, it may make more sense for this to live on the lexer + * mode, but we need it here to communicate back to the parser for character + * literals that do not push a new lexer mode. + */ + const pm_encoding_t *explicit_encoding; + + /** + * When parsing block exits (e.g., break, next, redo), we need to validate + * that they are in correct contexts. For the most part we can do this by + * looking at our parent contexts. However, modifier while and until + * expressions can change that context to make block exits valid. 
In these + * cases, we need to keep track of the block exits and then validate them + * after the expression has been parsed. + * + * We use a pointer here because we don't want to keep a whole list attached + * since this will only be used in the context of begin/end expressions. + */ + pm_node_list_t *current_block_exits; + + /** The version of prism that we should use to parse. */ + pm_options_version_t version; + + /** The command line flags given from the options. */ + uint8_t command_line; + + /** + * Whether or not we have found a frozen_string_literal magic comment with + * a true or false value. + * May be: + * - PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED + * - PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED + * - PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET + */ + int8_t frozen_string_literal; + + /** + * Whether or not we are parsing an eval string. This impacts whether or not + * we should evaluate if block exits/yields are valid. + */ + bool parsing_eval; + + /** + * Whether or not we are parsing a "partial" script, which is a script that + * will be evaluated in the context of another script, so we should not + * check jumps (next/break/etc.) for validity. + */ + bool partial_script; + + /** Whether or not we're at the beginning of a command. */ + bool command_start; + + /** + * Whether or not we're currently parsing the body of an endless method + * definition. In this context, PM_TOKEN_KEYWORD_DO_BLOCK should not be + * consumed by commands (it should bubble up to the outer context). + */ + bool in_endless_def_body; + + /** Whether or not we're currently recovering from a syntax error. */ + bool recovering; + + /** + * Whether or not the source being parsed could become valid if more input + * were appended. This is set to false when the parser encounters a token + * that is definitively wrong (e.g., a stray `end` or `]`) as opposed to + * merely incomplete. 
+ */ + bool continuable; + + /** + * This is very specialized behavior for when you want to parse in a context + * that does not respect encoding comments. Its main use case is translating + * into the whitequark/parser AST which re-encodes source files in UTF-8 + * before they are parsed and ignores encoding comments. + */ + bool encoding_locked; + + /** + * Whether or not the encoding has been changed by a magic comment. We use + * this to provide a fast path for the lexer instead of going through the + * function pointer. + */ + bool encoding_changed; + + /** + * This flag indicates that we are currently parsing a pattern matching + * expression and impacts the calculation of newlines. + */ + bool pattern_matching_newlines; + + /** This flag indicates that we are currently parsing a keyword argument. */ + bool in_keyword_arg; + + /** + * Whether or not the parser has seen a token that has semantic meaning + * (i.e., a token that is not a comment or whitespace). + */ + bool semantic_token_seen; + + /** + * By default, Ruby always warns about mismatched indentation. This can be + * toggled with a magic comment. + */ + bool warn_mismatched_indentation; + +#if defined(PRISM_HAS_NEON) || defined(PRISM_HAS_SSSE3) || defined(PRISM_HAS_SWAR) + /** + * Cached lookup tables for pm_strpbrk's SIMD fast path. Avoids rebuilding + * the nibble-based tables on every call when the charset hasn't changed + * (which is the common case during string/regex/list lexing). + */ + struct { + /** The cached charset (null-terminated, max 11 chars + NUL). */ + uint8_t charset[12]; + + /** Nibble-based low lookup table for SIMD matching. */ + uint8_t low_lut[16]; + + /** Nibble-based high lookup table for SIMD matching. */ + uint8_t high_lut[16]; + + /** Scalar fallback table (4 x 64-bit bitmasks covering all ASCII).
*/ + uint64_t table[4]; + } strpbrk_cache; +#endif +}; + +#endif diff --git a/include/prism/parser.h b/include/prism/parser.h index 53c9472171..047999142c 100644 --- a/include/prism/parser.h +++ b/include/prism/parser.h @@ -6,455 +6,14 @@ #ifndef PRISM_PARSER_H #define PRISM_PARSER_H -#include "prism/compiler/accel.h" - -#include "prism/arena.h" #include "prism/ast.h" -#include "prism/encoding.h" #include "prism/line_offset_list.h" #include "prism/list.h" -#include "prism/options.h" -#include "prism/static_literals.h" - -#include -#include -#include - -/** - * This enum provides various bits that represent different kinds of states that - * the lexer can track. This is used to determine which kind of token to return - * based on the context of the parser. - */ -typedef enum { - PM_LEX_STATE_BIT_BEG, - PM_LEX_STATE_BIT_END, - PM_LEX_STATE_BIT_ENDARG, - PM_LEX_STATE_BIT_ENDFN, - PM_LEX_STATE_BIT_ARG, - PM_LEX_STATE_BIT_CMDARG, - PM_LEX_STATE_BIT_MID, - PM_LEX_STATE_BIT_FNAME, - PM_LEX_STATE_BIT_DOT, - PM_LEX_STATE_BIT_CLASS, - PM_LEX_STATE_BIT_LABEL, - PM_LEX_STATE_BIT_LABELED, - PM_LEX_STATE_BIT_FITEM -} pm_lex_state_bit_t; - -/** - * This enum combines the various bits from the above enum into individual - * values that represent the various states of the lexer. 
- */ -typedef enum { - PM_LEX_STATE_NONE = 0, - PM_LEX_STATE_BEG = (1 << PM_LEX_STATE_BIT_BEG), - PM_LEX_STATE_END = (1 << PM_LEX_STATE_BIT_END), - PM_LEX_STATE_ENDARG = (1 << PM_LEX_STATE_BIT_ENDARG), - PM_LEX_STATE_ENDFN = (1 << PM_LEX_STATE_BIT_ENDFN), - PM_LEX_STATE_ARG = (1 << PM_LEX_STATE_BIT_ARG), - PM_LEX_STATE_CMDARG = (1 << PM_LEX_STATE_BIT_CMDARG), - PM_LEX_STATE_MID = (1 << PM_LEX_STATE_BIT_MID), - PM_LEX_STATE_FNAME = (1 << PM_LEX_STATE_BIT_FNAME), - PM_LEX_STATE_DOT = (1 << PM_LEX_STATE_BIT_DOT), - PM_LEX_STATE_CLASS = (1 << PM_LEX_STATE_BIT_CLASS), - PM_LEX_STATE_LABEL = (1 << PM_LEX_STATE_BIT_LABEL), - PM_LEX_STATE_LABELED = (1 << PM_LEX_STATE_BIT_LABELED), - PM_LEX_STATE_FITEM = (1 << PM_LEX_STATE_BIT_FITEM), - PM_LEX_STATE_BEG_ANY = PM_LEX_STATE_BEG | PM_LEX_STATE_MID | PM_LEX_STATE_CLASS, - PM_LEX_STATE_ARG_ANY = PM_LEX_STATE_ARG | PM_LEX_STATE_CMDARG, - PM_LEX_STATE_END_ANY = PM_LEX_STATE_END | PM_LEX_STATE_ENDARG | PM_LEX_STATE_ENDFN -} pm_lex_state_t; - -/** - * The type of quote that a heredoc uses. - */ -typedef enum { - PM_HEREDOC_QUOTE_NONE, - PM_HEREDOC_QUOTE_SINGLE = '\'', - PM_HEREDOC_QUOTE_DOUBLE = '"', - PM_HEREDOC_QUOTE_BACKTICK = '`', -} pm_heredoc_quote_t; - -/** - * The type of indentation that a heredoc uses. - */ -typedef enum { - PM_HEREDOC_INDENT_NONE, - PM_HEREDOC_INDENT_DASH, - PM_HEREDOC_INDENT_TILDE, -} pm_heredoc_indent_t; - -/** - * All of the information necessary to store to lexing a heredoc. - */ -typedef struct { - /** A pointer to the start of the heredoc identifier. */ - const uint8_t *ident_start; - - /** The length of the heredoc identifier. */ - size_t ident_length; - - /** The type of quote that the heredoc uses. */ - pm_heredoc_quote_t quote; - - /** The type of indentation that the heredoc uses. */ - pm_heredoc_indent_t indent; -} pm_heredoc_lex_mode_t; - -/** - * The size of the breakpoints and strpbrk cache charset buffers. 
All - * breakpoint arrays and the strpbrk cache charset must share this size so - * that memcmp can safely compare the full buffer without overreading. - */ -#define PM_STRPBRK_CACHE_SIZE 16 - -/** - * When lexing Ruby source, the lexer has a small amount of state to tell which - * kind of token it is currently lexing. For example, when we find the start of - * a string, the first token that we return is a TOKEN_STRING_BEGIN token. After - * that the lexer is now in the PM_LEX_STRING mode, and will return tokens that - * are found as part of a string. - */ -typedef struct pm_lex_mode { - /** The type of this lex mode. */ - enum { - /** This state is used when any given token is being lexed. */ - PM_LEX_DEFAULT, - - /** - * This state is used when we're lexing as normal but inside an embedded - * expression of a string. - */ - PM_LEX_EMBEXPR, - - /** - * This state is used when we're lexing a variable that is embedded - * directly inside of a string with the # shorthand. - */ - PM_LEX_EMBVAR, - - /** This state is used when you are inside the content of a heredoc. */ - PM_LEX_HEREDOC, - - /** - * This state is used when we are lexing a list of tokens, as in a %w - * word list literal or a %i symbol list literal. - */ - PM_LEX_LIST, - - /** - * This state is used when a regular expression has been begun and we - * are looking for the terminator. - */ - PM_LEX_REGEXP, - - /** - * This state is used when we are lexing a string or a string-like - * token, as in string content with either quote or an xstring. - */ - PM_LEX_STRING - } mode; - - /** The data associated with this type of lex mode. */ - union { - struct { - /** This keeps track of the nesting level of the list. */ - size_t nesting; - - /** Whether or not interpolation is allowed in this list. */ - bool interpolation; - - /** - * When lexing a list, it takes into account balancing the - * terminator if the terminator is one of (), [], {}, or <>. 
- */ - uint8_t incrementor; - - /** This is the terminator of the list literal. */ - uint8_t terminator; - - /** - * This is the character set that should be used to delimit the - * tokens within the list. - */ - uint8_t breakpoints[PM_STRPBRK_CACHE_SIZE]; - } list; - - struct { - /** - * This keeps track of the nesting level of the regular expression. - */ - size_t nesting; - - /** - * When lexing a regular expression, it takes into account balancing - * the terminator if the terminator is one of (), [], {}, or <>. - */ - uint8_t incrementor; - - /** This is the terminator of the regular expression. */ - uint8_t terminator; - - /** - * This is the character set that should be used to delimit the - * tokens within the regular expression. - */ - uint8_t breakpoints[PM_STRPBRK_CACHE_SIZE]; - } regexp; - - struct { - /** This keeps track of the nesting level of the string. */ - size_t nesting; - - /** Whether or not interpolation is allowed in this string. */ - bool interpolation; - - /** - * Whether or not at the end of the string we should allow a :, - * which would indicate this was a dynamic symbol instead of a - * string. - */ - bool label_allowed; - - /** - * When lexing a string, it takes into account balancing the - * terminator if the terminator is one of (), [], {}, or <>. - */ - uint8_t incrementor; - - /** - * This is the terminator of the string. It is typically either a - * single or double quote. - */ - uint8_t terminator; - - /** - * This is the character set that should be used to delimit the - * tokens within the string. - */ - uint8_t breakpoints[PM_STRPBRK_CACHE_SIZE]; - } string; - - struct { - /** - * All of the data necessary to lex a heredoc. - */ - pm_heredoc_lex_mode_t base; - - /** - * This is the pointer to the character where lexing should resume - * once the heredoc has been completely processed. 
- */ - const uint8_t *next_start; - - /** - * This is used to track the amount of common whitespace on each - * line so that we know how much to dedent each line in the case of - * a tilde heredoc. - */ - size_t *common_whitespace; - - /** True if the previous token ended with a line continuation. */ - bool line_continuation; - } heredoc; - } as; - - /** The previous lex state so that it knows how to pop. */ - struct pm_lex_mode *prev; -} pm_lex_mode_t; - -/** - * We pre-allocate a certain number of lex states in order to avoid having to - * call malloc too many times while parsing. You really shouldn't need more than - * this because you only really nest deeply when doing string interpolation. - */ -#define PM_LEX_STACK_SIZE 4 /** * The parser used to parse Ruby source. */ -typedef struct pm_parser pm_parser_t; - -/** - * While parsing, we keep track of a stack of contexts. This is helpful for - * error recovery so that we can pop back to a previous context when we hit a - * token that is understood by a parent context but not by the current context. 
- */ -typedef enum { - /** a null context, used for returning a value from a function */ - PM_CONTEXT_NONE = 0, - - /** a begin statement */ - PM_CONTEXT_BEGIN, - - /** an ensure statement with an explicit begin */ - PM_CONTEXT_BEGIN_ENSURE, - - /** a rescue else statement with an explicit begin */ - PM_CONTEXT_BEGIN_ELSE, - - /** a rescue statement with an explicit begin */ - PM_CONTEXT_BEGIN_RESCUE, - - /** expressions in block arguments using braces */ - PM_CONTEXT_BLOCK_BRACES, - - /** expressions in block arguments using do..end */ - PM_CONTEXT_BLOCK_KEYWORDS, - - /** an ensure statement within a do..end block */ - PM_CONTEXT_BLOCK_ENSURE, - - /** a rescue else statement within a do..end block */ - PM_CONTEXT_BLOCK_ELSE, - - /** expressions in block parameters `foo do |...| end ` */ - PM_CONTEXT_BLOCK_PARAMETERS, - - /** a rescue statement within a do..end block */ - PM_CONTEXT_BLOCK_RESCUE, - - /** a case when statements */ - PM_CONTEXT_CASE_WHEN, - - /** a case in statements */ - PM_CONTEXT_CASE_IN, - - /** a class declaration */ - PM_CONTEXT_CLASS, - - /** an ensure statement within a class statement */ - PM_CONTEXT_CLASS_ENSURE, - - /** a rescue else statement within a class statement */ - PM_CONTEXT_CLASS_ELSE, - - /** a rescue statement within a class statement */ - PM_CONTEXT_CLASS_RESCUE, - - /** a method definition */ - PM_CONTEXT_DEF, - - /** an ensure statement within a method definition */ - PM_CONTEXT_DEF_ENSURE, - - /** a rescue else statement within a method definition */ - PM_CONTEXT_DEF_ELSE, - - /** a rescue statement within a method definition */ - PM_CONTEXT_DEF_RESCUE, - - /** a method definition's parameters */ - PM_CONTEXT_DEF_PARAMS, - - /** a defined? 
expression */ - PM_CONTEXT_DEFINED, - - /** a method definition's default parameter */ - PM_CONTEXT_DEFAULT_PARAMS, - - /** an else clause */ - PM_CONTEXT_ELSE, - - /** an elsif clause */ - PM_CONTEXT_ELSIF, - - /** an interpolated expression */ - PM_CONTEXT_EMBEXPR, - - /** a for loop */ - PM_CONTEXT_FOR, - - /** a for loop's index */ - PM_CONTEXT_FOR_INDEX, - - /** an if statement */ - PM_CONTEXT_IF, - - /** a lambda expression with braces */ - PM_CONTEXT_LAMBDA_BRACES, - - /** a lambda expression with do..end */ - PM_CONTEXT_LAMBDA_DO_END, - - /** an ensure statement within a lambda expression */ - PM_CONTEXT_LAMBDA_ENSURE, - - /** a rescue else statement within a lambda expression */ - PM_CONTEXT_LAMBDA_ELSE, - - /** a rescue statement within a lambda expression */ - PM_CONTEXT_LAMBDA_RESCUE, - - /** the predicate clause of a loop statement */ - PM_CONTEXT_LOOP_PREDICATE, - - /** the top level context */ - PM_CONTEXT_MAIN, - - /** a module declaration */ - PM_CONTEXT_MODULE, - - /** an ensure statement within a module statement */ - PM_CONTEXT_MODULE_ENSURE, - - /** a rescue else statement within a module statement */ - PM_CONTEXT_MODULE_ELSE, - - /** a rescue statement within a module statement */ - PM_CONTEXT_MODULE_RESCUE, - - /** a multiple target expression */ - PM_CONTEXT_MULTI_TARGET, - - /** a parenthesized expression */ - PM_CONTEXT_PARENS, - - /** an END block */ - PM_CONTEXT_POSTEXE, - - /** a predicate inside an if/elsif/unless statement */ - PM_CONTEXT_PREDICATE, - - /** a BEGIN block */ - PM_CONTEXT_PREEXE, - - /** a modifier rescue clause */ - PM_CONTEXT_RESCUE_MODIFIER, - - /** a singleton class definition */ - PM_CONTEXT_SCLASS, - - /** an ensure statement with a singleton class */ - PM_CONTEXT_SCLASS_ENSURE, - - /** a rescue else statement with a singleton class */ - PM_CONTEXT_SCLASS_ELSE, - - /** a rescue statement with a singleton class */ - PM_CONTEXT_SCLASS_RESCUE, - - /** a ternary expression */ - PM_CONTEXT_TERNARY, - - /** an unless 
statement */ - PM_CONTEXT_UNLESS, - - /** an until statement */ - PM_CONTEXT_UNTIL, - - /** a while statement */ - PM_CONTEXT_WHILE, -} pm_context_t; - -/** This is a node in a linked list of contexts. */ -typedef struct pm_context_node { - /** The context that this node represents. */ - pm_context_t context; - - /** A pointer to the previous context in the linked list. */ - struct pm_context_node *prev; -} pm_context_node_t; +typedef struct pm_parser_t pm_parser_t; /** This is the type of a comment that we've found while parsing. */ typedef enum { @@ -502,503 +61,149 @@ typedef struct { typedef void (*pm_encoding_changed_callback_t)(pm_parser_t *parser); /** - * When you are lexing through a file, the lexer needs all of the information - * that the parser additionally provides (for example, the local table). So if - * you want to properly lex Ruby, you need to actually lex it in the context of - * the parser. In order to provide this functionality, we optionally allow a - * struct to be attached to the parser that calls back out to a user-provided - * callback when each token is lexed. + * This is the callback that is called when a token is lexed. It is passed + * the opaque data pointer, the parser, and the token that was lexed. */ -typedef struct { - /** - * This opaque pointer is used to provide whatever information the user - * deemed necessary to the callback. In our case we use it to pass the array - * that the tokens get appended into. - */ - void *data; - - /** - * This is the callback that is called when a token is lexed. It is passed - * the opaque data pointer, the parser, and the token that was lexed. - */ - void (*callback)(void *data, pm_parser_t *parser, pm_token_t *token); -} pm_lex_callback_t; - -/** The type of shareable constant value that can be set. 
*/ -typedef uint8_t pm_shareable_constant_value_t; -static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_NONE = 0x0; -static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_LITERAL = PM_SHAREABLE_CONSTANT_NODE_FLAGS_LITERAL; -static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING = PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_EVERYTHING; -static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY = PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_COPY; +typedef void (*pm_lex_callback_t)(pm_parser_t *parser, pm_token_t *token, void *data); /** - * This tracks an individual local variable in a certain lexical context, as - * well as the number of times is it read. + * Register a callback that will be called whenever prism changes the encoding + * it is using to parse based on the magic comment. + * + * @param parser The parser to register the callback with. + * @param callback The callback to register. + * + * \public \memberof pm_parser */ -typedef struct { - /** The name of the local variable. */ - pm_constant_id_t name; - - /** The location of the local variable in the source. */ - pm_location_t location; - - /** The index of the local variable in the local table. */ - uint32_t index; - - /** The number of times the local variable is read. */ - uint32_t reads; - - /** The hash of the local variable. */ - uint32_t hash; -} pm_local_t; +PRISM_EXPORTED_FUNCTION void pm_parser_encoding_changed_callback_set(pm_parser_t *parser, pm_encoding_changed_callback_t callback); /** - * This is a set of local variables in a certain lexical context (method, class, - * module, etc.). We need to track how many times these variables are read in - * order to warn if they only get written. + * Register a callback that will be called whenever a token is lexed. + * + * @param parser The parser to register the callback with. 
+ * @param data The opaque data to pass to the callback when it is called. + * @param callback The callback to register. + * + * \public \memberof pm_parser */ -typedef struct pm_locals { - /** The number of local variables in the set. */ - uint32_t size; - - /** The capacity of the local variables set. */ - uint32_t capacity; - - /** - * A bloom filter over constant IDs stored in this set. Used to quickly - * reject lookups for names that are definitely not present, avoiding the - * cost of a linear scan or hash probe. - */ - uint32_t bloom; - - /** The nullable allocated memory for the local variables in the set. */ - pm_local_t *locals; -} pm_locals_t; - -/** The flags about scope parameters that can be set. */ -typedef uint8_t pm_scope_parameters_t; -static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NONE = 0x0; -static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS = 0x1; -static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS = 0x2; -static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_BLOCK = 0x4; -static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_ALL = 0x8; -static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED = 0x10; -static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NUMBERED_INNER = 0x20; -static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NUMBERED_FOUND = 0x40; +PRISM_EXPORTED_FUNCTION void pm_parser_lex_callback_set(pm_parser_t *parser, pm_lex_callback_t callback, void *data); /** - * This struct represents a node in a linked list of scopes. Some scopes can see - * into their parent scopes, while others cannot. + * Returns the opaque data that is passed to the lex callback when it is called. + * + * @param parser The parser whose lex callback data we want to get. + * @return The opaque data that is passed to the lex callback when it is called. */ -typedef struct pm_scope { - /** A pointer to the previous scope in the linked list. 
*/ - struct pm_scope *previous; - - /** The IDs of the locals in the given scope. */ - pm_locals_t locals; - - /** - * This is a list of the implicit parameters contained within the block. - * These will be processed after the block is parsed to determine the kind - * of parameters node that should be used and to check if any errors need to - * be added. - */ - pm_node_list_t implicit_parameters; - - /** - * This is a bitfield that indicates the parameters that are being used in - * this scope. It is a combination of the PM_SCOPE_PARAMETERS_* constants. - * There are three different kinds of parameters that can be used in a - * scope: - * - * - Ordinary parameters (e.g., def foo(bar); end) - * - Numbered parameters (e.g., def foo; _1; end) - * - The it parameter (e.g., def foo; it; end) - * - * If ordinary parameters are being used, then certain parameters can be - * forwarded to another method/structure. Those are indicated by four - * additional bits in the params field. For example, some combinations of: - * - * - def foo(*); end - * - def foo(**); end - * - def foo(&); end - * - def foo(...); end - */ - pm_scope_parameters_t parameters; - - /** - * The current state of constant shareability for this scope. This is - * changed by magic shareable_constant_value comments. - */ - pm_shareable_constant_value_t shareable_constant; - - /** - * A boolean indicating whether or not this scope can see into its parent. - * If closed is true, then the scope cannot see into its parent. - */ - bool closed; -} pm_scope_t; +PRISM_EXPORTED_FUNCTION void * pm_parser_lex_callback_data(pm_parser_t *parser); /** - * A struct that represents a stack of boolean values. + * Returns the raw pointer to the start of the source that is being parsed. 
+ * + * @param parser the parser whose start pointer we want to get + * @return the raw pointer to the start of the source that is being parsed */ -typedef uint32_t pm_state_stack_t; +PRISM_EXPORTED_FUNCTION const uint8_t * pm_parser_start(const pm_parser_t *parser); /** - * This struct represents the overall parser. It contains a reference to the - * source file, as well as pointers that indicate where in the source it's - * currently parsing. It also contains the most recent and current token that - * it's considering. + * Returns the raw pointer to the end of the source that is being parsed. + * + * @param parser the parser whose end pointer we want to get + * @return the raw pointer to the end of the source that is being parsed */ -struct pm_parser { - /** The arena used for all AST-lifetime allocations. Caller-owned. */ - pm_arena_t *arena; - - /** The arena used for parser metadata (comments, diagnostics, etc.). */ - pm_arena_t metadata_arena; - - /** - * The next node identifier that will be assigned. This is a unique - * identifier used to track nodes such that the syntax tree can be dropped - * but the node can be found through another parse. - */ - uint32_t node_id; - - /** The current state of the lexer. */ - pm_lex_state_t lex_state; - - /** Tracks the current nesting of (), [], and {}. */ - int enclosure_nesting; - - /** - * Used to temporarily track the nesting of enclosures to determine if a { - * is the beginning of a lambda following the parameters of a lambda. - */ - int lambda_enclosure_nesting; - - /** - * Used to track the nesting of braces to ensure we get the correct value - * when we are interpolating blocks with braces. - */ - int brace_nesting; - - /** - * The stack used to determine if a do keyword belongs to the predicate of a - * while, until, or for loop. - */ - pm_state_stack_t do_loop_stack; - - /** - * The stack used to determine if a do keyword belongs to the beginning of a - * block. 
- */ - pm_state_stack_t accepts_block_stack; - - /** A stack of lex modes. */ - struct { - /** The current mode of the lexer. */ - pm_lex_mode_t *current; - - /** The stack of lexer modes. */ - pm_lex_mode_t stack[PM_LEX_STACK_SIZE]; - - /** The current index into the lexer mode stack. */ - size_t index; - } lex_modes; - - /** The pointer to the start of the source. */ - const uint8_t *start; - - /** The pointer to the end of the source. */ - const uint8_t *end; - - /** The previous token we were considering. */ - pm_token_t previous; - - /** The current token we're considering. */ - pm_token_t current; - - /** - * This is a special field set on the parser when we need the parser to jump - * to a specific location when lexing the next token, as opposed to just - * using the end of the previous token. Normally this is NULL. - */ - const uint8_t *next_start; - - /** - * This field indicates the end of a heredoc whose identifier was found on - * the current line. If another heredoc is found on the same line, then this - * will be moved forward to the end of that heredoc. If no heredocs are - * found on a line then this is NULL. - */ - const uint8_t *heredoc_end; - - /** The list of comments that have been found while parsing. */ - pm_list_t comment_list; - - /** The list of magic comments that have been found while parsing. */ - pm_list_t magic_comment_list; - - /** - * An optional location that represents the location of the __END__ marker - * and the rest of the content of the file. This content is loaded into the - * DATA constant when the file being parsed is the main file being executed. - */ - pm_location_t data_loc; - - /** The list of warnings that have been found while parsing. */ - pm_list_t warning_list; - - /** The list of errors that have been found while parsing. */ - pm_list_t error_list; - - /** The current local scope. */ - pm_scope_t *current_scope; - - /** The current parsing context. 
*/ - pm_context_node_t *current_context; - - /** - * The hash keys for the hash that is currently being parsed. This is not - * usually necessary because it can pass it down the various call chains, - * but in the event that you're parsing a hash that is being directly - * pushed into another hash with **, we need to share the hash keys so that - * we can warn for the nested hash as well. - */ - pm_static_literals_t *current_hash_keys; - - /** - * The encoding functions for the current file is attached to the parser as - * it's parsing so that it can change with a magic comment. - */ - const pm_encoding_t *encoding; - - /** - * When the encoding that is being used to parse the source is changed by - * prism, we provide the ability here to call out to a user-defined - * function. - */ - pm_encoding_changed_callback_t encoding_changed_callback; - - /** - * This pointer indicates where a comment must start if it is to be - * considered an encoding comment. - */ - const uint8_t *encoding_comment_start; - - /** - * This is an optional callback that can be attached to the parser that will - * be called whenever a new token is lexed by the parser. - */ - pm_lex_callback_t *lex_callback; +PRISM_EXPORTED_FUNCTION const uint8_t * pm_parser_end(const pm_parser_t *parser); - /** - * This is the path of the file being parsed. We use the filepath when - * constructing SourceFileNodes. - */ - pm_string_t filepath; - - /** - * This constant pool keeps all of the constants defined throughout the file - * so that we can reference them later. - */ - pm_constant_pool_t constant_pool; - - /** This is the list of line offsets in the source file. */ - pm_line_offset_list_t line_offsets; - - /** - * State communicated from the lexer to the parser for integer tokens. - */ - struct { - /** - * A flag indicating the base of the integer (binary, octal, decimal, - * hexadecimal). Set during lexing and read during node creation. 
- */ - pm_node_flags_t base; - - /** - * When lexing a decimal integer that fits in a uint32_t, we compute - * the value during lexing to avoid re-scanning the digits during - * parsing. If lexed is true, this holds the result and - * pm_integer_parse can be skipped. - */ - uint32_t value; - - /** Whether value holds a valid pre-computed integer. */ - bool lexed; - } integer; - - /** - * This string is used to pass information from the lexer to the parser. It - * is particularly necessary because of escape sequences. - */ - pm_string_t current_string; - - /** - * The line number at the start of the parse. This will be used to offset - * the line numbers of all of the locations. - */ - int32_t start_line; - - /** - * When a string-like expression is being lexed, any byte or escape sequence - * that resolves to a value whose top bit is set (i.e., >= 0x80) will - * explicitly set the encoding to the same encoding as the source. - * Alternatively, if a unicode escape sequence is used (e.g., \\u{80}) that - * resolves to a value whose top bit is set, then the encoding will be - * explicitly set to UTF-8. - * - * The _next_ time this happens, if the encoding that is about to become the - * explicitly set encoding does not match the previously set explicit - * encoding, a mixed encoding error will be emitted. - * - * When the expression is finished being lexed, the explicit encoding - * controls the encoding of the expression. For the most part this means - * that the expression will either be encoded in the source encoding or - * UTF-8. This holds for all encodings except US-ASCII. If the source is - * US-ASCII and an explicit encoding was set that was _not_ UTF-8, then the - * expression will be encoded as ASCII-8BIT. - * - * Note that if the expression is a list, different elements within the same - * list can have different encodings, so this will get reset between each - * element. 
Furthermore all of this only applies to lists that support - * interpolation, because otherwise escapes that could change the encoding - * are ignored. - * - * At first glance, it may make more sense for this to live on the lexer - * mode, but we need it here to communicate back to the parser for character - * literals that do not push a new lexer mode. - */ - const pm_encoding_t *explicit_encoding; - - /** - * When parsing block exits (e.g., break, next, redo), we need to validate - * that they are in correct contexts. For the most part we can do this by - * looking at our parent contexts. However, modifier while and until - * expressions can change that context to make block exits valid. In these - * cases, we need to keep track of the block exits and then validate them - * after the expression has been parsed. - * - * We use a pointer here because we don't want to keep a whole list attached - * since this will only be used in the context of begin/end expressions. - */ - pm_node_list_t *current_block_exits; - - /** The version of prism that we should use to parse. */ - pm_options_version_t version; - - /** The command line flags given from the options. */ - uint8_t command_line; - - /** - * Whether or not we have found a frozen_string_literal magic comment with - * a true or false value. - * May be: - * - PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED - * - PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED - * - PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET - */ - int8_t frozen_string_literal; - - /** - * Whether or not we are parsing an eval string. This impacts whether or not - * we should evaluate if block exits/yields are valid. - */ - bool parsing_eval; - - /** - * Whether or not we are parsing a "partial" script, which is a script that - * will be evaluated in the context of another script, so we should not - * check jumps (next/break/etc.) for validity. - */ - bool partial_script; - - /** Whether or not we're at the beginning of a command. 
*/ - bool command_start; - - /** - * Whether or not we're currently parsing the body of an endless method - * definition. In this context, PM_TOKEN_KEYWORD_DO_BLOCK should not be - * consumed by commands (it should bubble up to the outer context). - */ - bool in_endless_def_body; - - /** Whether or not we're currently recovering from a syntax error. */ - bool recovering; - - /** - * Whether or not the source being parsed could become valid if more input - * were appended. This is set to false when the parser encounters a token - * that is definitively wrong (e.g., a stray `end` or `]`) as opposed to - * merely incomplete. - */ - bool continuable; - - /** - * This is very specialized behavior for when you want to parse in a context - * that does not respect encoding comments. Its main use case is translating - * into the whitequark/parser AST which re-encodes source files in UTF-8 - * before they are parsed and ignores encoding comments. - */ - bool encoding_locked; +/** + * Returns the line that the parser was considered to have started on. + * + * @param parser the parser whose start line we want to get + * @return the line that the parser was considered to have started on + */ +PRISM_EXPORTED_FUNCTION int32_t pm_parser_start_line(const pm_parser_t *parser); - /** - * Whether or not the encoding has been changed by a magic comment. We use - * this to provide a fast path for the lexer instead of going through the - * function pointer. - */ - bool encoding_changed; +/** + * Returns the name of the encoding that is being used to parse the source. + * + * @param parser the parser whose encoding name we want to get + * @return the name of the encoding that is being used to parse the source + */ +PRISM_EXPORTED_FUNCTION const char * pm_parser_encoding_name(const pm_parser_t *parser); - /** - * This flag indicates that we are currently parsing a pattern matching - * expression and impacts that calculation of newlines. 
- */ - bool pattern_matching_newlines; +/** + * Returns the errors that are associated with the given parser. + * + * @param parser the parser whose errors we want to get + * @return the errors that are associated with the given parser + */ +PRISM_EXPORTED_FUNCTION const pm_list_t * pm_parser_errors(const pm_parser_t *parser); - /** This flag indicates that we are currently parsing a keyword argument. */ - bool in_keyword_arg; +/** + * Returns the warnings that are associated with the given parser. + * + * @param parser the parser whose warnings we want to get + * @return the warnings that are associated with the given parser + */ +PRISM_EXPORTED_FUNCTION const pm_list_t * pm_parser_warnings(const pm_parser_t *parser); - /** - * Whether or not the parser has seen a token that has semantic meaning - * (i.e., a token that is not a comment or whitespace). - */ - bool semantic_token_seen; +/** + * Returns the comments that are associated with the given parser. + * + * @param parser the parser whose comments we want to get + * @return the comments that are associated with the given parser + */ +PRISM_EXPORTED_FUNCTION const pm_list_t * pm_parser_comments(const pm_parser_t *parser); - /** - * By default, Ruby always warns about mismatched indentation. This can be - * toggled with a magic comment. - */ - bool warn_mismatched_indentation; +/** + * Returns the magic comments that are associated with the given parser. + * + * @param parser the parser whose magic comments we want to get + * @return the magic comments that are associated with the given parser + */ +PRISM_EXPORTED_FUNCTION const pm_list_t * pm_parser_magic_comments(const pm_parser_t *parser); -#if defined(PRISM_HAS_NEON) || defined(PRISM_HAS_SSSE3) || defined(PRISM_HAS_SWAR) - /** - * Cached lookup tables for pm_strpbrk's SIMD fast path. Avoids rebuilding - * the nibble-based tables on every call when the charset hasn't changed - * (which is the common case during string/regex/list lexing). 
- */ - struct { - /** The cached charset (null-terminated, NUL-padded). */ - uint8_t charset[PM_STRPBRK_CACHE_SIZE]; +/** + * Returns the line offsets that are associated with the given parser. + * + * @param parser the parser whose line offsets we want to get + * @return the line offsets that are associated with the given parser + */ +PRISM_EXPORTED_FUNCTION const pm_line_offset_list_t * pm_parser_line_offsets(const pm_parser_t *parser); - /** Nibble-based low lookup table for SIMD matching. */ - uint8_t low_lut[16]; +/** + * Returns the constant pool associated with the given parser. + * + * @param parser the parser whose constant pool we want to get + * @return the constant pool associated with the given parser + */ +PRISM_EXPORTED_FUNCTION const pm_constant_pool_t * pm_parser_constant_pool(const pm_parser_t *parser); - /** Nibble-based high lookup table for SIMD matching. */ - uint8_t high_lut[16]; +/** + * Returns the location of the __DATA__ section that is associated with the + * given parser. + * + * @param parser the parser whose data location we want to get + * @return the location of the __DATA__ section that is associated with the + * given parser. If it is unset, then the length will be set to 0. + */ +PRISM_EXPORTED_FUNCTION const pm_location_t * pm_parser_data_loc(const pm_parser_t *parser); - /** Scalar fallback table (4 x 64-bit bitmasks covering all ASCII). */ - uint64_t table[4]; - } strpbrk_cache; -#endif -}; +/** + * Returns whether the given parser is continuable, meaning that it could become + * valid if more input were appended, as opposed to being definitively invalid. + * + * @param parser the parser whose continuable status we want to get + * @return whether the given parser is continuable + */ +PRISM_EXPORTED_FUNCTION bool pm_parser_continuable(const pm_parser_t *parser); /** - * Returns the name of the encoding that is being used to parse the source. + * Returns the lex state of the parser. 
Note that this is an internal detail, + * and we are purposefully not returning an instance of the internal enum that + * we use to track this. This is only exposed because we need it for some very + * niche use cases. Most consumers should avoid this function. * - * @param parser the parser whose encoding name we want to get - * @return the name of the encoding that is being used to parse the source + * @param parser the parser whose lex state we want to get + * @return the lex state of the parser */ -PRISM_EXPORTED_FUNCTION const char * pm_parser_encoding_name(const pm_parser_t *parser); +PRISM_EXPORTED_FUNCTION int pm_parser_lex_state(const pm_parser_t *parser); #endif diff --git a/src/parser.c b/src/parser.c index 356700796f..209f748ff2 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1,7 +1,60 @@ -#include "prism/parser.h" +#include "prism/internal/parser.h" #include "prism/internal/encoding.h" +/** + * Register a callback that will be called whenever prism changes the encoding + * it is using to parse based on the magic comment. + */ +void +pm_parser_encoding_changed_callback_set(pm_parser_t *parser, pm_encoding_changed_callback_t callback) { + parser->encoding_changed_callback = callback; +} + +/** + * Register a callback that will be called whenever a token is lexed. + */ +void +pm_parser_lex_callback_set(pm_parser_t *parser, pm_lex_callback_t callback, void *data) { + parser->lex_callback.callback = callback; + parser->lex_callback.data = data; +} + +/** + * Returns the opaque data that is passed to the lex callback when it is called. + */ +void * +pm_parser_lex_callback_data(pm_parser_t *parser) { + return parser->lex_callback.data; +} + +/** + * Returns the raw pointer to the start of the source that is being parsed. + */ +const uint8_t * +pm_parser_start(const pm_parser_t *parser) { + return parser->start; +} + +/** + * Returns the raw pointer to the end of the source that is being parsed. 
+ */ +const uint8_t * +pm_parser_end(const pm_parser_t *parser) { + return parser->end; +} + +/** + * Returns the line that the parser was considered to have started on. + * + * @param parser the parser whose start line we want to get + * @return the line that the parser was considered to have started on + */ +int32_t +pm_parser_start_line(const pm_parser_t *parser) { + return parser->start_line; +} + /** * Returns the name of the encoding that is being used to parse the source. */ @@ -9,3 +62,83 @@ const char * pm_parser_encoding_name(const pm_parser_t *parser) { return parser->encoding->name; } + +/** + * Returns the errors that are associated with the given parser. + */ +const pm_list_t * +pm_parser_errors(const pm_parser_t *parser) { + return &parser->error_list; +} + +/** + * Returns the warnings that are associated with the given parser. + */ +const pm_list_t * +pm_parser_warnings(const pm_parser_t *parser) { + return &parser->warning_list; +} + +/** + * Returns the comments that are associated with the given parser. + */ +const pm_list_t * +pm_parser_comments(const pm_parser_t *parser) { + return &parser->comment_list; +} + +/** + * Returns the magic comments that are associated with the given parser. + */ +const pm_list_t * +pm_parser_magic_comments(const pm_parser_t *parser) { + return &parser->magic_comment_list; +} + +/** + * Returns the line offsets that are associated with the given parser. + * + * @param parser the parser whose line offsets we want to get + * @return the line offsets that are associated with the given parser + */ +const pm_line_offset_list_t * +pm_parser_line_offsets(const pm_parser_t *parser) { + return &parser->line_offsets; +} + +/** + * Returns the constant pool associated with the given parser. + */ +const pm_constant_pool_t * +pm_parser_constant_pool(const pm_parser_t *parser) { + return &parser->constant_pool; +} + +/** + * Returns the location of the __DATA__ section that is associated with the + * given parser, if it exists. 
+ */ +const pm_location_t * +pm_parser_data_loc(const pm_parser_t *parser) { + return &parser->data_loc; +} + +/** + * Returns whether the given parser is continuable, meaning that it could become + * valid if more input were appended, as opposed to being definitively invalid. + */ +bool +pm_parser_continuable(const pm_parser_t *parser) { + return parser->continuable; +} + +/** + * Returns the lex state of the parser. Note that this is an internal detail, + * and we are purposefully not returning an instance of the internal enum that + * we use to track this. This is only exposed because we need it for some very + * niche use cases. Most consumers should avoid this function. + */ +int +pm_parser_lex_state(const pm_parser_t *parser) { + return (int) parser->lex_state; +} diff --git a/src/prism.c b/src/prism.c index c0363afd9c..5814fa4fee 100644 --- a/src/prism.c +++ b/src/prism.c @@ -19,6 +19,7 @@ #include "prism/internal/memchr.h" #include "prism/internal/node.h" #include "prism/internal/options.h" +#include "prism/internal/parser.h" #include "prism/internal/regexp.h" #include "prism/internal/static_literals.h" #include "prism/internal/strings.h" @@ -9519,8 +9520,8 @@ lex_at_variable(pm_parser_t *parser) { */ static PRISM_INLINE void parser_lex_callback(pm_parser_t *parser) { - if (parser->lex_callback) { - parser->lex_callback->callback(parser->lex_callback->data, parser, &parser->current); + if (parser->lex_callback.callback) { + parser->lex_callback.callback(parser, &parser->current, parser->lex_callback.data); } } @@ -22491,15 +22492,6 @@ pm_parser_new(pm_arena_t *arena, const uint8_t *source, size_t size, const pm_op return parser; } -/** - * Register a callback that will be called whenever prism changes the encoding - * it is using to parse based on the magic comment. 
- */ -void -pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback) { - parser->encoding_changed_callback = callback; -} - /** * Free any memory associated with the given parser. */ diff --git a/src/regexp.c b/src/regexp.c index 2ee2555686..05ef3b6b41 100644 --- a/src/regexp.c +++ b/src/regexp.c @@ -7,6 +7,7 @@ #include "prism/internal/diagnostic.h" #include "prism/internal/encoding.h" #include "prism/internal/memchr.h" +#include "prism/internal/parser.h" #include "prism/internal/strings.h" #include "prism/internal/strncasecmp.h" diff --git a/src/strpbrk.c b/src/strpbrk.c index 6db4fd31bf..41ab8eec3e 100644 --- a/src/strpbrk.c +++ b/src/strpbrk.c @@ -7,6 +7,7 @@ #include "prism/internal/bit.h" #include "prism/internal/diagnostic.h" #include "prism/internal/encoding.h" +#include "prism/internal/parser.h" #include #include diff --git a/templates/ext/prism/api_node.c.erb b/templates/ext/prism/api_node.c.erb index be6bd113ad..fffb2bebb5 100644 --- a/templates/ext/prism/api_node.c.erb +++ b/templates/ext/prism/api_node.c.erb @@ -26,7 +26,7 @@ pm_location_new(const uint32_t start, const uint32_t length, VALUE source, bool VALUE pm_token_new(const pm_parser_t *parser, const pm_token_t *token, rb_encoding *encoding, VALUE source, bool freeze) { ID type = rb_intern(pm_token_type_name(token->type)); - VALUE location = pm_location_new((uint32_t) (token->start - parser->start), (uint32_t) (token->end - token->start), source, freeze); + VALUE location = pm_location_new((uint32_t) (token->start - pm_parser_start(parser)), (uint32_t) (token->end - token->start), source, freeze); VALUE slice = rb_enc_str_new((const char *) token->start, token->end - token->start, encoding); if (freeze) rb_obj_freeze(slice); @@ -75,11 +75,14 @@ pm_integer_new(const pm_integer_t *integer) { // Create a Prism::Source object from the given parser, after pm_parse() was called. 
VALUE pm_source_new(const pm_parser_t *parser, rb_encoding *encoding, bool freeze) { - VALUE source_string = rb_enc_str_new((const char *) parser->start, parser->end - parser->start, encoding); + const uint8_t *start = pm_parser_start(parser); + VALUE source_string = rb_enc_str_new((const char *) start, pm_parser_end(parser) - start, encoding); - VALUE offsets = rb_ary_new_capa(parser->line_offsets.size); - for (size_t index = 0; index < parser->line_offsets.size; index++) { - rb_ary_push(offsets, ULONG2NUM(parser->line_offsets.offsets[index])); + const pm_line_offset_list_t *line_offsets = pm_parser_line_offsets(parser); + VALUE offsets = rb_ary_new_capa(line_offsets->size); + + for (size_t index = 0; index < line_offsets->size; index++) { + rb_ary_push(offsets, ULONG2NUM(line_offsets->offsets[index])); } if (freeze) { @@ -87,7 +90,7 @@ pm_source_new(const pm_parser_t *parser, rb_encoding *encoding, bool freeze) { rb_obj_freeze(offsets); } - VALUE source = rb_funcall(rb_cPrismSource, rb_intern("for"), 3, source_string, LONG2NUM(parser->start_line), offsets); + VALUE source = rb_funcall(rb_cPrismSource, rb_intern("for"), 3, source_string, LONG2NUM(pm_parser_start_line(parser)), offsets); if (freeze) rb_obj_freeze(source); return source; @@ -121,10 +124,11 @@ pm_node_stack_pop(pm_node_stack_node_t **stack) { VALUE pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encoding, VALUE source, bool freeze) { - VALUE constants = rb_ary_new_capa(parser->constant_pool.size); + const pm_constant_pool_t *constant_pool = pm_parser_constant_pool(parser); + VALUE constants = rb_ary_new_capa(constant_pool->size); - for (uint32_t index = 0; index < parser->constant_pool.size; index++) { - pm_constant_t *constant = &parser->constant_pool.constants[index]; + for (uint32_t index = 0; index < constant_pool->size; index++) { + pm_constant_t *constant = &constant_pool->constants[index]; int state = 0; VALUE string = rb_enc_str_new((const char *) constant->start, 
constant->length, encoding); diff --git a/templates/src/node.c.erb b/templates/src/node.c.erb index c0016647a8..da249141ed 100644 --- a/templates/src/node.c.erb +++ b/templates/src/node.c.erb @@ -4,7 +4,7 @@ #include "prism/internal/buffer.h" #include "prism/internal/constant_pool.h" #include "prism/internal/integer.h" -#include "prism/parser.h" +#include "prism/internal/parser.h" #include #include diff --git a/templates/src/prettyprint.c.erb b/templates/src/prettyprint.c.erb index 4af8155c47..ffe9b1f307 100644 --- a/templates/src/prettyprint.c.erb +++ b/templates/src/prettyprint.c.erb @@ -5,6 +5,8 @@ #include "prism/internal/buffer.h" #include "prism/internal/constant_pool.h" #include "prism/internal/integer.h" +#include "prism/internal/parser.h" +#include "prism/line_offset_list.h" #include diff --git a/templates/src/serialize.c.erb b/templates/src/serialize.c.erb index b305a95d7e..cfd073b7e6 100644 --- a/templates/src/serialize.c.erb +++ b/templates/src/serialize.c.erb @@ -11,11 +11,12 @@ #include "prism/internal/encoding.h" #include "prism/internal/list.h" #include "prism/internal/options.h" +#include "prism/internal/parser.h" #include "prism.h" #include "prism/ast.h" #include "prism/diagnostic.h" -#include "prism/parser.h" +#include "prism/line_offset_list.h" #include #include @@ -312,7 +313,7 @@ pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) } static void -serialize_token(void *data, pm_parser_t *parser, pm_token_t *token) { +serialize_token(pm_parser_t *parser, pm_token_t *token, void *data) { pm_buffer_t *buffer = (pm_buffer_t *) data; pm_buffer_append_varuint(buffer, token->type); @@ -333,12 +334,7 @@ pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const pm_parser_t parser; pm_parser_init(&arena, &parser, source, size, &options); - pm_lex_callback_t lex_callback = (pm_lex_callback_t) { - .data = (void *) buffer, - .callback = serialize_token, - }; - - parser.lex_callback = &lex_callback; + 
pm_parser_lex_callback_set(&parser, serialize_token, buffer); pm_parse(&parser); // Append 0 to mark end of tokens. @@ -364,12 +360,7 @@ pm_serialize_parse_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, pm_parser_t parser; pm_parser_init(&arena, &parser, source, size, &options); - pm_lex_callback_t lex_callback = (pm_lex_callback_t) { - .data = (void *) buffer, - .callback = serialize_token, - }; - - parser.lex_callback = &lex_callback; + pm_parser_lex_callback_set(&parser, serialize_token, buffer); pm_node_t *node = pm_parse(&parser); pm_buffer_append_byte(buffer, 0); From 7cb8b59590f4351b6e3136c670c01136eb846b09 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 18 Mar 2026 09:58:37 -0400 Subject: [PATCH 051/100] Move static literals entirely internal --- include/prism/internal/parser.h | 3 ++- include/prism/internal/static_literals.h | 5 ++--- include/prism/static_literals.h | 12 ------------ prism.gemspec | 1 - src/prism.c | 2 +- 5 files changed, 5 insertions(+), 18 deletions(-) delete mode 100644 include/prism/static_literals.h diff --git a/include/prism/internal/parser.h b/include/prism/internal/parser.h index f56dee8964..ad21044d3b 100644 --- a/include/prism/internal/parser.h +++ b/include/prism/internal/parser.h @@ -8,6 +8,8 @@ #include "prism/compiler/accel.h" +#include "prism/internal/static_literals.h" + #include "prism/arena.h" #include "prism/ast.h" #include "prism/encoding.h" @@ -15,7 +17,6 @@ #include "prism/list.h" #include "prism/options.h" #include "prism/parser.h" -#include "prism/static_literals.h" #include #include diff --git a/include/prism/internal/static_literals.h b/include/prism/internal/static_literals.h index f924dd9e6a..0512313259 100644 --- a/include/prism/internal/static_literals.h +++ b/include/prism/internal/static_literals.h @@ -9,7 +9,6 @@ #include "prism/ast.h" #include "prism/buffer.h" #include "prism/line_offset_list.h" -#include "prism/static_literals.h" /** * An internal hash table for a set of nodes. 
@@ -33,7 +32,7 @@ typedef struct { * We bucket the nodes based on their type to minimize the number of comparisons * that need to be performed. */ -struct pm_static_literals_t { +typedef struct { /** * This is the set of IntegerNode and SourceLineNode instances. */ @@ -84,7 +83,7 @@ struct pm_static_literals_t { * NULL. */ pm_node_t *source_encoding_node; -}; +} pm_static_literals_t; /** * Add a node to the set of static literals. diff --git a/include/prism/static_literals.h b/include/prism/static_literals.h deleted file mode 100644 index 4519510280..0000000000 --- a/include/prism/static_literals.h +++ /dev/null @@ -1,12 +0,0 @@ -/** - * @file static_literals.h - * - * A set of static literal nodes that can be checked for duplicates. - */ -#ifndef PRISM_STATIC_LITERALS_H -#define PRISM_STATIC_LITERALS_H - -/** An opaque struct that holds the static literals. */ -typedef struct pm_static_literals_t pm_static_literals_t; - -#endif diff --git a/prism.gemspec b/prism.gemspec index 762ab7b590..2d0a0d43aa 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -92,7 +92,6 @@ Gem::Specification.new do |spec| "include/prism/options.h", "include/prism/parser.h", "include/prism/prettyprint.h", - "include/prism/static_literals.h", "include/prism/string_query.h", "include/prism/strings.h", "include/prism/version.h", diff --git a/src/prism.c b/src/prism.c index 5814fa4fee..eb1ab8a256 100644 --- a/src/prism.c +++ b/src/prism.c @@ -22255,7 +22255,7 @@ pm_parser_init(pm_arena_t *arena, pm_parser_t *parser, const uint8_t *source, si .encoding = PM_ENCODING_UTF_8_ENTRY, .encoding_changed_callback = NULL, .encoding_comment_start = source, - .lex_callback = NULL, + .lex_callback = { 0 }, .filepath = { 0 }, .constant_pool = { 0 }, .line_offsets = { 0 }, From 2b84d22051a494908d7ed366c28b2e1e43d6ec12 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 18 Mar 2026 10:02:52 -0400 Subject: [PATCH 052/100] Move some serialize functions internal --- include/prism.h | 25 ---------------- 
include/prism/internal/serialize.h | 47 ++++++++++++++++++++++++++++++ src/prism.c | 1 + 3 files changed, 48 insertions(+), 25 deletions(-) create mode 100644 include/prism/internal/serialize.h diff --git a/include/prism.h b/include/prism.h index 3a35b257d6..fd3b80a12f 100644 --- a/include/prism.h +++ b/include/prism.h @@ -138,31 +138,6 @@ PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_parser_t **parser, pm_are */ PRISM_EXPORTED_FUNCTION void pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const char *data); -/** - * Serialize the given list of comments to the given buffer. - * - * @param list The list of comments to serialize. - * @param buffer The buffer to serialize to. - */ -void pm_serialize_comment_list(pm_list_t *list, pm_buffer_t *buffer); - -/** - * Serialize the name of the encoding to the buffer. - * - * @param encoding The encoding to serialize. - * @param buffer The buffer to serialize to. - */ -void pm_serialize_encoding(const pm_encoding_t *encoding, pm_buffer_t *buffer); - -/** - * Serialize the encoding, metadata, nodes, and constant pool. - * - * @param parser The parser to serialize. - * @param node The node to serialize. - * @param buffer The buffer to serialize to. - */ -void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer); - /** * Serialize the AST represented by the given node to the given buffer. 
* diff --git a/include/prism/internal/serialize.h b/include/prism/internal/serialize.h new file mode 100644 index 0000000000..a67ebd1a18 --- /dev/null +++ b/include/prism/internal/serialize.h @@ -0,0 +1,47 @@ +/** + * @file internal/serialize.h + */ +#ifndef PRISM_INTERNAL_SERIALIZE_H +#define PRISM_INTERNAL_SERIALIZE_H + +#include "prism/internal/encoding.h" + +#include "prism/ast.h" +#include "prism/buffer.h" +#include "prism/excludes.h" +#include "prism/list.h" +#include "prism/parser.h" + +/* We optionally support serializing to a binary string. For systems that do not + * want or need this functionality, it can be turned off with the + * PRISM_EXCLUDE_SERIALIZATION define. */ +#ifndef PRISM_EXCLUDE_SERIALIZATION + +/** + * Serialize the given list of comments to the given buffer. + * + * @param list The list of comments to serialize. + * @param buffer The buffer to serialize to. + */ +void pm_serialize_comment_list(pm_list_t *list, pm_buffer_t *buffer); + +/** + * Serialize the name of the encoding to the buffer. + * + * @param encoding The encoding to serialize. + * @param buffer The buffer to serialize to. + */ +void pm_serialize_encoding(const pm_encoding_t *encoding, pm_buffer_t *buffer); + +/** + * Serialize the encoding, metadata, nodes, and constant pool. + * + * @param parser The parser to serialize. + * @param node The node to serialize. + * @param buffer The buffer to serialize to. 
+ */ +void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer); + +#endif + +#endif diff --git a/src/prism.c b/src/prism.c index eb1ab8a256..a5d91fe5bb 100644 --- a/src/prism.c +++ b/src/prism.c @@ -21,6 +21,7 @@ #include "prism/internal/options.h" #include "prism/internal/parser.h" #include "prism/internal/regexp.h" +#include "prism/internal/serialize.h" #include "prism/internal/static_literals.h" #include "prism/internal/strings.h" #include "prism/internal/strncasecmp.h" From 10ebcaf9086ce16a5b53df37f3169d609524060e Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 18 Mar 2026 10:04:23 -0400 Subject: [PATCH 053/100] Move encoding entirely internal --- include/prism.h | 1 - include/prism/encoding.h | 13 ------------- include/prism/internal/encoding.h | 6 ++---- include/prism/internal/parser.h | 2 +- prism.gemspec | 1 - 5 files changed, 3 insertions(+), 20 deletions(-) delete mode 100644 include/prism/encoding.h diff --git a/include/prism.h b/include/prism.h index fd3b80a12f..9af3e6cf6c 100644 --- a/include/prism.h +++ b/include/prism.h @@ -13,7 +13,6 @@ extern "C" { #include "prism/arena.h" #include "prism/ast.h" #include "prism/diagnostic.h" -#include "prism/encoding.h" #include "prism/excludes.h" #include "prism/node.h" #include "prism/options.h" diff --git a/include/prism/encoding.h b/include/prism/encoding.h deleted file mode 100644 index a2061b65b6..0000000000 --- a/include/prism/encoding.h +++ /dev/null @@ -1,13 +0,0 @@ -/** - * @file encoding.h - * - * The encoding interface and implementations used by the parser. - */ -#ifndef PRISM_ENCODING_H -#define PRISM_ENCODING_H - -/* The encoding that the parser uses to process the source code. An opaque - * struct that is defined in the implementation file. 
*/ -typedef struct pm_encoding_t pm_encoding_t; - -#endif diff --git a/include/prism/internal/encoding.h b/include/prism/internal/encoding.h index a62c1fd548..eb68ad6250 100644 --- a/include/prism/internal/encoding.h +++ b/include/prism/internal/encoding.h @@ -6,8 +6,6 @@ #ifndef PRISM_INTERNAL_ENCODING_H #define PRISM_INTERNAL_ENCODING_H -#include "prism/encoding.h" - #include #include #include @@ -18,7 +16,7 @@ * Each callback should return the number of bytes, or 0 if the next bytes are * invalid for the encoding and type. */ -struct pm_encoding_t { +typedef struct { /** * Return the number of bytes that the next character takes if it is valid * in the encoding. Does not read more than n bytes. It is assumed that n is @@ -57,7 +55,7 @@ struct pm_encoding_t { * Return true if the encoding is a multibyte encoding. */ bool multibyte; -}; +} pm_encoding_t; /** * All of the lookup tables use the first bit of each embedded byte to indicate diff --git a/include/prism/internal/parser.h b/include/prism/internal/parser.h index ad21044d3b..86e577b314 100644 --- a/include/prism/internal/parser.h +++ b/include/prism/internal/parser.h @@ -8,11 +8,11 @@ #include "prism/compiler/accel.h" +#include "prism/internal/encoding.h" #include "prism/internal/static_literals.h" #include "prism/arena.h" #include "prism/ast.h" -#include "prism/encoding.h" #include "prism/line_offset_list.h" #include "prism/list.h" #include "prism/options.h" diff --git a/prism.gemspec b/prism.gemspec index 2d0a0d43aa..70e5aefe1b 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -82,7 +82,6 @@ Gem::Specification.new do |spec| "include/prism/buffer.h", "include/prism/constant_pool.h", "include/prism/diagnostic.h", - "include/prism/encoding.h", "include/prism/excludes.h", "include/prism/integer.h", "include/prism/line_offset_list.h", From da00377f7d4f9d7b07803155ce5b0470ac599850 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 18 Mar 2026 10:06:45 -0400 Subject: [PATCH 054/100] Move some options internal 
metadata internal --- include/prism/internal/options.h | 31 +++++++++++++++++++++++++++++++ include/prism/internal/parser.h | 2 +- include/prism/options.h | 31 ------------------------------- 3 files changed, 32 insertions(+), 32 deletions(-) diff --git a/include/prism/internal/options.h b/include/prism/internal/options.h index 2dea2f9422..59606b09e6 100644 --- a/include/prism/internal/options.h +++ b/include/prism/internal/options.h @@ -22,6 +22,37 @@ struct pm_options_scope_t { uint8_t forwarding; }; +/** + * The version of Ruby syntax that we should be parsing with. This is used to + * allow consumers to specify which behavior they want in case they need to + * parse in the same way as a specific version of CRuby would have. + */ +typedef enum { + /** + * If an explicit version is not provided, the current version of prism will + * be used. + */ + PM_OPTIONS_VERSION_UNSET = 0, + + /** The vendored version of prism in CRuby 3.3.x. */ + PM_OPTIONS_VERSION_CRUBY_3_3 = 1, + + /** The vendored version of prism in CRuby 3.4.x. */ + PM_OPTIONS_VERSION_CRUBY_3_4 = 2, + + /** The vendored version of prism in CRuby 4.0.x. */ + PM_OPTIONS_VERSION_CRUBY_3_5 = 3, + + /** The vendored version of prism in CRuby 4.0.x. */ + PM_OPTIONS_VERSION_CRUBY_4_0 = 3, + + /** The vendored version of prism in CRuby 4.1.x. */ + PM_OPTIONS_VERSION_CRUBY_4_1 = 4, + + /** The current version of prism. */ + PM_OPTIONS_VERSION_LATEST = PM_OPTIONS_VERSION_CRUBY_4_1 +} pm_options_version_t; + /** * The options that can be passed to the parser. 
*/ diff --git a/include/prism/internal/parser.h b/include/prism/internal/parser.h index 86e577b314..f8baf49cdb 100644 --- a/include/prism/internal/parser.h +++ b/include/prism/internal/parser.h @@ -9,13 +9,13 @@ #include "prism/compiler/accel.h" #include "prism/internal/encoding.h" +#include "prism/internal/options.h" #include "prism/internal/static_literals.h" #include "prism/arena.h" #include "prism/ast.h" #include "prism/line_offset_list.h" #include "prism/list.h" -#include "prism/options.h" #include "prism/parser.h" #include diff --git a/include/prism/options.h b/include/prism/options.h index 477b593b2a..aba122f705 100644 --- a/include/prism/options.h +++ b/include/prism/options.h @@ -66,37 +66,6 @@ static const uint8_t PM_OPTIONS_SCOPE_FORWARDING_ALL = 0x8; */ typedef void (*pm_options_shebang_callback_t)(pm_options_t *options, const uint8_t *source, size_t length, void *shebang_callback_data); -/** - * The version of Ruby syntax that we should be parsing with. This is used to - * allow consumers to specify which behavior they want in case they need to - * parse in the same way as a specific version of CRuby would have. - */ -typedef enum { - /** - * If an explicit version is not provided, the current version of prism will - * be used. - */ - PM_OPTIONS_VERSION_UNSET = 0, - - /** The vendored version of prism in CRuby 3.3.x. */ - PM_OPTIONS_VERSION_CRUBY_3_3 = 1, - - /** The vendored version of prism in CRuby 3.4.x. */ - PM_OPTIONS_VERSION_CRUBY_3_4 = 2, - - /** The vendored version of prism in CRuby 4.0.x. */ - PM_OPTIONS_VERSION_CRUBY_3_5 = 3, - - /** The vendored version of prism in CRuby 4.0.x. */ - PM_OPTIONS_VERSION_CRUBY_4_0 = 3, - - /** The vendored version of prism in CRuby 4.1.x. */ - PM_OPTIONS_VERSION_CRUBY_4_1 = 4, - - /** The current version of prism. */ - PM_OPTIONS_VERSION_LATEST = PM_OPTIONS_VERSION_CRUBY_4_1 -} pm_options_version_t; - /** * A bit representing whether or not the command line -a option was set. 
-a * splits the input line $_ into $F. From a29aab41797a7baec3b5613d3e37e73e89af9438 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 18 Mar 2026 10:16:46 -0400 Subject: [PATCH 055/100] Consistency in naming --- ext/prism/extension.c | 4 ++-- templates/include/prism/diagnostic.h.erb | 2 +- templates/src/diagnostic.c.erb | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ext/prism/extension.c b/ext/prism/extension.c index 9b03387fc9..70f7fd0eb0 100644 --- a/ext/prism/extension.c +++ b/ext/prism/extension.c @@ -566,7 +566,7 @@ parser_errors(const pm_parser_t *parser, rb_encoding *encoding, VALUE source, bo error != NULL; error = (const pm_diagnostic_t *) error->node.next ) { - VALUE type = ID2SYM(rb_intern(pm_diagnostic_id_human(error->diag_id))); + VALUE type = ID2SYM(rb_intern(pm_diagnostic_id_str(error->diag_id))); VALUE message = rb_obj_freeze(rb_enc_str_new_cstr(error->message, encoding)); VALUE location = PARSER_LOCATION(source, freeze, error->location); @@ -607,7 +607,7 @@ parser_warnings(const pm_parser_t *parser, rb_encoding *encoding, VALUE source, warning != NULL; warning = (const pm_diagnostic_t *) warning->node.next ) { - VALUE type = ID2SYM(rb_intern(pm_diagnostic_id_human(warning->diag_id))); + VALUE type = ID2SYM(rb_intern(pm_diagnostic_id_str(warning->diag_id))); VALUE message = rb_obj_freeze(rb_enc_str_new_cstr(warning->message, encoding)); VALUE location = PARSER_LOCATION(source, freeze, warning->location); diff --git a/templates/include/prism/diagnostic.h.erb b/templates/include/prism/diagnostic.h.erb index dceb21cf65..89ef49a2d4 100644 --- a/templates/include/prism/diagnostic.h.erb +++ b/templates/include/prism/diagnostic.h.erb @@ -83,6 +83,6 @@ typedef enum { * @param diag_id The diagnostic ID to get the name of. * @returns The human-readable name of the given diagnostic ID. 
*/ -PRISM_EXPORTED_FUNCTION const char * pm_diagnostic_id_human(pm_diagnostic_id_t diag_id); +PRISM_EXPORTED_FUNCTION const char * pm_diagnostic_id_str(pm_diagnostic_id_t diag_id); #endif diff --git a/templates/src/diagnostic.c.erb b/templates/src/diagnostic.c.erb index 15b2a776a7..996599b0e5 100644 --- a/templates/src/diagnostic.c.erb +++ b/templates/src/diagnostic.c.erb @@ -426,7 +426,7 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = { * Get the human-readable name of the given diagnostic ID. */ const char * -pm_diagnostic_id_human(pm_diagnostic_id_t diag_id) { +pm_diagnostic_id_str(pm_diagnostic_id_t diag_id) { switch (diag_id) { <%- errors.each do |error| -%> case PM_ERR_<%= error.name %>: return "<%= error.name.downcase %>"; From 0b17e49b8b023cde13e33a6fa6b0efbcc4588e5e Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 18 Mar 2026 10:38:03 -0400 Subject: [PATCH 056/100] Make pm_comment_t opaque --- ext/prism/extension.c | 14 ++-- include/prism/internal/parser.h | 26 ++++++++ include/prism/parser.h | 109 +++++++++++++++++++++++--------- src/parser.c | 59 +++++++++++++++-- 4 files changed, 168 insertions(+), 40 deletions(-) diff --git a/ext/prism/extension.c b/ext/prism/extension.c index 70f7fd0eb0..241419135d 100644 --- a/ext/prism/extension.c +++ b/ext/prism/extension.c @@ -480,8 +480,8 @@ parser_location(VALUE source, bool freeze, uint32_t start, uint32_t length) { */ static inline VALUE parser_comment(VALUE source, bool freeze, const pm_comment_t *comment) { - VALUE argv[] = { PARSER_LOCATION(source, freeze, comment->location) }; - VALUE type = (comment->type == PM_COMMENT_EMBDOC) ? rb_cPrismEmbDocComment : rb_cPrismInlineComment; + VALUE argv[] = { PARSER_LOCATION(source, freeze, pm_comment_location(comment)) }; + VALUE type = (pm_comment_type(comment) == PM_COMMENT_EMBDOC) ? 
rb_cPrismEmbDocComment : rb_cPrismInlineComment; return rb_class_new_instance_freeze(1, argv, type, freeze); } @@ -490,19 +490,21 @@ parser_comment(VALUE source, bool freeze, const pm_comment_t *comment) { */ static VALUE parser_comments(const pm_parser_t *parser, VALUE source, bool freeze) { - const pm_list_t *comments_list = pm_parser_comments(parser); - VALUE comments = rb_ary_new_capa(comments_list->size); + pm_comments_iter_t *comments_iter = pm_comments_iter(parser); + VALUE comments = rb_ary_new_capa(pm_comments_iter_size(comments_iter)); for ( - const pm_comment_t *comment = (const pm_comment_t *) comments_list->head; + const pm_comment_t *comment = pm_comments_iter_next(comments_iter); comment != NULL; - comment = (const pm_comment_t *) comment->node.next + comment = pm_comments_iter_next(comments_iter) ) { VALUE value = parser_comment(source, freeze, comment); rb_ary_push(comments, value); } + pm_comments_iter_free(comments_iter); if (freeze) rb_obj_freeze(comments); + return comments; } diff --git a/include/prism/internal/parser.h b/include/prism/internal/parser.h index f8baf49cdb..ed696b1221 100644 --- a/include/prism/internal/parser.h +++ b/include/prism/internal/parser.h @@ -933,4 +933,30 @@ struct pm_parser_t { #endif }; +/** + * A comment found while parsing. + */ +struct pm_comment_t { + /** The embedded base node. */ + pm_list_node_t node; + + /** The location of the comment in the source. */ + pm_location_t location; + + /** The type of the comment. */ + pm_comment_type_t type; +}; + +/** + * A struct used as an opaque pointer for the client to iterate through the + * comments found while parsing. + */ +struct pm_comments_iter_t { + /** The number of comments in the list. */ + size_t size; + + /** The current node in the list. 
*/ + const pm_list_node_t *current; +}; + #endif diff --git a/include/prism/parser.h b/include/prism/parser.h index 047999142c..b13e9e8190 100644 --- a/include/prism/parser.h +++ b/include/prism/parser.h @@ -15,28 +15,6 @@ */ typedef struct pm_parser_t pm_parser_t; -/** This is the type of a comment that we've found while parsing. */ -typedef enum { - PM_COMMENT_INLINE, - PM_COMMENT_EMBDOC -} pm_comment_type_t; - -/** - * This is a node in the linked list of comments that we've found while parsing. - * - * @extends pm_list_node_t - */ -typedef struct pm_comment { - /** The embedded base node. */ - pm_list_node_t node; - - /** The location of the comment in the source. */ - pm_location_t location; - - /** The type of comment that we've found. */ - pm_comment_type_t type; -} pm_comment_t; - /** * This is a node in the linked list of magic comments that we've found while * parsing. @@ -144,14 +122,6 @@ PRISM_EXPORTED_FUNCTION const pm_list_t * pm_parser_errors(const pm_parser_t *pa */ PRISM_EXPORTED_FUNCTION const pm_list_t * pm_parser_warnings(const pm_parser_t *parser); -/** - * Returns the comments that are associated with the given parser. - * - * @param parser the parser whose comments we want to get - * @return the comments that are associated with the given parser - */ -PRISM_EXPORTED_FUNCTION const pm_list_t * pm_parser_comments(const pm_parser_t *parser); - /** * Returns the magic comments that are associated with the given parser. * @@ -206,4 +176,83 @@ PRISM_EXPORTED_FUNCTION bool pm_parser_continuable(const pm_parser_t *parser); */ PRISM_EXPORTED_FUNCTION int pm_parser_lex_state(const pm_parser_t *parser); +/******************************************************************************/ +/* Comments */ +/******************************************************************************/ + +/** This is the type of a comment that we've found while parsing. 
*/ +typedef enum { + PM_COMMENT_INLINE, + PM_COMMENT_EMBDOC +} pm_comment_type_t; + +/** An opaque pointer to a comment found while parsing. */ +typedef struct pm_comment_t pm_comment_t; + +/** + * Returns the location associated with the given comment. + * + * @param comment the comment whose location we want to get + * @return the location associated with the given comment + */ +PRISM_EXPORTED_FUNCTION pm_location_t pm_comment_location(const pm_comment_t *comment); + +/** + * Returns the type associated with the given comment. + * + * @param comment the comment whose type we want to get + * @return the type associated with the given comment. This can either be + * PM_COMMENT_INLINE or PM_COMMENT_EMBDOC. + */ +PRISM_EXPORTED_FUNCTION pm_comment_type_t pm_comment_type(const pm_comment_t *comment); + +/* An opaque pointer to an iterator that can be used to iterate over the + * comments associated with a parser. */ +typedef struct pm_comments_iter_t pm_comments_iter_t; + +/** + * Returns an iterator that knows how to iterate over the comments that are + * associated with the given parser. + * + * @param parser the parser whose comments we want to get + * @return the iterator that knows how to iterate over the comments that are + * associated with the given parser. It is the responsibility of the caller + * to free the memory associated with the iterator through + * pm_comments_iter_free. + * + * \public \memberof pm_parser + */ +PRISM_EXPORTED_FUNCTION pm_comments_iter_t * pm_comments_iter(const pm_parser_t *parser); + +/** + * Returns the number of comments associated with the comment iterator. + * + * @param iter the iterator to get the number of comments from + * @return the number of comments associated with the comment iterator + * + * \public \memberof pm_comments_iter_t + */ +PRISM_EXPORTED_FUNCTION size_t pm_comments_iter_size(const pm_comments_iter_t *iter); + +/** + * Returns the next comment in the iteration, or NULL if there are no more + * comments. 
+ * + * @param iter the iterator to get the next comment from + * @return the next comment in the iteration, or NULL if there are no more + * comments. + * + * \public \memberof pm_comments_iter_t + */ +PRISM_EXPORTED_FUNCTION const pm_comment_t * pm_comments_iter_next(pm_comments_iter_t *iter); + +/** + * Frees the memory associated with the given comments iterator. + * + * @param iter the iterator to free + * + * \public \memberof pm_comments_iter_t + */ +PRISM_EXPORTED_FUNCTION void pm_comments_iter_free(pm_comments_iter_t *iter); + #endif diff --git a/src/parser.c b/src/parser.c index 209f748ff2..401ae3a386 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1,7 +1,10 @@ #include "prism/internal/parser.h" +#include "prism/internal/allocator.h" #include "prism/internal/encoding.h" +#include + /** * Register a callback that will be called whenever prism changes the encoding * it is using to parse based on the magic comment. @@ -80,11 +83,59 @@ pm_parser_warnings(const pm_parser_t *parser) { } /** - * Returns the comments that are associated with the given parser. + * Returns the location associated with the given comment. */ -const pm_list_t * -pm_parser_comments(const pm_parser_t *parser) { - return &parser->comment_list; +pm_location_t +pm_comment_location(const pm_comment_t *comment) { + return comment->location; +} + +/** + * Returns the type associated with the given comment. + */ +pm_comment_type_t +pm_comment_type(const pm_comment_t *comment) { + return comment->type; +} + +/** + * Returns an iterator that knows how to iterate over the comments that are + * associated with the given parser. + */ +pm_comments_iter_t * +pm_comments_iter(const pm_parser_t *parser) { + pm_comments_iter_t *iter = (pm_comments_iter_t *) xmalloc(sizeof(pm_comments_iter_t)); + iter->size = parser->comment_list.size; + iter->current = parser->comment_list.head; + return iter; +} + +/** + * Returns the number of comments associated with the comment iterator. 
+ */ +size_t +pm_comments_iter_size(const pm_comments_iter_t *iter) { + return iter->size; +} + +/** + * Returns the next comment in the iteration, or NULL if there are no more + * comments. + */ +const pm_comment_t * +pm_comments_iter_next(pm_comments_iter_t *iter) { + if (iter->current == NULL) return NULL; + const pm_comment_t *comment = (const pm_comment_t *) iter->current; + iter->current = iter->current->next; + return comment; +} + +/** + * Frees the memory associated with the given comments iterator. + */ +void +pm_comments_iter_free(pm_comments_iter_t *iter) { + xfree(iter); } /** From 1ddff36b54905d7c5bdd56928b4384f52d6f816d Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 18 Mar 2026 10:44:19 -0400 Subject: [PATCH 057/100] Move comment into its own section --- include/prism.h | 1 + include/prism/comment.h | 91 ++++++++++++++++++++++++++++++++ include/prism/internal/comment.h | 35 ++++++++++++ include/prism/internal/parser.h | 26 --------- include/prism/parser.h | 79 --------------------------- src/comment.c | 62 ++++++++++++++++++++++ src/parser.c | 56 -------------------- src/prism.c | 1 + templates/src/serialize.c.erb | 1 + 9 files changed, 191 insertions(+), 161 deletions(-) create mode 100644 include/prism/comment.h create mode 100644 include/prism/internal/comment.h create mode 100644 src/comment.c diff --git a/include/prism.h b/include/prism.h index 9af3e6cf6c..9dcfe9a6ec 100644 --- a/include/prism.h +++ b/include/prism.h @@ -12,6 +12,7 @@ extern "C" { #include "prism/arena.h" #include "prism/ast.h" +#include "prism/comment.h" #include "prism/diagnostic.h" #include "prism/excludes.h" #include "prism/node.h" diff --git a/include/prism/comment.h b/include/prism/comment.h new file mode 100644 index 0000000000..22b3f2d2fb --- /dev/null +++ b/include/prism/comment.h @@ -0,0 +1,91 @@ +/** + * @file comment.h + * + * The comment module used to handle comments in Ruby source. 
+ */ +#ifndef PRISM_COMMENT_H +#define PRISM_COMMENT_H + +#include "prism/compiler/exported.h" + +#include "prism/ast.h" +#include "prism/parser.h" + +#include + +/** This is the type of a comment that we've found while parsing. */ +typedef enum { + PM_COMMENT_INLINE, + PM_COMMENT_EMBDOC +} pm_comment_type_t; + +/** An opaque pointer to a comment found while parsing. */ +typedef struct pm_comment_t pm_comment_t; + +/** + * Returns the location associated with the given comment. + * + * @param comment the comment whose location we want to get + * @return the location associated with the given comment + */ +PRISM_EXPORTED_FUNCTION pm_location_t pm_comment_location(const pm_comment_t *comment); + +/** + * Returns the type associated with the given comment. + * + * @param comment the comment whose type we want to get + * @return the type associated with the given comment. This can either be + * PM_COMMENT_INLINE or PM_COMMENT_EMBDOC. + */ +PRISM_EXPORTED_FUNCTION pm_comment_type_t pm_comment_type(const pm_comment_t *comment); + +/* An opaque pointer to an iterator that can be used to iterate over the + * comments associated with a parser. */ +typedef struct pm_comments_iter_t pm_comments_iter_t; + +/** + * Returns an iterator that knows how to iterate over the comments that are + * associated with the given parser. + * + * @param parser the parser whose comments we want to get + * @return the iterator that knows how to iterate over the comments that are + * associated with the given parser. It is the responsibility of the caller + * to free the memory associated with the iterator through + * pm_comments_iter_free. + * + * \public \memberof pm_parser + */ +PRISM_EXPORTED_FUNCTION pm_comments_iter_t * pm_comments_iter(const pm_parser_t *parser); + +/** + * Returns the number of comments associated with the comment iterator. 
+ * + * @param iter the iterator to get the number of comments from + * @return the number of comments associated with the comment iterator + * + * \public \memberof pm_comments_iter_t + */ +PRISM_EXPORTED_FUNCTION size_t pm_comments_iter_size(const pm_comments_iter_t *iter); + +/** + * Returns the next comment in the iteration, or NULL if there are no more + * comments. + * + * @param iter the iterator to get the next comment from + * @return the next comment in the iteration, or NULL if there are no more + * comments. + * + * \public \memberof pm_comments_iter_t + */ +PRISM_EXPORTED_FUNCTION const pm_comment_t * pm_comments_iter_next(pm_comments_iter_t *iter); + +/** + * Frees the memory associated with the given comments iterator. + * + * @param iter the iterator to free + * + * \public \memberof pm_comments_iter_t + */ +PRISM_EXPORTED_FUNCTION void pm_comments_iter_free(pm_comments_iter_t *iter); + +#endif diff --git a/include/prism/internal/comment.h b/include/prism/internal/comment.h new file mode 100644 index 0000000000..c717432075 --- /dev/null +++ b/include/prism/internal/comment.h @@ -0,0 +1,35 @@ +/** + * @file internal/comment.h + */ +#ifndef PRISM_INTERNAL_COMMENT_H +#define PRISM_INTERNAL_COMMENT_H + +#include "prism/comment.h" + +/** + * A comment found while parsing. + */ +struct pm_comment_t { + /** The embedded base node. */ + pm_list_node_t node; + + /** The location of the comment in the source. */ + pm_location_t location; + + /** The type of the comment. */ + pm_comment_type_t type; +}; + +/** + * A struct used as an opaque pointer for the client to iterate through the + * comments found while parsing. + */ +struct pm_comments_iter_t { + /** The number of comments in the list. */ + size_t size; + + /** The current node in the list. 
*/ + const pm_list_node_t *current; +}; + +#endif diff --git a/include/prism/internal/parser.h b/include/prism/internal/parser.h index ed696b1221..f8baf49cdb 100644 --- a/include/prism/internal/parser.h +++ b/include/prism/internal/parser.h @@ -933,30 +933,4 @@ struct pm_parser_t { #endif }; -/** - * A comment found while parsing. - */ -struct pm_comment_t { - /** The embedded base node. */ - pm_list_node_t node; - - /** The location of the comment in the source. */ - pm_location_t location; - - /** The type of the comment. */ - pm_comment_type_t type; -}; - -/** - * A struct used as an opaque pointer for the client to iterate through the - * comments found while parsing. - */ -struct pm_comments_iter_t { - /** The number of comments in the list. */ - size_t size; - - /** The current node in the list. */ - const pm_list_node_t *current; -}; - #endif diff --git a/include/prism/parser.h b/include/prism/parser.h index b13e9e8190..d1cb5b8827 100644 --- a/include/prism/parser.h +++ b/include/prism/parser.h @@ -176,83 +176,4 @@ PRISM_EXPORTED_FUNCTION bool pm_parser_continuable(const pm_parser_t *parser); */ PRISM_EXPORTED_FUNCTION int pm_parser_lex_state(const pm_parser_t *parser); -/******************************************************************************/ -/* Comments */ -/******************************************************************************/ - -/** This is the type of a comment that we've found while parsing. */ -typedef enum { - PM_COMMENT_INLINE, - PM_COMMENT_EMBDOC -} pm_comment_type_t; - -/** An opaque pointer to a comment found while parsing. */ -typedef struct pm_comment_t pm_comment_t; - -/** - * Returns the location associated with the given comment. - * - * @param comment the comment whose location we want to get - * @return the location associated with the given comment - */ -PRISM_EXPORTED_FUNCTION pm_location_t pm_comment_location(const pm_comment_t *comment); - -/** - * Returns the type associated with the given comment. 
- * - * @param comment the comment whose type we want to get - * @return the type associated with the given comment. This can either be - * PM_COMMENT_INLINE or PM_COMMENT_EMBDOC. - */ -PRISM_EXPORTED_FUNCTION pm_comment_type_t pm_comment_type(const pm_comment_t *comment); - -/* An opaque pointer to an iterator that can be used to iterate over the - * comments associated with a parser. */ -typedef struct pm_comments_iter_t pm_comments_iter_t; - -/** - * Returns an iterator that knows how to iterate over the comments that are - * associated with the given parser. - * - * @param parser the parser whose comments we want to get - * @return the iterator that knows how to iterate over the comments that are - * associated with the given parser. It is the responsibility of the caller - * to free the memory associated with the iterator through - * pm_comments_iter_free. - * - * \public \memberof pm_parser - */ -PRISM_EXPORTED_FUNCTION pm_comments_iter_t * pm_comments_iter(const pm_parser_t *parser); - -/** - * Returns the number of comments associated with the comment iterator. - * - * @param iter the iterator to get the number of comments from - * @return the number of comments associated with the comment iterator - * - * \public \memberof pm_comments_iter_t - */ -PRISM_EXPORTED_FUNCTION size_t pm_comments_iter_size(const pm_comments_iter_t *iter); - -/** - * Returns the next comment in the iteration, or NULL if there are no more - * comments. - * - * @param iter the iterator to get the next comment from - * @return the next comment in the iteration, or NULL if there are no more - * comments. - * - * \public \memberof pm_comments_iter_t - */ -PRISM_EXPORTED_FUNCTION const pm_comment_t * pm_comments_iter_next(pm_comments_iter_t *iter); - -/** - * Frees the memory associated with the given comments iterator. 
- * - * @param iter the iterator to free - * - * \public \memberof pm_comments_iter_t - */ -PRISM_EXPORTED_FUNCTION void pm_comments_iter_free(pm_comments_iter_t *iter); - #endif diff --git a/src/comment.c b/src/comment.c new file mode 100644 index 0000000000..a087a4c135 --- /dev/null +++ b/src/comment.c @@ -0,0 +1,62 @@ +#include "prism/internal/comment.h" + +#include "prism/internal/allocator.h" +#include "prism/internal/parser.h" + +#include + +/** + * Returns the location associated with the given comment. + */ +pm_location_t +pm_comment_location(const pm_comment_t *comment) { + return comment->location; +} + +/** + * Returns the type associated with the given comment. + */ +pm_comment_type_t +pm_comment_type(const pm_comment_t *comment) { + return comment->type; +} + +/** + * Returns an iterator that knows how to iterate over the comments that are + * associated with the given parser. + */ +pm_comments_iter_t * +pm_comments_iter(const pm_parser_t *parser) { + pm_comments_iter_t *iter = (pm_comments_iter_t *) xmalloc(sizeof(pm_comments_iter_t)); + iter->size = parser->comment_list.size; + iter->current = parser->comment_list.head; + return iter; +} + +/** + * Returns the number of comments associated with the comment iterator. + */ +size_t +pm_comments_iter_size(const pm_comments_iter_t *iter) { + return iter->size; +} + +/** + * Returns the next comment in the iteration, or NULL if there are no more + * comments. + */ +const pm_comment_t * +pm_comments_iter_next(pm_comments_iter_t *iter) { + if (iter->current == NULL) return NULL; + const pm_comment_t *comment = (const pm_comment_t *) iter->current; + iter->current = iter->current->next; + return comment; +} + +/** + * Frees the memory associated with the given comments iterator. 
+ */ +void +pm_comments_iter_free(pm_comments_iter_t *iter) { + xfree(iter); +} diff --git a/src/parser.c b/src/parser.c index 401ae3a386..c4784c27ea 100644 --- a/src/parser.c +++ b/src/parser.c @@ -82,62 +82,6 @@ pm_parser_warnings(const pm_parser_t *parser) { return &parser->warning_list; } -/** - * Returns the location associated with the given comment. - */ -pm_location_t -pm_comment_location(const pm_comment_t *comment) { - return comment->location; -} - -/** - * Returns the type associated with the given comment. - */ -pm_comment_type_t -pm_comment_type(const pm_comment_t *comment) { - return comment->type; -} - -/** - * Returns an iterator that knows how to iterate over the comments that are - * associated with the given parser. - */ -pm_comments_iter_t * -pm_comments_iter(const pm_parser_t *parser) { - pm_comments_iter_t *iter = (pm_comments_iter_t *) xmalloc(sizeof(pm_comments_iter_t)); - iter->size = parser->comment_list.size; - iter->current = parser->comment_list.head; - return iter; -} - -/** - * Returns the number of comments associated with the comment iterator. - */ -size_t -pm_comments_iter_size(const pm_comments_iter_t *iter) { - return iter->size; -} - -/** - * Returns the next comment in the iteration, or NULL if there are no more - * comments. - */ -const pm_comment_t * -pm_comments_iter_next(pm_comments_iter_t *iter) { - if (iter->current == NULL) return NULL; - const pm_comment_t *comment = (const pm_comment_t *) iter->current; - iter->current = iter->current->next; - return comment; -} - -/** - * Frees the memory associated with the given comments iterator. - */ -void -pm_comments_iter_free(pm_comments_iter_t *iter) { - xfree(iter); -} - /** * Returns the magic comments that are associated with the given parser. 
*/ diff --git a/src/prism.c b/src/prism.c index a5d91fe5bb..421e081ace 100644 --- a/src/prism.c +++ b/src/prism.c @@ -9,6 +9,7 @@ #include "prism/internal/bit.h" #include "prism/internal/buffer.h" #include "prism/internal/char.h" +#include "prism/internal/comment.h" #include "prism/internal/constant_pool.h" #include "prism/internal/diagnostic.h" #include "prism/internal/encoding.h" diff --git a/templates/src/serialize.c.erb b/templates/src/serialize.c.erb index cfd073b7e6..a8c2daa532 100644 --- a/templates/src/serialize.c.erb +++ b/templates/src/serialize.c.erb @@ -8,6 +8,7 @@ #include "prism/compiler/inline.h" #include "prism/internal/buffer.h" +#include "prism/internal/comment.h" #include "prism/internal/encoding.h" #include "prism/internal/list.h" #include "prism/internal/options.h" From 84b08e23d65b7c59a5782d428b93b18c68911f74 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 18 Mar 2026 10:46:29 -0400 Subject: [PATCH 058/100] Move more constants internal --- ext/prism/extension.c | 2 +- include/prism.h | 1 - include/prism/{comment.h => comments.h} | 23 +++---------------- include/prism/constant_pool.h | 7 ------ .../prism/internal/{comment.h => comments.h} | 10 ++++---- include/prism/internal/constant_pool.h | 9 ++++++++ include/prism/parser.h | 15 ++++++++++++ src/{comment.c => comments.c} | 14 +---------- src/constant_pool.c | 2 +- src/parser.c | 13 +++++++++++ src/prism.c | 2 +- templates/src/serialize.c.erb | 2 +- 12 files changed, 51 insertions(+), 49 deletions(-) rename include/prism/{comment.h => comments.h} (74%) rename include/prism/internal/{comment.h => comments.h} (79%) rename src/{comment.c => comments.c} (71%) diff --git a/ext/prism/extension.c b/ext/prism/extension.c index 241419135d..0c98b988d0 100644 --- a/ext/prism/extension.c +++ b/ext/prism/extension.c @@ -490,7 +490,7 @@ parser_comment(VALUE source, bool freeze, const pm_comment_t *comment) { */ static VALUE parser_comments(const pm_parser_t *parser, VALUE source, bool freeze) { - 
pm_comments_iter_t *comments_iter = pm_comments_iter(parser); + pm_comments_iter_t *comments_iter = pm_parser_comments(parser); VALUE comments = rb_ary_new_capa(pm_comments_iter_size(comments_iter)); for ( diff --git a/include/prism.h b/include/prism.h index 9dcfe9a6ec..9af3e6cf6c 100644 --- a/include/prism.h +++ b/include/prism.h @@ -12,7 +12,6 @@ extern "C" { #include "prism/arena.h" #include "prism/ast.h" -#include "prism/comment.h" #include "prism/diagnostic.h" #include "prism/excludes.h" #include "prism/node.h" diff --git a/include/prism/comment.h b/include/prism/comments.h similarity index 74% rename from include/prism/comment.h rename to include/prism/comments.h index 22b3f2d2fb..7c95d0ca72 100644 --- a/include/prism/comment.h +++ b/include/prism/comments.h @@ -1,15 +1,12 @@ /** - * @file comment.h - * - * The comment module used to handle comments in Ruby source. + * @file comments.h */ -#ifndef PRISM_COMMENT_H -#define PRISM_COMMENT_H +#ifndef PRISM_COMMENTS_H +#define PRISM_COMMENTS_H #include "prism/compiler/exported.h" #include "prism/ast.h" -#include "prism/parser.h" #include @@ -43,20 +40,6 @@ PRISM_EXPORTED_FUNCTION pm_comment_type_t pm_comment_type(const pm_comment_t *co * comments associated with a parser. */ typedef struct pm_comments_iter_t pm_comments_iter_t; -/** - * Returns an iterator that knows how to iterate over the comments that are - * associated with the given parser. - * - * @param parser the parser whose comments we want to get - * @return the iterator that knows how to iterate over the comments that are - * associated with the given parser. It is the responsibility of the caller - * to free the memory associated with the iterator through - * pm_comments_iter_free. - * - * \public \memberof pm_parser - */ -PRISM_EXPORTED_FUNCTION pm_comments_iter_t * pm_comments_iter(const pm_parser_t *parser); - /** * Returns the number of comments associated with the comment iterator. 
* diff --git a/include/prism/constant_pool.h b/include/prism/constant_pool.h index cc426bb0ab..b1db33f8e3 100644 --- a/include/prism/constant_pool.h +++ b/include/prism/constant_pool.h @@ -13,13 +13,6 @@ #include #include -/** - * When we allocate constants into the pool, we reserve 0 to mean that the slot - * is not yet filled. This constant is reused in other places to indicate the - * lack of a constant id. - */ -#define PM_CONSTANT_ID_UNSET 0 - /** * A constant id is a unique identifier for a constant in the constant pool. */ diff --git a/include/prism/internal/comment.h b/include/prism/internal/comments.h similarity index 79% rename from include/prism/internal/comment.h rename to include/prism/internal/comments.h index c717432075..c7adf52464 100644 --- a/include/prism/internal/comment.h +++ b/include/prism/internal/comments.h @@ -1,10 +1,12 @@ /** - * @file internal/comment.h + * @file internal/comments.h */ -#ifndef PRISM_INTERNAL_COMMENT_H -#define PRISM_INTERNAL_COMMENT_H +#ifndef PRISM_INTERNAL_COMMENTS_H +#define PRISM_INTERNAL_COMMENTS_H -#include "prism/comment.h" +#include "prism/comments.h" + +#include "prism/list.h" /** * A comment found while parsing. diff --git a/include/prism/internal/constant_pool.h b/include/prism/internal/constant_pool.h index 03671938a6..9e7d3cd74b 100644 --- a/include/prism/internal/constant_pool.h +++ b/include/prism/internal/constant_pool.h @@ -14,6 +14,15 @@ #include "prism/arena.h" +#include + +/** + * When we allocate constants into the pool, we reserve 0 to mean that the slot + * is not yet filled. This constant is reused in other places to indicate the + * lack of a constant id. + */ +#define PM_CONSTANT_ID_UNSET 0 + /** * Initialize a list of constant ids. 
* diff --git a/include/prism/parser.h b/include/prism/parser.h index d1cb5b8827..b1726b4975 100644 --- a/include/prism/parser.h +++ b/include/prism/parser.h @@ -7,6 +7,7 @@ #define PRISM_PARSER_H #include "prism/ast.h" +#include "prism/comments.h" #include "prism/line_offset_list.h" #include "prism/list.h" @@ -176,4 +177,18 @@ PRISM_EXPORTED_FUNCTION bool pm_parser_continuable(const pm_parser_t *parser); */ PRISM_EXPORTED_FUNCTION int pm_parser_lex_state(const pm_parser_t *parser); +/** + * Returns an iterator that knows how to iterate over the comments that are + * associated with the given parser. + * + * @param parser the parser whose comments we want to get + * @return the iterator that knows how to iterate over the comments that are + * associated with the given parser. It is the responsibility of the caller + * to free the memory associated with the iterator through + * pm_comments_iter_free. + * + * \public \memberof pm_parser + */ +PRISM_EXPORTED_FUNCTION pm_comments_iter_t * pm_parser_comments(const pm_parser_t *parser); + #endif diff --git a/src/comment.c b/src/comments.c similarity index 71% rename from src/comment.c rename to src/comments.c index a087a4c135..7e2f1e2f10 100644 --- a/src/comment.c +++ b/src/comments.c @@ -1,4 +1,4 @@ -#include "prism/internal/comment.h" +#include "prism/internal/comments.h" #include "prism/internal/allocator.h" #include "prism/internal/parser.h" @@ -21,18 +21,6 @@ pm_comment_type(const pm_comment_t *comment) { return comment->type; } -/** - * Returns an iterator that knows how to iterate over the comments that are - * associated with the given parser. - */ -pm_comments_iter_t * -pm_comments_iter(const pm_parser_t *parser) { - pm_comments_iter_t *iter = (pm_comments_iter_t *) xmalloc(sizeof(pm_comments_iter_t)); - iter->size = parser->comment_list.size; - iter->current = parser->comment_list.head; - return iter; -} - /** * Returns the number of comments associated with the comment iterator. 
*/ diff --git a/src/constant_pool.c b/src/constant_pool.c index 0baab71997..3f0baac702 100644 --- a/src/constant_pool.c +++ b/src/constant_pool.c @@ -1,4 +1,4 @@ -#include "prism/constant_pool.h" +#include "prism/internal/constant_pool.h" #include "prism/compiler/align.h" #include "prism/compiler/inline.h" diff --git a/src/parser.c b/src/parser.c index c4784c27ea..7447b00f24 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1,6 +1,7 @@ #include "prism/internal/parser.h" #include "prism/internal/allocator.h" +#include "prism/internal/comments.h" #include "prism/internal/encoding.h" #include @@ -137,3 +138,15 @@ int pm_parser_lex_state(const pm_parser_t *parser) { return (int) parser->lex_state; } + +/** + * Returns an iterator that knows how to iterate over the comments that are + * associated with the given parser. + */ +pm_comments_iter_t * +pm_parser_comments(const pm_parser_t *parser) { + pm_comments_iter_t *iter = (pm_comments_iter_t *) xmalloc(sizeof(pm_comments_iter_t)); + iter->size = parser->comment_list.size; + iter->current = parser->comment_list.head; + return iter; +} diff --git a/src/prism.c b/src/prism.c index 421e081ace..f948473db8 100644 --- a/src/prism.c +++ b/src/prism.c @@ -9,7 +9,7 @@ #include "prism/internal/bit.h" #include "prism/internal/buffer.h" #include "prism/internal/char.h" -#include "prism/internal/comment.h" +#include "prism/internal/comments.h" #include "prism/internal/constant_pool.h" #include "prism/internal/diagnostic.h" #include "prism/internal/encoding.h" diff --git a/templates/src/serialize.c.erb b/templates/src/serialize.c.erb index a8c2daa532..7edb297501 100644 --- a/templates/src/serialize.c.erb +++ b/templates/src/serialize.c.erb @@ -8,7 +8,7 @@ #include "prism/compiler/inline.h" #include "prism/internal/buffer.h" -#include "prism/internal/comment.h" +#include "prism/internal/comments.h" #include "prism/internal/encoding.h" #include "prism/internal/list.h" #include "prism/internal/options.h" From 
6f40516041af14166bc97cf2976dc6c5725d8b7c Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 18 Mar 2026 11:17:01 -0400 Subject: [PATCH 059/100] Move diagnostics entirely internal --- .gitignore | 2 +- ext/prism/extension.c | 46 +++--- include/prism/diagnostic.h | 134 ++++++++++++++++++ include/prism/internal/diagnostic.h | 37 ----- include/prism/parser.h | 45 +++--- src/parser.c | 41 +++--- templates/include/prism/diagnostic.h.erb | 88 ------------ .../include/prism/internal/diagnostic.h.erb | 89 ++++++++++++ templates/src/diagnostic.c.erb | 88 ++++++++++-- templates/src/serialize.c.erb | 2 +- templates/template.rb | 2 +- 11 files changed, 386 insertions(+), 188 deletions(-) create mode 100644 include/prism/diagnostic.h delete mode 100644 include/prism/internal/diagnostic.h delete mode 100644 templates/include/prism/diagnostic.h.erb create mode 100644 templates/include/prism/internal/diagnostic.h.erb diff --git a/.gitignore b/.gitignore index b914ce4cd9..a8370790b2 100644 --- a/.gitignore +++ b/.gitignore @@ -31,8 +31,8 @@ out.svg /fuzz/output/ /gemfiles/typecheck/bin/ /include/prism/ast.h -/include/prism/diagnostic.h /include/prism/node_new.h +/include/prism/internal/diagnostic.h /javascript/node_modules/ /javascript/package-lock.json /javascript/src/deserialize.js diff --git a/ext/prism/extension.c b/ext/prism/extension.c index 0c98b988d0..1980e44d89 100644 --- a/ext/prism/extension.c +++ b/ext/prism/extension.c @@ -560,20 +560,22 @@ parser_data_loc(const pm_parser_t *parser, VALUE source, bool freeze) { */ static VALUE parser_errors(const pm_parser_t *parser, rb_encoding *encoding, VALUE source, bool freeze) { - const pm_list_t *error_list = pm_parser_errors(parser); - VALUE errors = rb_ary_new_capa(error_list->size); + pm_diagnostics_iter_t *iter = pm_parser_errors(parser); + VALUE errors = rb_ary_new_capa(pm_diagnostics_iter_size(iter)); for ( - const pm_diagnostic_t *error = (const pm_diagnostic_t *) error_list->head; + const pm_diagnostic_t *error = 
pm_diagnostics_iter_next(iter); error != NULL; - error = (const pm_diagnostic_t *) error->node.next + error = pm_diagnostics_iter_next(iter) ) { - VALUE type = ID2SYM(rb_intern(pm_diagnostic_id_str(error->diag_id))); - VALUE message = rb_obj_freeze(rb_enc_str_new_cstr(error->message, encoding)); - VALUE location = PARSER_LOCATION(source, freeze, error->location); + VALUE type = ID2SYM(rb_intern(pm_diagnostic_type(error))); + VALUE message = rb_obj_freeze(rb_enc_str_new_cstr(pm_diagnostic_message(error), encoding)); + VALUE location = PARSER_LOCATION(source, freeze, pm_diagnostic_location(error)); + pm_error_level_t error_level = pm_diagnostic_error_level(error); VALUE level = Qnil; - switch (error->level) { + + switch (error_level) { case PM_ERROR_LEVEL_SYNTAX: level = ID2SYM(rb_intern("syntax")); break; @@ -584,7 +586,7 @@ parser_errors(const pm_parser_t *parser, rb_encoding *encoding, VALUE source, bo level = ID2SYM(rb_intern("load")); break; default: - rb_raise(rb_eRuntimeError, "Unknown level: %" PRIu8, error->level); + rb_raise(rb_eRuntimeError, "Unknown level: %" PRIu8, error_level); } VALUE argv[] = { type, message, location, level }; @@ -592,7 +594,9 @@ parser_errors(const pm_parser_t *parser, rb_encoding *encoding, VALUE source, bo rb_ary_push(errors, value); } + pm_diagnostics_iter_free(iter); if (freeze) rb_obj_freeze(errors); + return errors; } @@ -601,20 +605,22 @@ parser_errors(const pm_parser_t *parser, rb_encoding *encoding, VALUE source, bo */ static VALUE parser_warnings(const pm_parser_t *parser, rb_encoding *encoding, VALUE source, bool freeze) { - const pm_list_t *warning_list = pm_parser_warnings(parser); - VALUE warnings = rb_ary_new_capa(warning_list->size); + pm_diagnostics_iter_t *iter = pm_parser_warnings(parser); + VALUE warnings = rb_ary_new_capa(pm_diagnostics_iter_size(iter)); for ( - const pm_diagnostic_t *warning = (const pm_diagnostic_t *) warning_list->head; + const pm_diagnostic_t *warning = pm_diagnostics_iter_next(iter); 
warning != NULL; - warning = (const pm_diagnostic_t *) warning->node.next + warning = pm_diagnostics_iter_next(iter) ) { - VALUE type = ID2SYM(rb_intern(pm_diagnostic_id_str(warning->diag_id))); - VALUE message = rb_obj_freeze(rb_enc_str_new_cstr(warning->message, encoding)); - VALUE location = PARSER_LOCATION(source, freeze, warning->location); + VALUE type = ID2SYM(rb_intern(pm_diagnostic_type(warning))); + VALUE message = rb_obj_freeze(rb_enc_str_new_cstr(pm_diagnostic_message(warning), encoding)); + VALUE location = PARSER_LOCATION(source, freeze, pm_diagnostic_location(warning)); + pm_warning_level_t warning_level = pm_diagnostic_warning_level(warning); VALUE level = Qnil; - switch (warning->level) { + + switch (warning_level) { case PM_WARNING_LEVEL_DEFAULT: level = ID2SYM(rb_intern("default")); break; @@ -622,7 +628,7 @@ parser_warnings(const pm_parser_t *parser, rb_encoding *encoding, VALUE source, level = ID2SYM(rb_intern("verbose")); break; default: - rb_raise(rb_eRuntimeError, "Unknown level: %" PRIu8, warning->level); + rb_raise(rb_eRuntimeError, "Unknown level: %" PRIu8, warning_level); } VALUE argv[] = { type, message, location, level }; @@ -630,7 +636,9 @@ parser_warnings(const pm_parser_t *parser, rb_encoding *encoding, VALUE source, rb_ary_push(warnings, value); } + pm_diagnostics_iter_free(iter); if (freeze) rb_obj_freeze(warnings); + return warnings; } @@ -1221,7 +1229,7 @@ parse_input_success_p(pm_string_t *input, const pm_options_t *options) { pm_parse(parser); - VALUE result = pm_parser_errors(parser)->size == 0 ? Qtrue : Qfalse; + VALUE result = pm_diagnostics_iter_size(pm_parser_errors(parser)) == 0 ? 
Qtrue : Qfalse; pm_parser_free(parser); pm_arena_free(&arena); diff --git a/include/prism/diagnostic.h b/include/prism/diagnostic.h new file mode 100644 index 0000000000..0619c274ef --- /dev/null +++ b/include/prism/diagnostic.h @@ -0,0 +1,134 @@ +/*----------------------------------------------------------------------------*/ +/* This file is generated by the templates/template.rb script and should not */ +/* be modified manually. See */ +/* templates/include/prism/diagnostic.h.erb */ +/* if you are looking to modify the */ +/* template */ +/*----------------------------------------------------------------------------*/ + +/** + * @file diagnostic.h + * + * A list of diagnostics generated during parsing. + */ +#ifndef PRISM_DIAGNOSTIC_H +#define PRISM_DIAGNOSTIC_H + +#include "prism/compiler/exported.h" + +#include "prism/ast.h" + +/** + * An opaque pointer to a diagnostic generated during parsing. + */ +typedef struct pm_diagnostic_t pm_diagnostic_t; + +/** + * The levels of errors generated during parsing. + */ +typedef enum { + /** For errors that should raise a syntax error. */ + PM_ERROR_LEVEL_SYNTAX = 0, + + /** For errors that should raise an argument error. */ + PM_ERROR_LEVEL_ARGUMENT = 1, + + /** For errors that should raise a load error. */ + PM_ERROR_LEVEL_LOAD = 2 +} pm_error_level_t; + +/** + * The levels of warnings generated during parsing. + */ +typedef enum { + /** For warnings which should be emitted if $VERBOSE != nil. */ + PM_WARNING_LEVEL_DEFAULT = 0, + + /** For warnings which should be emitted if $VERBOSE == true. */ + PM_WARNING_LEVEL_VERBOSE = 1 +} pm_warning_level_t; + +/** + * Get the type of the given diagnostic. + * + * @param diagnostic The diagnostic to get the type of. + * @returns The type of the given diagnostic. Note that this is a string + * representation of an internal ID, and is not meant to be relied upon as a + * stable identifier for the diagnostic. We do not guarantee that these will + * not change in the future. 
This is meant to be used for debugging and + * error reporting purposes, and not for programmatic checks. + */ +PRISM_EXPORTED_FUNCTION const char * pm_diagnostic_type(const pm_diagnostic_t *diagnostic); + +/** + * Get the location of the given diagnostic. + * + * @param diagnostic The diagnostic to get the location of. + * @returns The location of the given diagnostic. + */ +PRISM_EXPORTED_FUNCTION pm_location_t pm_diagnostic_location(const pm_diagnostic_t *diagnostic); + +/** + * Get the message of the given diagnostic. + * + * @param diagnostic The diagnostic to get the message of. + * @returns The message of the given diagnostic. + */ +PRISM_EXPORTED_FUNCTION const char * pm_diagnostic_message(const pm_diagnostic_t *diagnostic); + +/** + * Get the error level associated with the given diagnostic. + * + * @param diagnostic The diagnostic to get the error level of. + * @returns The error level of the given diagnostic. If the diagnostic was a + * warning, or is in any way not an error, then the return value is + * undefined and should not be relied upon. + */ +PRISM_EXPORTED_FUNCTION pm_error_level_t pm_diagnostic_error_level(const pm_diagnostic_t *diagnostic); + +/** + * Get the warning level associated with the given diagnostic. + * + * @param diagnostic The diagnostic to get the warning level of. + * @returns The warning level of the given diagnostic. If the diagnostic was an + * error, or is in any way not a warning, then the return value is + * undefined and should not be relied upon. + */ +PRISM_EXPORTED_FUNCTION pm_warning_level_t pm_diagnostic_warning_level(const pm_diagnostic_t *diagnostic); + +/* An opaque pointer to an iterator that can be used to iterate over a set of + * diagnostics associated with a parser. */ +typedef struct pm_diagnostics_iter_t pm_diagnostics_iter_t; + +/** + * Returns the number of diagnostics associated with the diagnostics iterator. 
+ * + * @param iter the iterator to get the number of diagnostics from + * @return the number of diagnostics associated with the diagnostics iterator + * + * \public \memberof pm_diagnostics_iter_t + */ +PRISM_EXPORTED_FUNCTION size_t pm_diagnostics_iter_size(const pm_diagnostics_iter_t *iter); + +/** + * Returns the next diagnostic in the iteration, or NULL if there are no more + * diagnostics. + * + * @param iter the iterator to get the next diagnostic from + * @return the next diagnostic in the iteration, or NULL if there are no more + * diagnostics. + * + * \public \memberof pm_diagnostics_iter_t + */ +PRISM_EXPORTED_FUNCTION const pm_diagnostic_t * pm_diagnostics_iter_next(pm_diagnostics_iter_t *iter); + +/** + * Frees the memory associated with the given diagnostics iterator. + * + * @param iter the iterator to free + * + * \public \memberof pm_diagnostics_iter_t + */ +PRISM_EXPORTED_FUNCTION void pm_diagnostics_iter_free(pm_diagnostics_iter_t *iter); + +#endif diff --git a/include/prism/internal/diagnostic.h b/include/prism/internal/diagnostic.h deleted file mode 100644 index 3e58c2ad2e..0000000000 --- a/include/prism/internal/diagnostic.h +++ /dev/null @@ -1,37 +0,0 @@ -/** - * @file internal/diagnostic.h - * - * A list of diagnostics generated during parsing. - */ -#ifndef PRISM_INTERNAL_DIAGNOSTIC_H -#define PRISM_INTERNAL_DIAGNOSTIC_H - -#include "prism/arena.h" -#include "prism/diagnostic.h" - -/** - * Append a diagnostic to the given list of diagnostics that is using shared - * memory for its message. - * - * @param arena The arena to allocate from. - * @param list The list to append to. - * @param start The source offset of the start of the diagnostic. - * @param length The length of the diagnostic. - * @param diag_id The diagnostic ID. 
- */ -void pm_diagnostic_list_append(pm_arena_t *arena, pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id); - -/** - * Append a diagnostic to the given list of diagnostics that is using a format - * string for its message. - * - * @param arena The arena to allocate from. - * @param list The list to append to. - * @param start The source offset of the start of the diagnostic. - * @param length The length of the diagnostic. - * @param diag_id The diagnostic ID. - * @param ... The arguments to the format string for the message. - */ -void pm_diagnostic_list_append_format(pm_arena_t *arena, pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id, ...); - -#endif diff --git a/include/prism/parser.h b/include/prism/parser.h index b1726b4975..c00743cbe1 100644 --- a/include/prism/parser.h +++ b/include/prism/parser.h @@ -8,6 +8,7 @@ #include "prism/ast.h" #include "prism/comments.h" +#include "prism/diagnostic.h" #include "prism/line_offset_list.h" #include "prism/list.h" @@ -107,22 +108,6 @@ PRISM_EXPORTED_FUNCTION int32_t pm_parser_start_line(const pm_parser_t *parser); */ PRISM_EXPORTED_FUNCTION const char * pm_parser_encoding_name(const pm_parser_t *parser); -/** - * Returns the errors that are associated with the given parser. - * - * @param parser the parser whose errors we want to get - * @return the errors that are associated with the given parser - */ -PRISM_EXPORTED_FUNCTION const pm_list_t * pm_parser_errors(const pm_parser_t *parser); - -/** - * Returns the warnings that are associated with the given parser. - * - * @param parser the parser whose warnings we want to get - * @return the warnings that are associated with the given parser - */ -PRISM_EXPORTED_FUNCTION const pm_list_t * pm_parser_warnings(const pm_parser_t *parser); - /** * Returns the magic comments that are associated with the given parser. 
* @@ -191,4 +176,32 @@ PRISM_EXPORTED_FUNCTION int pm_parser_lex_state(const pm_parser_t *parser); */ PRISM_EXPORTED_FUNCTION pm_comments_iter_t * pm_parser_comments(const pm_parser_t *parser); +/** + * Returns an iterator that knows how to iterate over the errors that are + * associated with the given parser. + * + * @param parser the parser whose errors we want to get + * @return the iterator that knows how to iterate over the errors that are + * associated with the given parser. It is the responsibility of the caller + * to free the memory associated with the iterator through + * pm_diagnostics_iter_free. + * + * \public \memberof pm_parser + */ +PRISM_EXPORTED_FUNCTION pm_diagnostics_iter_t * pm_parser_errors(const pm_parser_t *parser); + +/** + * Returns an iterator that knows how to iterate over the warnings that are + * associated with the given parser. + * + * @param parser the parser whose warnings we want to get + * @return the iterator that knows how to iterate over the warnings that are + * associated with the given parser. It is the responsibility of the caller + * to free the memory associated with the iterator through + * pm_diagnostics_iter_free. + * + * \public \memberof pm_parser + */ +PRISM_EXPORTED_FUNCTION pm_diagnostics_iter_t * pm_parser_warnings(const pm_parser_t *parser); + #endif diff --git a/src/parser.c b/src/parser.c index 7447b00f24..6900b228e7 100644 --- a/src/parser.c +++ b/src/parser.c @@ -2,6 +2,7 @@ #include "prism/internal/allocator.h" #include "prism/internal/comments.h" +#include "prism/internal/diagnostic.h" #include "prism/internal/encoding.h" #include @@ -67,22 +68,6 @@ pm_parser_encoding_name(const pm_parser_t *parser) { return parser->encoding->name; } -/** - * Returns the errors that are associated with the given parser. - */ -const pm_list_t * -pm_parser_errors(const pm_parser_t *parser) { - return &parser->error_list; -} - -/** - * Returns the warnings that are associated with the given parser. 
- */ -const pm_list_t * -pm_parser_warnings(const pm_parser_t *parser) { - return &parser->warning_list; -} - /** * Returns the magic comments that are associated with the given parser. */ @@ -150,3 +135,27 @@ pm_parser_comments(const pm_parser_t *parser) { iter->current = parser->comment_list.head; return iter; } + +/** + * Returns an iterator that knows how to iterate over the errors that are + * associated with the given parser. + */ +pm_diagnostics_iter_t * +pm_parser_errors(const pm_parser_t *parser) { + pm_diagnostics_iter_t *iter = (pm_diagnostics_iter_t *) xmalloc(sizeof(pm_diagnostics_iter_t)); + iter->size = parser->error_list.size; + iter->current = parser->error_list.head; + return iter; +} + +/** + * Returns an iterator that knows how to iterate over the warnings that are + * associated with the given parser. + */ +pm_diagnostics_iter_t * +pm_parser_warnings(const pm_parser_t *parser) { + pm_diagnostics_iter_t *iter = (pm_diagnostics_iter_t *) xmalloc(sizeof(pm_diagnostics_iter_t)); + iter->size = parser->warning_list.size; + iter->current = parser->warning_list.head; + return iter; +} diff --git a/templates/include/prism/diagnostic.h.erb b/templates/include/prism/diagnostic.h.erb deleted file mode 100644 index 89ef49a2d4..0000000000 --- a/templates/include/prism/diagnostic.h.erb +++ /dev/null @@ -1,88 +0,0 @@ -/** - * @file diagnostic.h - * - * A list of diagnostics generated during parsing. - */ -#ifndef PRISM_DIAGNOSTIC_H -#define PRISM_DIAGNOSTIC_H - -#include "prism/compiler/exported.h" - -#include "prism/ast.h" -#include "prism/list.h" - -/** - * The diagnostic IDs of all of the diagnostics, used to communicate the types - * of errors between the parser and the user. - */ -typedef enum { - // These are the error diagnostics. - <%- errors.each do |error| -%> - PM_ERR_<%= error.name %>, - <%- end -%> - - // These are the warning diagnostics. 
- <%- warnings.each do |warning| -%> - PM_WARN_<%= warning.name %>, - <%- end -%> -} pm_diagnostic_id_t; - -/** - * This struct represents a diagnostic generated during parsing. - * - * @extends pm_list_node_t - */ -typedef struct { - /** The embedded base node. */ - pm_list_node_t node; - - /** The location of the diagnostic in the source. */ - pm_location_t location; - - /** The ID of the diagnostic. */ - pm_diagnostic_id_t diag_id; - - /** The message associated with the diagnostic. */ - const char *message; - - /** - * The level of the diagnostic, see `pm_error_level_t` and - * `pm_warning_level_t` for possible values. - */ - uint8_t level; -} pm_diagnostic_t; - -/** - * The levels of errors generated during parsing. - */ -typedef enum { - /** For errors that should raise a syntax error. */ - PM_ERROR_LEVEL_SYNTAX = 0, - - /** For errors that should raise an argument error. */ - PM_ERROR_LEVEL_ARGUMENT = 1, - - /** For errors that should raise a load error. */ - PM_ERROR_LEVEL_LOAD = 2 -} pm_error_level_t; - -/** - * The levels of warnings generated during parsing. - */ -typedef enum { - /** For warnings which should be emitted if $VERBOSE != nil. */ - PM_WARNING_LEVEL_DEFAULT = 0, - - /** For warnings which should be emitted if $VERBOSE == true. */ - PM_WARNING_LEVEL_VERBOSE = 1 -} pm_warning_level_t; - -/** - * Get the human-readable name of the given diagnostic ID. - * - * @param diag_id The diagnostic ID to get the name of. - * @returns The human-readable name of the given diagnostic ID. - */ -PRISM_EXPORTED_FUNCTION const char * pm_diagnostic_id_str(pm_diagnostic_id_t diag_id); - -#endif diff --git a/templates/include/prism/internal/diagnostic.h.erb b/templates/include/prism/internal/diagnostic.h.erb new file mode 100644 index 0000000000..f15e3936a1 --- /dev/null +++ b/templates/include/prism/internal/diagnostic.h.erb @@ -0,0 +1,89 @@ +/** + * @file internal/diagnostic.h + * + * A list of diagnostics generated during parsing. 
+ */ +#ifndef PRISM_INTERNAL_DIAGNOSTIC_H +#define PRISM_INTERNAL_DIAGNOSTIC_H + +#include "prism/arena.h" +#include "prism/diagnostic.h" +#include "prism/list.h" + +/** + * The diagnostic IDs of all of the diagnostics, used to communicate the types + * of errors between the parser and the user. + */ +typedef enum { + /* These are the error diagnostics. */ + <%- errors.each do |error| -%> + PM_ERR_<%= error.name %>, + <%- end -%> + + /* These are the warning diagnostics. */ + <%- warnings.each do |warning| -%> + PM_WARN_<%= warning.name %>, + <%- end -%> +} pm_diagnostic_id_t; + +/** + * This struct represents a diagnostic generated during parsing. + */ +struct pm_diagnostic_t { + /** The embedded base node. */ + pm_list_node_t node; + + /** The location of the diagnostic in the source. */ + pm_location_t location; + + /** The ID of the diagnostic. */ + pm_diagnostic_id_t diag_id; + + /** The message associated with the diagnostic. */ + const char *message; + + /** + * The level of the diagnostic, see `pm_error_level_t` and + * `pm_warning_level_t` for possible values. + */ + uint8_t level; +}; + +/** + * Append a diagnostic to the given list of diagnostics that is using shared + * memory for its message. + * + * @param arena The arena to allocate from. + * @param list The list to append to. + * @param start The source offset of the start of the diagnostic. + * @param length The length of the diagnostic. + * @param diag_id The diagnostic ID. + */ +void pm_diagnostic_list_append(pm_arena_t *arena, pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id); + +/** + * Append a diagnostic to the given list of diagnostics that is using a format + * string for its message. + * + * @param arena The arena to allocate from. + * @param list The list to append to. + * @param start The source offset of the start of the diagnostic. + * @param length The length of the diagnostic. + * @param diag_id The diagnostic ID. + * @param ... 
The arguments to the format string for the message. + */ +void pm_diagnostic_list_append_format(pm_arena_t *arena, pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id, ...); + +/** + * A struct used as an opaque pointer for the client to iterate through the + * diagnostics found while parsing. + */ +struct pm_diagnostics_iter_t { + /** The number of diagnostics in the list. */ + size_t size; + + /** The current node in the list. */ + const pm_list_node_t *current; +}; + +#endif diff --git a/templates/src/diagnostic.c.erb b/templates/src/diagnostic.c.erb index 996599b0e5..a4aba2e586 100644 --- a/templates/src/diagnostic.c.erb +++ b/templates/src/diagnostic.c.erb @@ -1,6 +1,8 @@ -#include "prism/diagnostic.h" +#include "prism/internal/diagnostic.h" #include "prism/compiler/inline.h" + +#include "prism/internal/allocator.h" #include "prism/internal/arena.h" #include "prism/internal/list.h" @@ -8,6 +10,7 @@ #include #include #include +#include #define PM_DIAGNOSTIC_ID_MAX <%= errors.length + warnings.length %> @@ -425,8 +428,8 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = { /** * Get the human-readable name of the given diagnostic ID. 
*/ -const char * -pm_diagnostic_id_str(pm_diagnostic_id_t diag_id) { +static const char * +pm_diagnostic_id_name(pm_diagnostic_id_t diag_id) { switch (diag_id) { <%- errors.each do |error| -%> case PM_ERR_<%= error.name %>: return "<%= error.name.downcase %>"; @@ -441,7 +444,7 @@ pm_diagnostic_id_str(pm_diagnostic_id_t diag_id) { } static PRISM_INLINE const char * -pm_diagnostic_message(pm_diagnostic_id_t diag_id) { +pm_diagnostic_id_message(pm_diagnostic_id_t diag_id) { assert(diag_id < PM_DIAGNOSTIC_ID_MAX); const char *message = diagnostic_messages[diag_id].message; @@ -451,12 +454,52 @@ pm_diagnostic_message(pm_diagnostic_id_t diag_id) { } static PRISM_INLINE uint8_t -pm_diagnostic_level(pm_diagnostic_id_t diag_id) { +pm_diagnostic_id_level(pm_diagnostic_id_t diag_id) { assert(diag_id < PM_DIAGNOSTIC_ID_MAX); return (uint8_t) diagnostic_messages[diag_id].level; } +/** + * Get the type of the given diagnostic. + */ +const char * +pm_diagnostic_type(const pm_diagnostic_t *diagnostic) { + return pm_diagnostic_id_name(diagnostic->diag_id); +} + +/** + * Get the location of the given diagnostic. + */ +pm_location_t +pm_diagnostic_location(const pm_diagnostic_t *diagnostic) { + return diagnostic->location; +} + +/** + * Get the message of the given diagnostic. + */ +const char * +pm_diagnostic_message(const pm_diagnostic_t *diagnostic) { + return diagnostic->message; +} + +/** + * Get the error level associated with the given diagnostic. + */ +pm_error_level_t +pm_diagnostic_error_level(const pm_diagnostic_t *diagnostic) { + return (pm_error_level_t) pm_diagnostic_id_level(diagnostic->diag_id); +} + +/** + * Get the warning level associated with the given diagnostic. + */ +pm_warning_level_t +pm_diagnostic_warning_level(const pm_diagnostic_t *diagnostic) { + return (pm_warning_level_t) pm_diagnostic_id_level(diagnostic->diag_id); +} + /** * Append an error to the given list of diagnostic. 
*/ @@ -467,8 +510,8 @@ pm_diagnostic_list_append(pm_arena_t *arena, pm_list_t *list, uint32_t start, ui *diagnostic = (pm_diagnostic_t) { .location = { .start = start, .length = length }, .diag_id = diag_id, - .message = pm_diagnostic_message(diag_id), - .level = pm_diagnostic_level(diag_id) + .message = pm_diagnostic_id_message(diag_id), + .level = pm_diagnostic_id_level(diag_id) }; pm_list_append(list, (pm_list_node_t *) diagnostic); @@ -483,7 +526,7 @@ pm_diagnostic_list_append_format(pm_arena_t *arena, pm_list_t *list, uint32_t st va_list arguments; va_start(arguments, diag_id); - const char *format = pm_diagnostic_message(diag_id); + const char *format = pm_diagnostic_id_message(diag_id); int result = vsnprintf(NULL, 0, format, arguments); va_end(arguments); @@ -504,9 +547,36 @@ pm_diagnostic_list_append_format(pm_arena_t *arena, pm_list_t *list, uint32_t st .location = { .start = start, .length = length }, .diag_id = diag_id, .message = message, - .level = pm_diagnostic_level(diag_id) + .level = pm_diagnostic_id_level(diag_id) }; pm_list_append(list, (pm_list_node_t *) diagnostic); } +/** + * Returns the number of diagnostics associated with the diagnostics iterator. + */ +size_t +pm_diagnostics_iter_size(const pm_diagnostics_iter_t *iter) { + return iter->size; +} + +/** + * Returns the next diagnostic in the iteration, or NULL if there are no more + * diagnostics. + */ +const pm_diagnostic_t * +pm_diagnostics_iter_next(pm_diagnostics_iter_t *iter) { + if (iter->current == NULL) return NULL; + const pm_diagnostic_t *diagnostic = (const pm_diagnostic_t *) iter->current; + iter->current = iter->current->next; + return diagnostic; +} + +/** + * Frees the memory associated with the given diagnostics iterator. 
+ */ +void +pm_diagnostics_iter_free(pm_diagnostics_iter_t *iter) { + xfree(iter); +} diff --git a/templates/src/serialize.c.erb b/templates/src/serialize.c.erb index 7edb297501..782e09219d 100644 --- a/templates/src/serialize.c.erb +++ b/templates/src/serialize.c.erb @@ -9,6 +9,7 @@ #include "prism/internal/buffer.h" #include "prism/internal/comments.h" +#include "prism/internal/diagnostic.h" #include "prism/internal/encoding.h" #include "prism/internal/list.h" #include "prism/internal/options.h" @@ -16,7 +17,6 @@ #include "prism.h" #include "prism/ast.h" -#include "prism/diagnostic.h" #include "prism/line_offset_list.h" #include diff --git a/templates/template.rb b/templates/template.rb index 70fa17c83d..b3ccf373d3 100755 --- a/templates/template.rb +++ b/templates/template.rb @@ -684,8 +684,8 @@ def locals TEMPLATES = [ "ext/prism/api_node.c", "include/prism/ast.h", - "include/prism/diagnostic.h", "include/prism/node_new.h", + "include/prism/internal/diagnostic.h", "javascript/src/deserialize.js", "javascript/src/nodes.js", "javascript/src/visitor.js", From f2e8648522b099e1eefbb59e639b9f5885424378 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 18 Mar 2026 11:31:44 -0400 Subject: [PATCH 060/100] Move magic comments entirely internal --- ext/prism/extension.c | 17 ++++--- include/prism/internal/magic_comments.h | 40 +++++++++++++++ include/prism/magic_comments.h | 67 +++++++++++++++++++++++++ include/prism/parser.h | 40 ++++++--------- src/magic_comments.c | 51 +++++++++++++++++++ src/parser.c | 21 +++++--- src/prism.c | 1 + templates/src/serialize.c.erb | 1 + 8 files changed, 199 insertions(+), 39 deletions(-) create mode 100644 include/prism/internal/magic_comments.h create mode 100644 include/prism/magic_comments.h create mode 100644 src/magic_comments.c diff --git a/ext/prism/extension.c b/ext/prism/extension.c index 1980e44d89..af67bee388 100644 --- a/ext/prism/extension.c +++ b/ext/prism/extension.c @@ -513,8 +513,12 @@ parser_comments(const 
pm_parser_t *parser, VALUE source, bool freeze) { */ static inline VALUE parser_magic_comment(VALUE source, bool freeze, const pm_magic_comment_t *magic_comment) { - VALUE key_loc = parser_location(source, freeze, magic_comment->key.start, magic_comment->key.length); - VALUE value_loc = parser_location(source, freeze, magic_comment->value.start, magic_comment->value.length); + pm_location_t key = pm_magic_comment_key(magic_comment); + pm_location_t value = pm_magic_comment_value(magic_comment); + + VALUE key_loc = parser_location(source, freeze, key.start, key.length); + VALUE value_loc = parser_location(source, freeze, value.start, value.length); + VALUE argv[] = { key_loc, value_loc }; return rb_class_new_instance_freeze(2, argv, rb_cPrismMagicComment, freeze); } @@ -524,18 +528,19 @@ parser_magic_comment(VALUE source, bool freeze, const pm_magic_comment_t *magic_ */ static VALUE parser_magic_comments(const pm_parser_t *parser, VALUE source, bool freeze) { - const pm_list_t *magic_comments_list = pm_parser_magic_comments(parser); - VALUE magic_comments = rb_ary_new_capa(magic_comments_list->size); + pm_magic_comments_iter_t *iter = pm_parser_magic_comments(parser); + VALUE magic_comments = rb_ary_new_capa(pm_magic_comments_iter_size(iter)); for ( - const pm_magic_comment_t *magic_comment = (const pm_magic_comment_t *) magic_comments_list->head; + const pm_magic_comment_t *magic_comment = pm_magic_comments_iter_next(iter); magic_comment != NULL; - magic_comment = (const pm_magic_comment_t *) magic_comment->node.next + magic_comment = pm_magic_comments_iter_next(iter) ) { VALUE value = parser_magic_comment(source, freeze, magic_comment); rb_ary_push(magic_comments, value); } + pm_magic_comments_iter_free(iter); if (freeze) rb_obj_freeze(magic_comments); return magic_comments; } diff --git a/include/prism/internal/magic_comments.h b/include/prism/internal/magic_comments.h new file mode 100644 index 0000000000..499f8e5e09 --- /dev/null +++ 
b/include/prism/internal/magic_comments.h @@ -0,0 +1,40 @@ +/** + * @file internal/magic_comments.h + */ +#ifndef PRISM_INTERNAL_MAGIC_COMMENTS_H +#define PRISM_INTERNAL_MAGIC_COMMENTS_H + +#include "prism/magic_comments.h" + +#include "prism/list.h" + +/** + * This is a node in the linked list of magic comments that we've found while + * parsing. + * + * @extends pm_list_node_t + */ +struct pm_magic_comment_t { + /** The embedded base node. */ + pm_list_node_t node; + + /** The key of the magic comment. */ + pm_location_t key; + + /** The value of the magic comment. */ + pm_location_t value; +}; + +/** + * A struct used as an opaque pointer for the client to iterate through the + * magic comments found while parsing. + */ +struct pm_magic_comments_iter_t { + /** The number of magic comments in the list. */ + size_t size; + + /** The current node in the list. */ + const pm_list_node_t *current; +}; + +#endif diff --git a/include/prism/magic_comments.h b/include/prism/magic_comments.h new file mode 100644 index 0000000000..10f9cfa32d --- /dev/null +++ b/include/prism/magic_comments.h @@ -0,0 +1,67 @@ +/** + * @file magic_comments.h + */ +#ifndef PRISM_MAGIC_COMMENTS_H +#define PRISM_MAGIC_COMMENTS_H + +#include "prism/compiler/exported.h" + +#include "prism/ast.h" + +#include + +/** An opaque pointer to a magic comment found while parsing. */ +typedef struct pm_magic_comment_t pm_magic_comment_t; + +/** + * Returns the location of the key associated with the given magic comment. + * + * @param comment the magic comment whose key location we want to get + * @return the location of the key associated with the given magic comment + */ +PRISM_EXPORTED_FUNCTION pm_location_t pm_magic_comment_key(const pm_magic_comment_t *comment); + +/** + * Returns the location of the value associated with the given magic comment. 
+ * + * @param comment the magic comment whose value location we want to get + * @return the location of the value associated with the given magic comment + */ +PRISM_EXPORTED_FUNCTION pm_location_t pm_magic_comment_value(const pm_magic_comment_t *comment); + +/* An opaque pointer to an iterator that can be used to iterate over the + * magic comments associated with a parser. */ +typedef struct pm_magic_comments_iter_t pm_magic_comments_iter_t; + +/** + * Returns the number of magic comments associated with the magic comments iterator. + * + * @param iter the iterator to get the number of magic comments from + * @return the number of magic comments associated with the magic comments iterator + * + * \public \memberof pm_magic_comments_iter_t + */ +PRISM_EXPORTED_FUNCTION size_t pm_magic_comments_iter_size(const pm_magic_comments_iter_t *iter); + +/** + * Returns the next magic comment in the iteration, or NULL if there are no more + * magic comments. + * + * @param iter the iterator to get the next magic comment from + * @return the next magic comment in the iteration, or NULL if there are no more + * magic comments. + * + * \public \memberof pm_magic_comments_iter_t + */ +PRISM_EXPORTED_FUNCTION const pm_magic_comment_t * pm_magic_comments_iter_next(pm_magic_comments_iter_t *iter); + +/** + * Frees the memory associated with the given magic comments iterator. + * + * @param iter the iterator to free + * + * \public \memberof pm_magic_comments_iter_t + */ +PRISM_EXPORTED_FUNCTION void pm_magic_comments_iter_free(pm_magic_comments_iter_t *iter); + +#endif diff --git a/include/prism/parser.h b/include/prism/parser.h index c00743cbe1..bc1191797d 100644 --- a/include/prism/parser.h +++ b/include/prism/parser.h @@ -11,29 +11,13 @@ #include "prism/diagnostic.h" #include "prism/line_offset_list.h" #include "prism/list.h" +#include "prism/magic_comments.h" /** * The parser used to parse Ruby source. 
*/ typedef struct pm_parser_t pm_parser_t; -/** - * This is a node in the linked list of magic comments that we've found while - * parsing. - * - * @extends pm_list_node_t - */ -typedef struct { - /** The embedded base node. */ - pm_list_node_t node; - - /** The key of the magic comment. */ - pm_location_t key; - - /** The value of the magic comment. */ - pm_location_t value; -} pm_magic_comment_t; - /** * When the encoding that is being used to parse the source is changed by prism, * we provide the ability here to call out to a user-defined function. @@ -108,14 +92,6 @@ PRISM_EXPORTED_FUNCTION int32_t pm_parser_start_line(const pm_parser_t *parser); */ PRISM_EXPORTED_FUNCTION const char * pm_parser_encoding_name(const pm_parser_t *parser); -/** - * Returns the magic comments that are associated with the given parser. - * - * @param parser the parser whose magic comments we want to get - * @return the magic comments that are associated with the given parser - */ -PRISM_EXPORTED_FUNCTION const pm_list_t * pm_parser_magic_comments(const pm_parser_t *parser); - /** * Returns the line offsets that are associated with the given parser. * @@ -176,6 +152,20 @@ PRISM_EXPORTED_FUNCTION int pm_parser_lex_state(const pm_parser_t *parser); */ PRISM_EXPORTED_FUNCTION pm_comments_iter_t * pm_parser_comments(const pm_parser_t *parser); +/** + * Returns an iterator that knows how to iterate over the magic comments that + * are associated with the given parser. + * + * @param parser the parser whose magic comments we want to get + * @return the iterator that knows how to iterate over the magic comments that are + * associated with the given parser. It is the responsibility of the caller + * to free the memory associated with the iterator through + * pm_magic_comments_iter_free. 
+ * + * \public \memberof pm_parser + */ +PRISM_EXPORTED_FUNCTION pm_magic_comments_iter_t * pm_parser_magic_comments(const pm_parser_t *parser); + /** * Returns an iterator that knows how to iterate over the errors that are * associated with the given parser. diff --git a/src/magic_comments.c b/src/magic_comments.c new file mode 100644 index 0000000000..fc570868a6 --- /dev/null +++ b/src/magic_comments.c @@ -0,0 +1,51 @@ +#include "prism/internal/magic_comments.h" + +#include "prism/internal/allocator.h" +#include "prism/internal/parser.h" + +#include + +/** + * Returns the location associated with the given magic comment key. + */ +pm_location_t +pm_magic_comment_key(const pm_magic_comment_t *magic_comment) { + return magic_comment->key; +} + +/** + * Returns the location associated with the given magic comment value. + */ +pm_location_t +pm_magic_comment_value(const pm_magic_comment_t *magic_comment) { + return magic_comment->value; +} + +/** + * Returns the number of magic comments associated with the magic comment + * iterator. + */ +size_t +pm_magic_comments_iter_size(const pm_magic_comments_iter_t *iter) { + return iter->size; +} + +/** + * Returns the next magic comment in the iteration, or NULL if there are no more + * magic comments. + */ +const pm_magic_comment_t * +pm_magic_comments_iter_next(pm_magic_comments_iter_t *iter) { + if (iter->current == NULL) return NULL; + const pm_magic_comment_t *magic_comment = (const pm_magic_comment_t *) iter->current; + iter->current = iter->current->next; + return magic_comment; +} + +/** + * Frees the memory associated with the given magic comments iterator. 
+ */ +void +pm_magic_comments_iter_free(pm_magic_comments_iter_t *iter) { + xfree(iter); +} diff --git a/src/parser.c b/src/parser.c index 6900b228e7..84d8dff992 100644 --- a/src/parser.c +++ b/src/parser.c @@ -4,6 +4,7 @@ #include "prism/internal/comments.h" #include "prism/internal/diagnostic.h" #include "prism/internal/encoding.h" +#include "prism/internal/magic_comments.h" #include @@ -68,14 +69,6 @@ pm_parser_encoding_name(const pm_parser_t *parser) { return parser->encoding->name; } -/** - * Returns the magic comments that are associated with the given parser. - */ -const pm_list_t * -pm_parser_magic_comments(const pm_parser_t *parser) { - return &parser->magic_comment_list; -} - /** * Returns the line offsets that are associated with the given parser. * @@ -136,6 +129,18 @@ pm_parser_comments(const pm_parser_t *parser) { return iter; } +/** + * Returns an iterator that knows how to iterate over the magic comments that + * are associated with the given parser. + */ +pm_magic_comments_iter_t * +pm_parser_magic_comments(const pm_parser_t *parser) { + pm_magic_comments_iter_t *iter = (pm_magic_comments_iter_t *) xmalloc(sizeof(pm_magic_comments_iter_t)); + iter->size = parser->magic_comment_list.size; + iter->current = parser->magic_comment_list.head; + return iter; +} + /** * Returns an iterator that knows how to iterate over the errors that are * associated with the given parser. 
diff --git a/src/prism.c b/src/prism.c index f948473db8..47cfdf6ea4 100644 --- a/src/prism.c +++ b/src/prism.c @@ -17,6 +17,7 @@ #include "prism/internal/isinf.h" #include "prism/internal/line_offset_list.h" #include "prism/internal/list.h" +#include "prism/internal/magic_comments.h" #include "prism/internal/memchr.h" #include "prism/internal/node.h" #include "prism/internal/options.h" diff --git a/templates/src/serialize.c.erb b/templates/src/serialize.c.erb index 782e09219d..2d7bbfcb93 100644 --- a/templates/src/serialize.c.erb +++ b/templates/src/serialize.c.erb @@ -12,6 +12,7 @@ #include "prism/internal/diagnostic.h" #include "prism/internal/encoding.h" #include "prism/internal/list.h" +#include "prism/internal/magic_comments.h" #include "prism/internal/options.h" #include "prism/internal/parser.h" From bbc2023d0bfa001cc75e6a537dba954bcb35bcf4 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 18 Mar 2026 11:37:42 -0400 Subject: [PATCH 061/100] Fix up build --- include/prism/internal/comments.h | 2 +- include/prism/internal/list.h | 52 +++++++++++++++- include/prism/internal/magic_comments.h | 2 +- include/prism/internal/parser.h | 2 +- include/prism/internal/serialize.h | 2 +- include/prism/list.h | 61 ------------------- include/prism/parser.h | 1 - prism.gemspec | 9 ++- src/prism.c | 1 + .../include/prism/internal/diagnostic.h.erb | 3 +- templates/src/serialize.c.erb | 6 +- 11 files changed, 69 insertions(+), 72 deletions(-) delete mode 100644 include/prism/list.h diff --git a/include/prism/internal/comments.h b/include/prism/internal/comments.h index c7adf52464..8b1009daf6 100644 --- a/include/prism/internal/comments.h +++ b/include/prism/internal/comments.h @@ -6,7 +6,7 @@ #include "prism/comments.h" -#include "prism/list.h" +#include "prism/internal/list.h" /** * A comment found while parsing. 
diff --git a/include/prism/internal/list.h b/include/prism/internal/list.h index f770b1dd2d..9a73e6bd3f 100644 --- a/include/prism/internal/list.h +++ b/include/prism/internal/list.h @@ -6,7 +6,57 @@ #ifndef PRISM_INTERNAL_LIST_H #define PRISM_INTERNAL_LIST_H -#include "prism/list.h" +#include + +/** + * This struct represents an abstract linked list that provides common + * functionality. It is meant to be used any time a linked list is necessary to + * store data. + * + * The linked list itself operates off a set of pointers. Because the pointers + * are not necessarily sequential, they can be of any size. We use this fact to + * allow the consumer of this linked list to extend the node struct to include + * any data they want. This is done by using the pm_list_node_t as the first + * member of the struct. + * + * For example, if we want to store a list of integers, we can do the following: + * + * ```c + * typedef struct { + * pm_list_node_t node; + * int value; + * } pm_int_node_t; + * + * pm_list_t list = { 0 }; + * pm_int_node_t *node = xmalloc(sizeof(pm_int_node_t)); + * node->value = 5; + * + * pm_list_append(&list, &node->node); + * ``` + * + * The pm_list_t struct is used to represent the overall linked list. It + * contains a pointer to the head and tail of the list. This allows for easy + * iteration and appending of new nodes. + */ +typedef struct pm_list_node { + /** A pointer to the next node in the list. */ + struct pm_list_node *next; +} pm_list_node_t; + +/** + * This represents the overall linked list. It keeps a pointer to the head and + * tail so that iteration is easy and pushing new nodes is easy. + */ +typedef struct { + /** The size of the list. */ + size_t size; + + /** A pointer to the head of the list. */ + pm_list_node_t *head; + + /** A pointer to the tail of the list. */ + pm_list_node_t *tail; +} pm_list_t; /** * Returns the size of the list. 
diff --git a/include/prism/internal/magic_comments.h b/include/prism/internal/magic_comments.h index 499f8e5e09..eb4154928d 100644 --- a/include/prism/internal/magic_comments.h +++ b/include/prism/internal/magic_comments.h @@ -6,7 +6,7 @@ #include "prism/magic_comments.h" -#include "prism/list.h" +#include "prism/internal/list.h" /** * This is a node in the linked list of magic comments that we've found while diff --git a/include/prism/internal/parser.h b/include/prism/internal/parser.h index f8baf49cdb..f6618f67ed 100644 --- a/include/prism/internal/parser.h +++ b/include/prism/internal/parser.h @@ -9,13 +9,13 @@ #include "prism/compiler/accel.h" #include "prism/internal/encoding.h" +#include "prism/internal/list.h" #include "prism/internal/options.h" #include "prism/internal/static_literals.h" #include "prism/arena.h" #include "prism/ast.h" #include "prism/line_offset_list.h" -#include "prism/list.h" #include "prism/parser.h" #include diff --git a/include/prism/internal/serialize.h b/include/prism/internal/serialize.h index a67ebd1a18..c691a1fed9 100644 --- a/include/prism/internal/serialize.h +++ b/include/prism/internal/serialize.h @@ -5,11 +5,11 @@ #define PRISM_INTERNAL_SERIALIZE_H #include "prism/internal/encoding.h" +#include "prism/internal/list.h" #include "prism/ast.h" #include "prism/buffer.h" #include "prism/excludes.h" -#include "prism/list.h" #include "prism/parser.h" /* We optionally support serializing to a binary string. For systems that do not diff --git a/include/prism/list.h b/include/prism/list.h deleted file mode 100644 index c9fb18278c..0000000000 --- a/include/prism/list.h +++ /dev/null @@ -1,61 +0,0 @@ -/** - * @file list.h - * - * An abstract linked list. - */ -#ifndef PRISM_LIST_H -#define PRISM_LIST_H - -#include - -/** - * This struct represents an abstract linked list that provides common - * functionality. It is meant to be used any time a linked list is necessary to - * store data. 
- * - * The linked list itself operates off a set of pointers. Because the pointers - * are not necessarily sequential, they can be of any size. We use this fact to - * allow the consumer of this linked list to extend the node struct to include - * any data they want. This is done by using the pm_list_node_t as the first - * member of the struct. - * - * For example, if we want to store a list of integers, we can do the following: - * - * ```c - * typedef struct { - * pm_list_node_t node; - * int value; - * } pm_int_node_t; - * - * pm_list_t list = { 0 }; - * pm_int_node_t *node = xmalloc(sizeof(pm_int_node_t)); - * node->value = 5; - * - * pm_list_append(&list, &node->node); - * ``` - * - * The pm_list_t struct is used to represent the overall linked list. It - * contains a pointer to the head and tail of the list. This allows for easy - * iteration and appending of new nodes. - */ -typedef struct pm_list_node { - /** A pointer to the next node in the list. */ - struct pm_list_node *next; -} pm_list_node_t; - -/** - * This represents the overall linked list. It keeps a pointer to the head and - * tail so that iteration is easy and pushing new nodes is easy. - */ -typedef struct { - /** The size of the list. */ - size_t size; - - /** A pointer to the head of the list. */ - pm_list_node_t *head; - - /** A pointer to the tail of the list. 
*/ - pm_list_node_t *tail; -} pm_list_t; - -#endif diff --git a/include/prism/parser.h b/include/prism/parser.h index bc1191797d..9e94ad1631 100644 --- a/include/prism/parser.h +++ b/include/prism/parser.h @@ -10,7 +10,6 @@ #include "prism/comments.h" #include "prism/diagnostic.h" #include "prism/line_offset_list.h" -#include "prism/list.h" #include "prism/magic_comments.h" /** diff --git a/prism.gemspec b/prism.gemspec index 70e5aefe1b..ca21b792cc 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -62,6 +62,7 @@ Gem::Specification.new do |spec| "include/prism/internal/bit.h", "include/prism/internal/buffer.h", "include/prism/internal/char.h", + "include/prism/internal/comments.h", "include/prism/internal/constant_pool.h", "include/prism/internal/diagnostic.h", "include/prism/internal/encoding.h", @@ -69,10 +70,13 @@ Gem::Specification.new do |spec| "include/prism/internal/isinf.h", "include/prism/internal/line_offset_list.h", "include/prism/internal/list.h", + "include/prism/internal/magic_comments.h", "include/prism/internal/memchr.h", "include/prism/internal/node.h", "include/prism/internal/options.h", + "include/prism/internal/parser.h", "include/prism/internal/regexp.h", + "include/prism/internal/serialize.h", "include/prism/internal/static_literals.h", "include/prism/internal/strncasecmp.h", "include/prism/internal/strings.h", @@ -80,12 +84,13 @@ Gem::Specification.new do |spec| "include/prism/arena.h", "include/prism/ast.h", "include/prism/buffer.h", + "include/prism/comments.h", "include/prism/constant_pool.h", "include/prism/diagnostic.h", "include/prism/excludes.h", "include/prism/integer.h", "include/prism/line_offset_list.h", - "include/prism/list.h", + "include/prism/magic_comments.h", "include/prism/node.h", "include/prism/node_new.h", "include/prism/options.h", @@ -185,12 +190,14 @@ Gem::Specification.new do |spec| "src/arena.c", "src/buffer.c", "src/char.c", + "src/comments.c", "src/constant_pool.c", "src/diagnostic.c", "src/encoding.c", 
"src/integer.c", "src/line_offset_list.c", "src/list.c", + "src/magic_comments.c", "src/memchr.c", "src/node.c", "src/options.c", diff --git a/src/prism.c b/src/prism.c index 47cfdf6ea4..06d3dc42b9 100644 --- a/src/prism.c +++ b/src/prism.c @@ -29,6 +29,7 @@ #include "prism/internal/strncasecmp.h" #include "prism/internal/strpbrk.h" +#include "prism/excludes.h" #include "prism/node_new.h" #include diff --git a/templates/include/prism/internal/diagnostic.h.erb b/templates/include/prism/internal/diagnostic.h.erb index f15e3936a1..ca294e3972 100644 --- a/templates/include/prism/internal/diagnostic.h.erb +++ b/templates/include/prism/internal/diagnostic.h.erb @@ -6,9 +6,10 @@ #ifndef PRISM_INTERNAL_DIAGNOSTIC_H #define PRISM_INTERNAL_DIAGNOSTIC_H +#include "prism/internal/list.h" + #include "prism/arena.h" #include "prism/diagnostic.h" -#include "prism/list.h" /** * The diagnostic IDs of all of the diagnostics, used to communicate the types diff --git a/templates/src/serialize.c.erb b/templates/src/serialize.c.erb index 2d7bbfcb93..405332e339 100644 --- a/templates/src/serialize.c.erb +++ b/templates/src/serialize.c.erb @@ -1,8 +1,8 @@ #include "prism/excludes.h" -// We optionally support serializing to a binary string. For systems that don't -// want or need this functionality, it can be turned off with the -// PRISM_EXCLUDE_SERIALIZATION define. +/* We optionally support serializing to a binary string. For systems that do not + * want or need this functionality, it can be turned off with the + * PRISM_EXCLUDE_SERIALIZATION define. 
*/ #ifndef PRISM_EXCLUDE_SERIALIZATION #include "prism/compiler/inline.h" From e8606f7022b366b1c093abb5193bfd6c2a7fa017 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 18 Mar 2026 11:39:17 -0400 Subject: [PATCH 062/100] Do not define a shim if the define is set --- include/prism/prettyprint.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/include/prism/prettyprint.h b/include/prism/prettyprint.h index b59cfe1460..0c81618e7f 100644 --- a/include/prism/prettyprint.h +++ b/include/prism/prettyprint.h @@ -8,11 +8,7 @@ #include "prism/excludes.h" -#ifdef PRISM_EXCLUDE_PRETTYPRINT - -#define pm_prettyprint(output_buffer_, parser_, node_) - -#else +#ifndef PRISM_EXCLUDE_PRETTYPRINT #include "prism/compiler/exported.h" From e0d17eb9e787036c91690962332f35a08b2d3a3e Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 18 Mar 2026 12:02:52 -0400 Subject: [PATCH 063/100] Remove iterators, just use callbacks --- ext/prism/extension.c | 203 ++++++++++-------- include/prism/comments.h | 35 --- include/prism/internal/comments.h | 12 -- include/prism/internal/magic_comments.h | 12 -- include/prism/parser.h | 116 ++++++---- src/comments.c | 28 --- src/magic_comments.c | 29 --- src/parser.c | 104 ++++++--- .../include/prism/internal/diagnostic.h.erb | 12 -- 9 files changed, 264 insertions(+), 287 deletions(-) diff --git a/ext/prism/extension.c b/ext/prism/extension.c index af67bee388..27cc3839ed 100644 --- a/ext/prism/extension.c +++ b/ext/prism/extension.c @@ -485,26 +485,30 @@ parser_comment(VALUE source, bool freeze, const pm_comment_t *comment) { return rb_class_new_instance_freeze(1, argv, type, freeze); } +typedef struct { + VALUE comments; + VALUE source; + bool freeze; +} parser_comments_each_data_t; + +static void +parser_comments_each(const pm_comment_t *comment, void *data) { + parser_comments_each_data_t *each_data = (parser_comments_each_data_t *) data; + VALUE value = parser_comment(each_data->source, each_data->freeze, comment); + 
rb_ary_push(each_data->comments, value); +} + /** * Extract the comments out of the parser into an array. */ static VALUE parser_comments(const pm_parser_t *parser, VALUE source, bool freeze) { - pm_comments_iter_t *comments_iter = pm_parser_comments(parser); - VALUE comments = rb_ary_new_capa(pm_comments_iter_size(comments_iter)); - - for ( - const pm_comment_t *comment = pm_comments_iter_next(comments_iter); - comment != NULL; - comment = pm_comments_iter_next(comments_iter) - ) { - VALUE value = parser_comment(source, freeze, comment); - rb_ary_push(comments, value); - } + VALUE comments = rb_ary_new_capa(pm_parser_comments_size(parser)); - pm_comments_iter_free(comments_iter); - if (freeze) rb_obj_freeze(comments); + parser_comments_each_data_t each_data = { comments, source, freeze }; + pm_parser_comments_each(parser, parser_comments_each, &each_data); + if (freeze) rb_obj_freeze(comments); return comments; } @@ -523,24 +527,29 @@ parser_magic_comment(VALUE source, bool freeze, const pm_magic_comment_t *magic_ return rb_class_new_instance_freeze(2, argv, rb_cPrismMagicComment, freeze); } +typedef struct { + VALUE magic_comments; + VALUE source; + bool freeze; +} parser_magic_comments_each_data_t; + +static void +parser_magic_comments_each(const pm_magic_comment_t *magic_comment, void *data) { + parser_magic_comments_each_data_t *each_data = (parser_magic_comments_each_data_t *) data; + VALUE value = parser_magic_comment(each_data->source, each_data->freeze, magic_comment); + rb_ary_push(each_data->magic_comments, value); +} + /** * Extract the magic comments out of the parser into an array. 
*/ static VALUE parser_magic_comments(const pm_parser_t *parser, VALUE source, bool freeze) { - pm_magic_comments_iter_t *iter = pm_parser_magic_comments(parser); - VALUE magic_comments = rb_ary_new_capa(pm_magic_comments_iter_size(iter)); - - for ( - const pm_magic_comment_t *magic_comment = pm_magic_comments_iter_next(iter); - magic_comment != NULL; - magic_comment = pm_magic_comments_iter_next(iter) - ) { - VALUE value = parser_magic_comment(source, freeze, magic_comment); - rb_ary_push(magic_comments, value); - } + VALUE magic_comments = rb_ary_new_capa(pm_parser_magic_comments_size(parser)); + + parser_magic_comments_each_data_t each_data = { magic_comments, source, freeze }; + pm_parser_magic_comments_each(parser, parser_magic_comments_each, &each_data); - pm_magic_comments_iter_free(iter); if (freeze) rb_obj_freeze(magic_comments); return magic_comments; } @@ -560,90 +569,102 @@ parser_data_loc(const pm_parser_t *parser, VALUE source, bool freeze) { } } +typedef struct { + VALUE errors; + rb_encoding *encoding; + VALUE source; + bool freeze; +} parser_errors_each_data_t; + +static void +parser_errors_each(const pm_diagnostic_t *diagnostic, void *data) { + parser_errors_each_data_t *each_data = (parser_errors_each_data_t *) data; + + VALUE type = ID2SYM(rb_intern(pm_diagnostic_type(diagnostic))); + VALUE message = rb_obj_freeze(rb_enc_str_new_cstr(pm_diagnostic_message(diagnostic), each_data->encoding)); + VALUE location = PARSER_LOCATION(each_data->source, each_data->freeze, pm_diagnostic_location(diagnostic)); + + pm_error_level_t error_level = pm_diagnostic_error_level(diagnostic); + VALUE level = Qnil; + + switch (error_level) { + case PM_ERROR_LEVEL_SYNTAX: + level = ID2SYM(rb_intern("syntax")); + break; + case PM_ERROR_LEVEL_ARGUMENT: + level = ID2SYM(rb_intern("argument")); + break; + case PM_ERROR_LEVEL_LOAD: + level = ID2SYM(rb_intern("load")); + break; + default: + rb_raise(rb_eRuntimeError, "Unknown level: %" PRIu8, error_level); + } + + VALUE 
argv[] = { type, message, location, level }; + VALUE value = rb_class_new_instance_freeze(4, argv, rb_cPrismParseError, each_data->freeze); + rb_ary_push(each_data->errors, value); +} + /** * Extract the errors out of the parser into an array. */ static VALUE parser_errors(const pm_parser_t *parser, rb_encoding *encoding, VALUE source, bool freeze) { - pm_diagnostics_iter_t *iter = pm_parser_errors(parser); - VALUE errors = rb_ary_new_capa(pm_diagnostics_iter_size(iter)); - - for ( - const pm_diagnostic_t *error = pm_diagnostics_iter_next(iter); - error != NULL; - error = pm_diagnostics_iter_next(iter) - ) { - VALUE type = ID2SYM(rb_intern(pm_diagnostic_type(error))); - VALUE message = rb_obj_freeze(rb_enc_str_new_cstr(pm_diagnostic_message(error), encoding)); - VALUE location = PARSER_LOCATION(source, freeze, pm_diagnostic_location(error)); - - pm_error_level_t error_level = pm_diagnostic_error_level(error); - VALUE level = Qnil; - - switch (error_level) { - case PM_ERROR_LEVEL_SYNTAX: - level = ID2SYM(rb_intern("syntax")); - break; - case PM_ERROR_LEVEL_ARGUMENT: - level = ID2SYM(rb_intern("argument")); - break; - case PM_ERROR_LEVEL_LOAD: - level = ID2SYM(rb_intern("load")); - break; - default: - rb_raise(rb_eRuntimeError, "Unknown level: %" PRIu8, error_level); - } + VALUE errors = rb_ary_new_capa(pm_parser_errors_size(parser)); - VALUE argv[] = { type, message, location, level }; - VALUE value = rb_class_new_instance_freeze(4, argv, rb_cPrismParseError, freeze); - rb_ary_push(errors, value); - } + parser_errors_each_data_t each_data = { errors, encoding, source, freeze }; + pm_parser_errors_each(parser, parser_errors_each, &each_data); - pm_diagnostics_iter_free(iter); if (freeze) rb_obj_freeze(errors); - return errors; } +typedef struct { + VALUE warnings; + rb_encoding *encoding; + VALUE source; + bool freeze; +} parser_warnings_each_data_t; + +static void +parser_warnings_each(const pm_diagnostic_t *diagnostic, void *data) { + parser_warnings_each_data_t 
*each_data = (parser_warnings_each_data_t *) data; + + VALUE type = ID2SYM(rb_intern(pm_diagnostic_type(diagnostic))); + VALUE message = rb_obj_freeze(rb_enc_str_new_cstr(pm_diagnostic_message(diagnostic), each_data->encoding)); + VALUE location = PARSER_LOCATION(each_data->source, each_data->freeze, pm_diagnostic_location(diagnostic)); + + pm_warning_level_t warning_level = pm_diagnostic_warning_level(diagnostic); + VALUE level = Qnil; + + switch (warning_level) { + case PM_WARNING_LEVEL_DEFAULT: + level = ID2SYM(rb_intern("default")); + break; + case PM_WARNING_LEVEL_VERBOSE: + level = ID2SYM(rb_intern("verbose")); + break; + default: + rb_raise(rb_eRuntimeError, "Unknown level: %" PRIu8, warning_level); + } + + VALUE argv[] = { type, message, location, level }; + VALUE value = rb_class_new_instance_freeze(4, argv, rb_cPrismParseWarning, each_data->freeze); + rb_ary_push(each_data->warnings, value); +} + /** * Extract the warnings out of the parser into an array. */ static VALUE parser_warnings(const pm_parser_t *parser, rb_encoding *encoding, VALUE source, bool freeze) { - pm_diagnostics_iter_t *iter = pm_parser_warnings(parser); - VALUE warnings = rb_ary_new_capa(pm_diagnostics_iter_size(iter)); - - for ( - const pm_diagnostic_t *warning = pm_diagnostics_iter_next(iter); - warning != NULL; - warning = pm_diagnostics_iter_next(iter) - ) { - VALUE type = ID2SYM(rb_intern(pm_diagnostic_type(warning))); - VALUE message = rb_obj_freeze(rb_enc_str_new_cstr(pm_diagnostic_message(warning), encoding)); - VALUE location = PARSER_LOCATION(source, freeze, pm_diagnostic_location(warning)); - - pm_warning_level_t warning_level = pm_diagnostic_warning_level(warning); - VALUE level = Qnil; - - switch (warning_level) { - case PM_WARNING_LEVEL_DEFAULT: - level = ID2SYM(rb_intern("default")); - break; - case PM_WARNING_LEVEL_VERBOSE: - level = ID2SYM(rb_intern("verbose")); - break; - default: - rb_raise(rb_eRuntimeError, "Unknown level: %" PRIu8, warning_level); - } + VALUE 
warnings = rb_ary_new_capa(pm_parser_warnings_size(parser)); - VALUE argv[] = { type, message, location, level }; - VALUE value = rb_class_new_instance_freeze(4, argv, rb_cPrismParseWarning, freeze); - rb_ary_push(warnings, value); - } + parser_warnings_each_data_t each_data = { warnings, encoding, source, freeze }; + pm_parser_warnings_each(parser, parser_warnings_each, &each_data); - pm_diagnostics_iter_free(iter); if (freeze) rb_obj_freeze(warnings); - return warnings; } @@ -1234,7 +1255,7 @@ parse_input_success_p(pm_string_t *input, const pm_options_t *options) { pm_parse(parser); - VALUE result = pm_diagnostics_iter_size(pm_parser_errors(parser)) == 0 ? Qtrue : Qfalse; + VALUE result = pm_parser_errors_size(parser) == 0 ? Qtrue : Qfalse; pm_parser_free(parser); pm_arena_free(&arena); diff --git a/include/prism/comments.h b/include/prism/comments.h index 7c95d0ca72..91792897d9 100644 --- a/include/prism/comments.h +++ b/include/prism/comments.h @@ -36,39 +36,4 @@ PRISM_EXPORTED_FUNCTION pm_location_t pm_comment_location(const pm_comment_t *co */ PRISM_EXPORTED_FUNCTION pm_comment_type_t pm_comment_type(const pm_comment_t *comment); -/* An opaque pointer to an iterator that can be used to iterate over the - * comments associated with a parser. */ -typedef struct pm_comments_iter_t pm_comments_iter_t; - -/** - * Returns the number of comments associated with the comment iterator. - * - * @param iter the iterator to get the number of comments from - * @return the number of comments associated with the comment iterator - * - * \public \memberof pm_comments_iter_t - */ -PRISM_EXPORTED_FUNCTION size_t pm_comments_iter_size(const pm_comments_iter_t *iter); - -/** - * Returns the next comment in the iteration, or NULL if there are no more - * comments. - * - * @param iter the iterator to get the next comment from - * @return the next comment in the iteration, or NULL if there are no more - * comments. 
- * - * \public \memberof pm_comments_iter_t - */ -PRISM_EXPORTED_FUNCTION const pm_comment_t * pm_comments_iter_next(pm_comments_iter_t *iter); - -/** - * Frees the memory associated with the given comments iterator. - * - * @param iter the iterator to free - * - * \public \memberof pm_comments_iter_t - */ -PRISM_EXPORTED_FUNCTION void pm_comments_iter_free(pm_comments_iter_t *iter); - #endif diff --git a/include/prism/internal/comments.h b/include/prism/internal/comments.h index 8b1009daf6..e8fbb0e6aa 100644 --- a/include/prism/internal/comments.h +++ b/include/prism/internal/comments.h @@ -22,16 +22,4 @@ struct pm_comment_t { pm_comment_type_t type; }; -/** - * A struct used as an opaque pointer for the client to iterate through the - * comments found while parsing. - */ -struct pm_comments_iter_t { - /** The number of comments in the list. */ - size_t size; - - /** The current node in the list. */ - const pm_list_node_t *current; -}; - #endif diff --git a/include/prism/internal/magic_comments.h b/include/prism/internal/magic_comments.h index eb4154928d..57c964bf4e 100644 --- a/include/prism/internal/magic_comments.h +++ b/include/prism/internal/magic_comments.h @@ -25,16 +25,4 @@ struct pm_magic_comment_t { pm_location_t value; }; -/** - * A struct used as an opaque pointer for the client to iterate through the - * magic comments found while parsing. - */ -struct pm_magic_comments_iter_t { - /** The number of magic comments in the list. */ - size_t size; - - /** The current node in the list. 
*/ - const pm_list_node_t *current; -}; - #endif diff --git a/include/prism/parser.h b/include/prism/parser.h index 9e94ad1631..395dbe37be 100644 --- a/include/prism/parser.h +++ b/include/prism/parser.h @@ -138,59 +138,103 @@ PRISM_EXPORTED_FUNCTION bool pm_parser_continuable(const pm_parser_t *parser); PRISM_EXPORTED_FUNCTION int pm_parser_lex_state(const pm_parser_t *parser); /** - * Returns an iterator that knows how to iterate over the comments that are - * associated with the given parser. + * Returns the number of comments associated with the given parser. * - * @param parser the parser whose comments we want to get - * @return the iterator that knows how to iterate over the comments that are - * associated with the given parser. It is the responsibility of the caller - * to free the memory associated with the iterator through - * pm_comments_iter_free. + * @param parser the parser whose comments we want to get the size of + * @return the number of comments associated with the given parser + */ +PRISM_EXPORTED_FUNCTION size_t pm_parser_comments_size(const pm_parser_t *parser); + +/** + * A callback function that can be used to process comments found while parsing. + */ +typedef void (*pm_comment_callback_t)(const pm_comment_t *comment, void *data); + +/** + * Iterates over the comments associated with the given parser and calls the + * given callback for each comment. * - * \public \memberof pm_parser + * @param parser the parser whose comments we want to iterate over + * @param callback the callback function to call for each comment. This function + * will be passed a pointer to the comment and the data parameter passed to + * this function. + * @param data the data to pass to the callback function for each comment. This + * can be NULL if no data needs to be passed to the callback function. 
*/ -PRISM_EXPORTED_FUNCTION pm_comments_iter_t * pm_parser_comments(const pm_parser_t *parser); +PRISM_EXPORTED_FUNCTION void pm_parser_comments_each(const pm_parser_t *parser, pm_comment_callback_t callback, void *data); /** - * Returns an iterator that knows how to iterate over the magic comments that - * are associated with the given parser. + * Returns the number of magic comments associated with the given parser. * - * @param parser the parser whose magic comments we want to get - * @return the iterator that knows how to iterate over the magic comments that are - * associated with the given parser. It is the responsibility of the caller - * to free the memory associated with the iterator through - * pm_magic_comments_iter_free. + * @param parser the parser whose magic comments we want to get the size of + * @return the number of magic comments associated with the given parser + */ +PRISM_EXPORTED_FUNCTION size_t pm_parser_magic_comments_size(const pm_parser_t *parser); + +/** + * A callback function that can be used to process magic comments found while parsing. + */ +typedef void (*pm_magic_comment_callback_t)(const pm_magic_comment_t *magic_comment, void *data); + +/** + * Iterates over the magic comments associated with the given parser and calls the + * given callback for each magic comment. * - * \public \memberof pm_parser + * @param parser the parser whose magic comments we want to iterate over + * @param callback the callback function to call for each magic comment. This + * function will be passed a pointer to the magic comment and the data + * parameter passed to this function. + * @param data the data to pass to the callback function for each magic comment. + * This can be NULL if no data needs to be passed to the callback function. 
*/ -PRISM_EXPORTED_FUNCTION pm_magic_comments_iter_t * pm_parser_magic_comments(const pm_parser_t *parser); +PRISM_EXPORTED_FUNCTION void pm_parser_magic_comments_each(const pm_parser_t *parser, pm_magic_comment_callback_t callback, void *data); /** - * Returns an iterator that knows how to iterate over the errors that are - * associated with the given parser. + * Returns the number of errors associated with the given parser. * - * @param parser the parser whose errors we want to get - * @return the iterator that knows how to iterate over the errors that are - * associated with the given parser. It is the responsibility of the caller - * to free the memory associated with the iterator through - * pm_diagnostics_iter_free. + * @param parser the parser whose errors we want to get the size of + * @return the number of errors associated with the given parser + */ +PRISM_EXPORTED_FUNCTION size_t pm_parser_errors_size(const pm_parser_t *parser); + +/** + * Returns the number of warnings associated with the given parser. * - * \public \memberof pm_parser + * @param parser the parser whose warnings we want to get the size of + * @return the number of warnings associated with the given parser + */ +PRISM_EXPORTED_FUNCTION size_t pm_parser_warnings_size(const pm_parser_t *parser); + +/** + * A callback function that can be used to process diagnostics found while + * parsing. */ -PRISM_EXPORTED_FUNCTION pm_diagnostics_iter_t * pm_parser_errors(const pm_parser_t *parser); +typedef void (*pm_diagnostic_callback_t)(const pm_diagnostic_t *diagnostic, void *data); /** - * Returns an iterator that knows how to iterate over the warnings that are - * associated with the given parser. + * Iterates over the errors associated with the given parser and calls the + * given callback for each error. * - * @param parser the parser whose warnings we want to get - * @return the iterator that knows how to iterate over the warnings that are - * associated with the given parser. 
It is the responsibility of the caller - * to free the memory associated with the iterator through - * pm_diagnostics_iter_free. + * @param parser the parser whose errors we want to iterate over + * @param callback the callback function to call for each error. This function + * will be passed a pointer to the error and the data parameter passed to + * this function. + * @param data the data to pass to the callback function for each error. This + * can be NULL if no data needs to be passed to the callback function. + */ +PRISM_EXPORTED_FUNCTION void pm_parser_errors_each(const pm_parser_t *parser, pm_diagnostic_callback_t callback, void *data); + +/** + * Iterates over the warnings associated with the given parser and calls the + * given callback for each warning. * - * \public \memberof pm_parser + * @param parser the parser whose warnings we want to iterate over + * @param callback the callback function to call for each warning. This function + * will be passed a pointer to the warning and the data parameter passed to + * this function. + * @param data the data to pass to the callback function for each warning. This + * can be NULL if no data needs to be passed to the callback function. */ -PRISM_EXPORTED_FUNCTION pm_diagnostics_iter_t * pm_parser_warnings(const pm_parser_t *parser); +PRISM_EXPORTED_FUNCTION void pm_parser_warnings_each(const pm_parser_t *parser, pm_diagnostic_callback_t callback, void *data); #endif diff --git a/src/comments.c b/src/comments.c index 7e2f1e2f10..be535d2f7a 100644 --- a/src/comments.c +++ b/src/comments.c @@ -20,31 +20,3 @@ pm_comment_type_t pm_comment_type(const pm_comment_t *comment) { return comment->type; } - -/** - * Returns the number of comments associated with the comment iterator. - */ -size_t -pm_comments_iter_size(const pm_comments_iter_t *iter) { - return iter->size; -} - -/** - * Returns the next comment in the iteration, or NULL if there are no more - * comments. 
- */ -const pm_comment_t * -pm_comments_iter_next(pm_comments_iter_t *iter) { - if (iter->current == NULL) return NULL; - const pm_comment_t *comment = (const pm_comment_t *) iter->current; - iter->current = iter->current->next; - return comment; -} - -/** - * Frees the memory associated with the given comments iterator. - */ -void -pm_comments_iter_free(pm_comments_iter_t *iter) { - xfree(iter); -} diff --git a/src/magic_comments.c b/src/magic_comments.c index fc570868a6..6648010061 100644 --- a/src/magic_comments.c +++ b/src/magic_comments.c @@ -20,32 +20,3 @@ pm_location_t pm_magic_comment_value(const pm_magic_comment_t *magic_comment) { return magic_comment->value; } - -/** - * Returns the number of magic comments associated with the magic comment - * iterator. - */ -size_t -pm_magic_comments_iter_size(const pm_magic_comments_iter_t *iter) { - return iter->size; -} - -/** - * Returns the next magic comment in the iteration, or NULL if there are no more - * magic comments. - */ -const pm_magic_comment_t * -pm_magic_comments_iter_next(pm_magic_comments_iter_t *iter) { - if (iter->current == NULL) return NULL; - const pm_magic_comment_t *magic_comment = (const pm_magic_comment_t *) iter->current; - iter->current = iter->current->next; - return magic_comment; -} - -/** - * Frees the memory associated with the given magic comments iterator. - */ -void -pm_magic_comments_iter_free(pm_magic_comments_iter_t *iter) { - xfree(iter); -} diff --git a/src/parser.c b/src/parser.c index 84d8dff992..c2814e1637 100644 --- a/src/parser.c +++ b/src/parser.c @@ -118,49 +118,89 @@ pm_parser_lex_state(const pm_parser_t *parser) { } /** - * Returns an iterator that knows how to iterate over the comments that are - * associated with the given parser. + * Returns the number of comments associated with the given parser. 
*/ -pm_comments_iter_t * -pm_parser_comments(const pm_parser_t *parser) { - pm_comments_iter_t *iter = (pm_comments_iter_t *) xmalloc(sizeof(pm_comments_iter_t)); - iter->size = parser->comment_list.size; - iter->current = parser->comment_list.head; - return iter; +size_t +pm_parser_comments_size(const pm_parser_t *parser) { + return parser->comment_list.size; } /** - * Returns an iterator that knows how to iterate over the magic comments that - * are associated with the given parser. + * Iterates over the comments associated with the given parser and calls the + * given callback for each comment. */ -pm_magic_comments_iter_t * -pm_parser_magic_comments(const pm_parser_t *parser) { - pm_magic_comments_iter_t *iter = (pm_magic_comments_iter_t *) xmalloc(sizeof(pm_magic_comments_iter_t)); - iter->size = parser->magic_comment_list.size; - iter->current = parser->magic_comment_list.head; - return iter; +void +pm_parser_comments_each(const pm_parser_t *parser, pm_comment_callback_t callback, void *data) { + const pm_list_node_t *current = parser->comment_list.head; + while (current != NULL) { + const pm_comment_t *comment = (const pm_comment_t *) current; + callback(comment, data); + current = current->next; + } +} + +/** + * Returns the number of magic comments associated with the given parser. + */ +size_t +pm_parser_magic_comments_size(const pm_parser_t *parser) { + return parser->magic_comment_list.size; +} + +/** + * Iterates over the magic comments associated with the given parser and calls + * the given callback for each magic comment. 
+ */ +void +pm_parser_magic_comments_each(const pm_parser_t *parser, pm_magic_comment_callback_t callback, void *data) { + const pm_list_node_t *current = parser->magic_comment_list.head; + while (current != NULL) { + const pm_magic_comment_t *magic_comment = (const pm_magic_comment_t *) current; + callback(magic_comment, data); + current = current->next; + } +} + +/** + * Returns the number of errors associated with the given parser. + */ +size_t +pm_parser_errors_size(const pm_parser_t *parser) { + return parser->error_list.size; } /** - * Returns an iterator that knows how to iterate over the errors that are - * associated with the given parser. + * Returns the number of warnings associated with the given parser. */ -pm_diagnostics_iter_t * -pm_parser_errors(const pm_parser_t *parser) { - pm_diagnostics_iter_t *iter = (pm_diagnostics_iter_t *) xmalloc(sizeof(pm_diagnostics_iter_t)); - iter->size = parser->error_list.size; - iter->current = parser->error_list.head; - return iter; +size_t +pm_parser_warnings_size(const pm_parser_t *parser) { + return parser->warning_list.size; +} + +static inline void +pm_parser_diagnostics_each(const pm_list_t *list, pm_diagnostic_callback_t callback, void *data) { + const pm_list_node_t *current = list->head; + while (current != NULL) { + const pm_diagnostic_t *diagnostic = (const pm_diagnostic_t *) current; + callback(diagnostic, data); + current = current->next; + } } /** - * Returns an iterator that knows how to iterate over the warnings that are - * associated with the given parser. + * Iterates over the errors associated with the given parser and calls the + * given callback for each error. 
*/ -pm_diagnostics_iter_t * -pm_parser_warnings(const pm_parser_t *parser) { - pm_diagnostics_iter_t *iter = (pm_diagnostics_iter_t *) xmalloc(sizeof(pm_diagnostics_iter_t)); - iter->size = parser->warning_list.size; - iter->current = parser->warning_list.head; - return iter; +void +pm_parser_errors_each(const pm_parser_t *parser, pm_diagnostic_callback_t callback, void *data) { + pm_parser_diagnostics_each(&parser->error_list, callback, data); +} + +/** + * Iterates over the warnings associated with the given parser and calls the + * given callback for each warning. + */ +void +pm_parser_warnings_each(const pm_parser_t *parser, pm_diagnostic_callback_t callback, void *data) { + pm_parser_diagnostics_each(&parser->warning_list, callback, data); } diff --git a/templates/include/prism/internal/diagnostic.h.erb b/templates/include/prism/internal/diagnostic.h.erb index ca294e3972..fcbc2b6a70 100644 --- a/templates/include/prism/internal/diagnostic.h.erb +++ b/templates/include/prism/internal/diagnostic.h.erb @@ -75,16 +75,4 @@ void pm_diagnostic_list_append(pm_arena_t *arena, pm_list_t *list, uint32_t star */ void pm_diagnostic_list_append_format(pm_arena_t *arena, pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id, ...); -/** - * A struct used as an opaque pointer for the client to iterate through the - * diagnostics found while parsing. - */ -struct pm_diagnostics_iter_t { - /** The number of diagnostics in the list. */ - size_t size; - - /** The current node in the list. 
*/ - const pm_list_node_t *current; -}; - #endif From 1f4bf83b5364bf1150a978f44594f7be492123c0 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 18 Mar 2026 12:09:46 -0400 Subject: [PATCH 064/100] Code review --- include/prism/options.h | 2 +- include/prism/parser.h | 2 +- src/options.c | 8 ++++---- src/parser.c | 2 +- src/string_query.c | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/prism/options.h b/include/prism/options.h index aba122f705..61afc0df92 100644 --- a/include/prism/options.h +++ b/include/prism/options.h @@ -143,7 +143,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_shebang_callback_set(pm_options_t *optio * * \public \memberof pm_options */ -PRISM_EXPORTED_FUNCTION pm_string_t * pm_options_filepath_get(pm_options_t *options); +PRISM_EXPORTED_FUNCTION const pm_string_t * pm_options_filepath_get(const pm_options_t *options); /** * Set the filepath option on the given options struct. diff --git a/include/prism/parser.h b/include/prism/parser.h index 395dbe37be..f15ddd8901 100644 --- a/include/prism/parser.h +++ b/include/prism/parser.h @@ -57,7 +57,7 @@ PRISM_EXPORTED_FUNCTION void pm_parser_lex_callback_set(pm_parser_t *parser, pm_ * @param parser The parser whose lex callback data we want to get. * @return The opaque data that is passed to the lex callback when it is called. */ -PRISM_EXPORTED_FUNCTION void * pm_parser_lex_callback_data(pm_parser_t *parser); +PRISM_EXPORTED_FUNCTION void * pm_parser_lex_callback_data(const pm_parser_t *parser); /** * Returns the raw pointer to the start of the source that is being parsed. diff --git a/src/options.c b/src/options.c index ecdeba58f3..9dbafdf63c 100644 --- a/src/options.c +++ b/src/options.c @@ -63,8 +63,8 @@ pm_options_shebang_callback_set(pm_options_t *options, pm_options_shebang_callba /** * Get the filepath option on the given options struct. 
*/ -pm_string_t * -pm_options_filepath_get(pm_options_t *options) { +const pm_string_t * +pm_options_filepath_get(const pm_options_t *options) { return &options->filepath; } @@ -182,8 +182,8 @@ pm_options_version_set(pm_options_t *options, const char *version, size_t length } } - if (length >= 6) { - if (strncmp(version, "latest", 7) == 0) { // 7 to compare the \0 as well + if (length == 6) { + if (strncmp(version, "latest", 6) == 0) { options->version = PM_OPTIONS_VERSION_LATEST; return true; } diff --git a/src/parser.c b/src/parser.c index c2814e1637..27c9641f83 100644 --- a/src/parser.c +++ b/src/parser.c @@ -30,7 +30,7 @@ pm_parser_lex_callback_set(pm_parser_t *parser, pm_lex_callback_t callback, void * Returns the opaque data that is passed to the lex callback when it is called. */ void * -pm_parser_lex_callback_data(pm_parser_t *parser) { +pm_parser_lex_callback_data(const pm_parser_t *parser) { return parser->lex_callback.data; } diff --git a/src/string_query.c b/src/string_query.c index a3be418b75..ccedaf9c00 100644 --- a/src/string_query.c +++ b/src/string_query.c @@ -27,7 +27,7 @@ typedef enum { /** * Check that the slice is a valid local variable name or constant. 
*/ -pm_slice_type_t +static pm_slice_type_t pm_slice_type(const uint8_t *source, size_t length, const char *encoding_name) { // first, get the right encoding object const pm_encoding_t *encoding = pm_encoding_find((const uint8_t *) encoding_name, (const uint8_t *) (encoding_name + strlen(encoding_name))); From 2867c529a3366477189d876dde68b36fab4f421e Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 18 Mar 2026 12:35:33 -0400 Subject: [PATCH 065/100] Move node list append internal --- include/prism/internal/node.h | 31 ++++++++++++++++++++++++++++++- include/prism/node.h | 27 --------------------------- templates/src/node.c.erb | 2 +- 3 files changed, 31 insertions(+), 29 deletions(-) diff --git a/include/prism/internal/node.h b/include/prism/internal/node.h index 2399b5a72a..075dc33e0a 100644 --- a/include/prism/internal/node.h +++ b/include/prism/internal/node.h @@ -4,8 +4,37 @@ #ifndef PRISM_INTERNAL_NODE_H #define PRISM_INTERNAL_NODE_H +#include "prism/node.h" + +#include "prism/compiler/force_inline.h" + #include "prism/arena.h" -#include "prism/ast.h" + +/** + * Slow path for pm_node_list_append: grow the list and append the node. + * Do not call directly — use pm_node_list_append instead. + * + * @param arena The arena to allocate from. + * @param list The list to append to. + * @param node The node to append. + */ +void pm_node_list_append_slow(pm_arena_t *arena, pm_node_list_t *list, pm_node_t *node); + +/** + * Append a new node onto the end of the node list. + * + * @param arena The arena to allocate from. + * @param list The list to append to. + * @param node The node to append. + */ +static PRISM_FORCE_INLINE void +pm_node_list_append(pm_arena_t *arena, pm_node_list_t *list, pm_node_t *node) { + if (list->size < list->capacity) { + list->nodes[list->size++] = node; + } else { + pm_node_list_append_slow(arena, list, node); + } +} /** * Prepend a new node onto the beginning of the node list. 
diff --git a/include/prism/node.h b/include/prism/node.h index ac7c337ad9..3d2ff3170a 100644 --- a/include/prism/node.h +++ b/include/prism/node.h @@ -8,7 +8,6 @@ #include "prism/compiler/exported.h" -#include "prism/arena.h" #include "prism/ast.h" /** @@ -18,32 +17,6 @@ #define PM_NODE_LIST_FOREACH(list, index, node) \ for (size_t index = 0; index < (list)->size && ((node) = (list)->nodes[index]); index++) -/** - * Slow path for pm_node_list_append: grow the list and append the node. - * Do not call directly — use pm_node_list_append instead. - * - * @param arena The arena to allocate from. - * @param list The list to append to. - * @param node The node to append. - */ -void pm_node_list_append_slow(pm_arena_t *arena, pm_node_list_t *list, pm_node_t *node); - -/** - * Append a new node onto the end of the node list. - * - * @param arena The arena to allocate from. - * @param list The list to append to. - * @param node The node to append. - */ -static PRISM_FORCE_INLINE void -pm_node_list_append(pm_arena_t *arena, pm_node_list_t *list, pm_node_t *node) { - if (list->size < list->capacity) { - list->nodes[list->size++] = node; - } else { - pm_node_list_append_slow(arena, list, node); - } -} - /** * Returns a string representation of the given node type. 
* diff --git a/templates/src/node.c.erb b/templates/src/node.c.erb index da249141ed..3db5239b6f 100644 --- a/templates/src/node.c.erb +++ b/templates/src/node.c.erb @@ -1,5 +1,5 @@ #line <%= __LINE__ + 1 %> "prism/templates/src/<%= File.basename(__FILE__) %>" -#include "prism/node.h" +#include "prism/internal/node.h" #include "prism/internal/buffer.h" #include "prism/internal/constant_pool.h" From 9498322c7cb5774b3ab89236899a8c0a82950906 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 18 Mar 2026 12:49:31 -0400 Subject: [PATCH 066/100] Fix up diagnostic templates --- include/prism/diagnostic.h | 35 ----------------------------- templates/src/diagnostic.c.erb | 40 +++++----------------------------- 2 files changed, 6 insertions(+), 69 deletions(-) diff --git a/include/prism/diagnostic.h b/include/prism/diagnostic.h index 0619c274ef..6c098c55ef 100644 --- a/include/prism/diagnostic.h +++ b/include/prism/diagnostic.h @@ -96,39 +96,4 @@ PRISM_EXPORTED_FUNCTION pm_error_level_t pm_diagnostic_error_level(const pm_diag */ PRISM_EXPORTED_FUNCTION pm_warning_level_t pm_diagnostic_warning_level(const pm_diagnostic_t *diagnostic); -/* An opaque pointer to an iterator that can be used to iterate over a set of - * diagnostics associated with a parser. */ -typedef struct pm_diagnostics_iter_t pm_diagnostics_iter_t; - -/** - * Returns the number of diagnostics associated with the diagnostics iterator. - * - * @param iter the iterator to get the number of diagnostics from - * @return the number of diagnostics associated with the diagnostics iterator - * - * \public \memberof pm_diagnostics_iter_t - */ -PRISM_EXPORTED_FUNCTION size_t pm_diagnostics_iter_size(const pm_diagnostics_iter_t *iter); - -/** - * Returns the next diagnostic in the iteration, or NULL if there are no more - * diagnostics. - * - * @param iter the iterator to get the next diagnostic from - * @return the next diagnostic in the iteration, or NULL if there are no more - * diagnostics. 
- * - * \public \memberof pm_diagnostics_iter_t - */ -PRISM_EXPORTED_FUNCTION const pm_diagnostic_t * pm_diagnostics_iter_next(pm_diagnostics_iter_t *iter); - -/** - * Frees the memory associated with the given diagnostics iterator. - * - * @param iter the iterator to free - * - * \public \memberof pm_diagnostics_iter_t - */ -PRISM_EXPORTED_FUNCTION void pm_diagnostics_iter_free(pm_diagnostics_iter_t *iter); - #endif diff --git a/templates/src/diagnostic.c.erb b/templates/src/diagnostic.c.erb index a4aba2e586..0dea732869 100644 --- a/templates/src/diagnostic.c.erb +++ b/templates/src/diagnostic.c.erb @@ -87,16 +87,16 @@ typedef struct { * * `PM_WARNING_LEVEL_VERBOSE` - Warnings that appear with `-w`, as in `ruby -w -c -e 'code'`. */ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = { - // Special error that can be replaced + /* Special error that can be replaced */ [PM_ERR_CANNOT_PARSE_EXPRESSION] = { "cannot parse the expression", PM_ERROR_LEVEL_SYNTAX }, - // Errors that should raise argument errors + /* Errors that should raise argument errors */ [PM_ERR_INVALID_ENCODING_MAGIC_COMMENT] = { "unknown or invalid encoding in the magic comment", PM_ERROR_LEVEL_ARGUMENT }, - // Errors that should raise load errors + /* Errors that should raise load errors */ [PM_ERR_SCRIPT_NOT_FOUND] = { "no Ruby script found in input", PM_ERROR_LEVEL_LOAD }, - // Errors that should raise syntax errors + /* Errors that should raise syntax errors */ [PM_ERR_ALIAS_ARGUMENT] = { "invalid argument being passed to `alias`; expected a bare word, symbol, constant, or global variable", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_ALIAS_ARGUMENT_NUMBERED_REFERENCE] = { "invalid argument being passed to `alias`; can't make alias for the number variables", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_AMPAMPEQ_MULTI_ASSIGN] = { "unexpected `&&=` in a multiple assignment", PM_ERROR_LEVEL_SYNTAX }, @@ -365,7 +365,7 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = 
{ [PM_ERR_STRING_INTERPOLATED_TERM] = { "unterminated string; expected a closing delimiter for the interpolated string", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_STRING_LITERAL_EOF] = { "unterminated string meets end of file", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_STRING_LITERAL_TERM] = { "unexpected %s, expected a string literal terminator", PM_ERROR_LEVEL_SYNTAX }, - [PM_ERR_SYMBOL_INVALID] = { "invalid symbol", PM_ERROR_LEVEL_SYNTAX }, // TODO expected symbol? prism.c ~9719 + [PM_ERR_SYMBOL_INVALID] = { "invalid symbol", PM_ERROR_LEVEL_SYNTAX }, /* TODO expected symbol? prism.c ~9719 */ [PM_ERR_SYMBOL_TERM_DYNAMIC] = { "unterminated quoted string; expected a closing delimiter for the dynamic symbol", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_SYMBOL_TERM_INTERPOLATED] = { "unterminated symbol; expected a closing delimiter for the interpolated symbol", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_TERNARY_COLON] = { "expected a `:` after the true expression of a ternary operator", PM_ERROR_LEVEL_SYNTAX }, @@ -392,7 +392,7 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = { [PM_ERR_WRITE_TARGET_UNEXPECTED] = { "unexpected write target", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_XSTRING_TERM] = { "expected a closing delimiter for the `%x` or backtick string", PM_ERROR_LEVEL_SYNTAX }, - // Warnings + /* Warnings */ [PM_WARN_AMBIGUOUS_BINARY_OPERATOR] = { "'%s' after local variable or literal is interpreted as binary operator even though it seems like %s", PM_WARNING_LEVEL_VERBOSE }, [PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS] = { "ambiguous first argument; put parentheses or a space even after `-` operator", PM_WARNING_LEVEL_VERBOSE }, [PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS] = { "ambiguous first argument; put parentheses or a space even after `+` operator", PM_WARNING_LEVEL_VERBOSE }, @@ -552,31 +552,3 @@ pm_diagnostic_list_append_format(pm_arena_t *arena, pm_list_t *list, uint32_t st pm_list_append(list, (pm_list_node_t *) diagnostic); } - -/** - * Returns the number of diagnostics 
associated with the diagnostics iterator. - */ -size_t -pm_diagnostics_iter_size(const pm_diagnostics_iter_t *iter) { - return iter->size; -} - -/** - * Returns the next diagnostic in the iteration, or NULL if there are no more - * diagnostics. - */ -const pm_diagnostic_t * -pm_diagnostics_iter_next(pm_diagnostics_iter_t *iter) { - if (iter->current == NULL) return NULL; - const pm_diagnostic_t *diagnostic = (const pm_diagnostic_t *) iter->current; - iter->current = iter->current->next; - return diagnostic; -} - -/** - * Frees the memory associated with the given diagnostics iterator. - */ -void -pm_diagnostics_iter_free(pm_diagnostics_iter_t *iter) { - xfree(iter); -} From 2fcab5eeb838dbd692c129d53da45b5e65eb9be4 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 18 Mar 2026 12:58:35 -0400 Subject: [PATCH 067/100] Do not inline node_new functions --- .gitignore | 1 + templates/include/prism/node_new.h.erb | 21 +++++---------------- templates/src/node_new.c.erb | 24 ++++++++++++++++++++++++ templates/template.rb | 1 + 4 files changed, 31 insertions(+), 16 deletions(-) create mode 100644 templates/src/node_new.c.erb diff --git a/.gitignore b/.gitignore index a8370790b2..edcf62a959 100644 --- a/.gitignore +++ b/.gitignore @@ -56,6 +56,7 @@ out.svg /sorbet/ /src/diagnostic.c /src/node.c +/src/node_new.c /src/prettyprint.c /src/serialize.c /src/token_type.c diff --git a/templates/include/prism/node_new.h.erb b/templates/include/prism/node_new.h.erb index 4a253fc02e..d3d4ebd773 100644 --- a/templates/include/prism/node_new.h.erb +++ b/templates/include/prism/node_new.h.erb @@ -1,16 +1,17 @@ /** * @file node_new.h * - * Static inline functions for allocating and initializing AST nodes. + * Functions for allocating and initializing AST nodes. 
* * -- */ #ifndef PRISM_NODE_NEW_H #define PRISM_NODE_NEW_H -#include "prism/compiler/inline.h" +#include "prism/compiler/exported.h" -#include "prism/node.h" +#include "prism/arena.h" +#include "prism/ast.h" <%- nodes.each do |node| -%> <%- params = node.fields.map(&:c_param) -%> @@ -26,19 +27,7 @@ <%- end -%> * @return The newly allocated and initialized node. */ -static PRISM_INLINE pm_<%= node.human %>_t * -pm_<%= node.human %>_new(pm_arena_t *arena, uint32_t node_id, pm_node_flags_t flags, pm_location_t location<%= params.empty? ? "" : ", #{params.join(", ")}" %>) { - pm_<%= node.human %>_t *node = (pm_<%= node.human %>_t *) pm_arena_alloc(arena, sizeof(pm_<%= node.human %>_t), PRISM_ALIGNOF(pm_<%= node.human %>_t)); - - *node = (pm_<%= node.human %>_t) { - .base = { .type = <%= node.type %>, .flags = flags, .node_id = node_id, .location = location }<%= node.fields.empty? ? "" : "," %> -<%- node.fields.each_with_index do |field, index| -%> - .<%= field.name %> = <%= field.name %><%= index < node.fields.size - 1 ? "," : "" %> -<%- end -%> - }; - - return node; -} +PRISM_EXPORTED_FUNCTION pm_<%= node.human %>_t * pm_<%= node.human %>_new(pm_arena_t *arena, uint32_t node_id, pm_node_flags_t flags, pm_location_t location<%= params.empty? ? "" : ", #{params.join(", ")}" %>); <%- end -%> #endif diff --git a/templates/src/node_new.c.erb b/templates/src/node_new.c.erb new file mode 100644 index 0000000000..f40a823472 --- /dev/null +++ b/templates/src/node_new.c.erb @@ -0,0 +1,24 @@ +#line <%= __LINE__ + 1 %> "prism/templates/src/<%= File.basename(__FILE__) %>" +#include "prism/internal/arena.h" +#include "prism/ast.h" + +<%- nodes.each do |node| -%> +<%- params = node.fields.map(&:c_param) -%> +/** + * Allocate and initialize a new <%= node.name %> node. + */ +pm_<%= node.human %>_t * +pm_<%= node.human %>_new(pm_arena_t *arena, uint32_t node_id, pm_node_flags_t flags, pm_location_t location<%= params.empty? ? 
"" : ", #{params.join(", ")}" %>) { + pm_<%= node.human %>_t *node = (pm_<%= node.human %>_t *) pm_arena_alloc(arena, sizeof(pm_<%= node.human %>_t), PRISM_ALIGNOF(pm_<%= node.human %>_t)); + + *node = (pm_<%= node.human %>_t) { + .base = { .type = <%= node.type %>, .flags = flags, .node_id = node_id, .location = location }<%= node.fields.empty? ? "" : "," %> +<%- node.fields.each_with_index do |field, index| -%> + .<%= field.name %> = <%= field.name %><%= index < node.fields.size - 1 ? "," : "" %> +<%- end -%> + }; + + return node; +} + +<%- end -%> diff --git a/templates/template.rb b/templates/template.rb index b3ccf373d3..0838181d26 100755 --- a/templates/template.rb +++ b/templates/template.rb @@ -704,6 +704,7 @@ def locals "lib/prism/visitor.rb", "src/diagnostic.c", "src/node.c", + "src/node_new.c", "src/prettyprint.c", "src/serialize.c", "src/token_type.c" From ee6f32068912c13e3a94afc63431072d44e1ed49 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 18 Mar 2026 13:03:39 -0400 Subject: [PATCH 068/100] Fold node_new into node/ast --- .gitignore | 2 -- prism.gemspec | 1 - src/prism.c | 1 - templates/include/prism/ast.h.erb | 19 +++++++++++++++ templates/include/prism/node_new.h.erb | 33 -------------------------- templates/src/node.c.erb | 21 ++++++++++++++++ templates/src/node_new.c.erb | 24 ------------------- templates/template.rb | 2 -- 8 files changed, 40 insertions(+), 63 deletions(-) delete mode 100644 templates/include/prism/node_new.h.erb delete mode 100644 templates/src/node_new.c.erb diff --git a/.gitignore b/.gitignore index edcf62a959..7df40e7ba6 100644 --- a/.gitignore +++ b/.gitignore @@ -31,7 +31,6 @@ out.svg /fuzz/output/ /gemfiles/typecheck/bin/ /include/prism/ast.h -/include/prism/node_new.h /include/prism/internal/diagnostic.h /javascript/node_modules/ /javascript/package-lock.json @@ -56,7 +55,6 @@ out.svg /sorbet/ /src/diagnostic.c /src/node.c -/src/node_new.c /src/prettyprint.c /src/serialize.c /src/token_type.c diff --git 
a/prism.gemspec b/prism.gemspec index ca21b792cc..41eb9888ca 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -92,7 +92,6 @@ Gem::Specification.new do |spec| "include/prism/line_offset_list.h", "include/prism/magic_comments.h", "include/prism/node.h", - "include/prism/node_new.h", "include/prism/options.h", "include/prism/parser.h", "include/prism/prettyprint.h", diff --git a/src/prism.c b/src/prism.c index 06d3dc42b9..4c497d74b6 100644 --- a/src/prism.c +++ b/src/prism.c @@ -30,7 +30,6 @@ #include "prism/internal/strpbrk.h" #include "prism/excludes.h" -#include "prism/node_new.h" #include #include diff --git a/templates/include/prism/ast.h.erb b/templates/include/prism/ast.h.erb index 754d05a216..3a949f1000 100644 --- a/templates/include/prism/ast.h.erb +++ b/templates/include/prism/ast.h.erb @@ -9,7 +9,9 @@ #define PRISM_AST_H #include "prism/compiler/align.h" +#include "prism/compiler/exported.h" +#include "prism/arena.h" #include "prism/constant_pool.h" #include "prism/integer.h" #include "prism/strings.h" @@ -238,6 +240,23 @@ typedef enum pm_<%= flag.human %> { PM_<%= flag.human.upcase %>_LAST, } pm_<%= flag.human %>_t; <%- end -%> +<%- nodes.each do |node| -%> + +<%- params = node.fields.map(&:c_param) -%> +/** + * Allocate and initialize a new <%= node.name %> node. + * + * @param arena The arena to allocate from. + * @param node_id The unique identifier for this node. + * @param flags The flags for this node. + * @param location The location of this node in the source. +<%- node.fields.each do |field| -%> + * @param <%= field.name %> <%= field.comment ? Prism::Template::Doxygen.verbatim(field.comment.lines.first.strip) : "The #{field.name} field." %> +<%- end -%> + * @return The newly allocated and initialized node. + */ +PRISM_EXPORTED_FUNCTION pm_<%= node.human %>_t * pm_<%= node.human %>_new(pm_arena_t *arena, uint32_t node_id, pm_node_flags_t flags, pm_location_t location<%= params.empty? ? 
"" : ", #{params.join(", ")}" %>); +<%- end -%> /** * When we're serializing to Java, we want to skip serializing the location diff --git a/templates/include/prism/node_new.h.erb b/templates/include/prism/node_new.h.erb deleted file mode 100644 index d3d4ebd773..0000000000 --- a/templates/include/prism/node_new.h.erb +++ /dev/null @@ -1,33 +0,0 @@ -/** - * @file node_new.h - * - * Functions for allocating and initializing AST nodes. - * - * -- - */ -#ifndef PRISM_NODE_NEW_H -#define PRISM_NODE_NEW_H - -#include "prism/compiler/exported.h" - -#include "prism/arena.h" -#include "prism/ast.h" - -<%- nodes.each do |node| -%> -<%- params = node.fields.map(&:c_param) -%> -/** - * Allocate and initialize a new <%= node.name %> node. - * - * @param arena The arena to allocate from. - * @param node_id The unique identifier for this node. - * @param flags The flags for this node. - * @param location The location of this node in the source. -<%- node.fields.each do |field| -%> - * @param <%= field.name %> <%= field.comment ? Prism::Template::Doxygen.verbatim(field.comment.lines.first.strip) : "The #{field.name} field." %> -<%- end -%> - * @return The newly allocated and initialized node. - */ -PRISM_EXPORTED_FUNCTION pm_<%= node.human %>_t * pm_<%= node.human %>_new(pm_arena_t *arena, uint32_t node_id, pm_node_flags_t flags, pm_location_t location<%= params.empty? ? 
"" : ", #{params.join(", ")}" %>); - -<%- end -%> -#endif diff --git a/templates/src/node.c.erb b/templates/src/node.c.erb index 3db5239b6f..695175d7c8 100644 --- a/templates/src/node.c.erb +++ b/templates/src/node.c.erb @@ -1,6 +1,7 @@ #line <%= __LINE__ + 1 %> "prism/templates/src/<%= File.basename(__FILE__) %>" #include "prism/internal/node.h" +#include "prism/internal/arena.h" #include "prism/internal/buffer.h" #include "prism/internal/constant_pool.h" #include "prism/internal/integer.h" @@ -270,3 +271,23 @@ pm_dump_json(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *no } #endif +<%- nodes.each do |node| -%> + +<%- params = node.fields.map(&:c_param) -%> +/** + * Allocate and initialize a new <%= node.name %> node. + */ +pm_<%= node.human %>_t * +pm_<%= node.human %>_new(pm_arena_t *arena, uint32_t node_id, pm_node_flags_t flags, pm_location_t location<%= params.empty? ? "" : ", #{params.join(", ")}" %>) { + pm_<%= node.human %>_t *node = (pm_<%= node.human %>_t *) pm_arena_alloc(arena, sizeof(pm_<%= node.human %>_t), PRISM_ALIGNOF(pm_<%= node.human %>_t)); + + *node = (pm_<%= node.human %>_t) { + .base = { .type = <%= node.type %>, .flags = flags, .node_id = node_id, .location = location }<%= node.fields.empty? ? "" : "," %> +<%- node.fields.each_with_index do |field, index| -%> + .<%= field.name %> = <%= field.name %><%= index < node.fields.size - 1 ? "," : "" %> +<%- end -%> + }; + + return node; +} +<%- end -%> diff --git a/templates/src/node_new.c.erb b/templates/src/node_new.c.erb deleted file mode 100644 index f40a823472..0000000000 --- a/templates/src/node_new.c.erb +++ /dev/null @@ -1,24 +0,0 @@ -#line <%= __LINE__ + 1 %> "prism/templates/src/<%= File.basename(__FILE__) %>" -#include "prism/internal/arena.h" -#include "prism/ast.h" - -<%- nodes.each do |node| -%> -<%- params = node.fields.map(&:c_param) -%> -/** - * Allocate and initialize a new <%= node.name %> node. 
- */ -pm_<%= node.human %>_t * -pm_<%= node.human %>_new(pm_arena_t *arena, uint32_t node_id, pm_node_flags_t flags, pm_location_t location<%= params.empty? ? "" : ", #{params.join(", ")}" %>) { - pm_<%= node.human %>_t *node = (pm_<%= node.human %>_t *) pm_arena_alloc(arena, sizeof(pm_<%= node.human %>_t), PRISM_ALIGNOF(pm_<%= node.human %>_t)); - - *node = (pm_<%= node.human %>_t) { - .base = { .type = <%= node.type %>, .flags = flags, .node_id = node_id, .location = location }<%= node.fields.empty? ? "" : "," %> -<%- node.fields.each_with_index do |field, index| -%> - .<%= field.name %> = <%= field.name %><%= index < node.fields.size - 1 ? "," : "" %> -<%- end -%> - }; - - return node; -} - -<%- end -%> diff --git a/templates/template.rb b/templates/template.rb index 0838181d26..fb778871b4 100755 --- a/templates/template.rb +++ b/templates/template.rb @@ -684,7 +684,6 @@ def locals TEMPLATES = [ "ext/prism/api_node.c", "include/prism/ast.h", - "include/prism/node_new.h", "include/prism/internal/diagnostic.h", "javascript/src/deserialize.js", "javascript/src/nodes.js", @@ -704,7 +703,6 @@ def locals "lib/prism/visitor.rb", "src/diagnostic.c", "src/node.c", - "src/node_new.c", "src/prettyprint.c", "src/serialize.c", "src/token_type.c" From 639da14de555349b6031ba177a17cf92b31aff45 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 18 Mar 2026 13:05:57 -0400 Subject: [PATCH 069/100] Move some of arena internal --- cpp/test.cpp | 2 +- ext/prism/extension.c | 14 ++++---- include/prism.h | 6 ++-- include/prism/arena.h | 41 ++--------------------- include/prism/internal/arena.h | 37 ++++++++++++++++++++ include/prism/node.h | 2 +- rust/ruby-prism-sys/build/main.rs | 2 +- rust/ruby-prism-sys/tests/node_tests.rs | 4 +-- rust/ruby-prism-sys/tests/parser_tests.rs | 8 ++--- rust/ruby-prism/src/parse_result/mod.rs | 4 +-- src/arena.c | 2 +- src/prism.c | 12 +++---- templates/src/serialize.c.erb | 4 +-- 13 files changed, 69 insertions(+), 69 deletions(-) diff --git 
a/cpp/test.cpp b/cpp/test.cpp index 8ef742d957..916d10381d 100644 --- a/cpp/test.cpp +++ b/cpp/test.cpp @@ -18,7 +18,7 @@ int main() { pm_buffer_free(buffer); pm_parser_free(parser); - pm_arena_free(&arena); + pm_arena_cleanup(&arena); return 0; } diff --git a/ext/prism/extension.c b/ext/prism/extension.c index 27cc3839ed..0606b9b238 100644 --- a/ext/prism/extension.c +++ b/ext/prism/extension.c @@ -381,7 +381,7 @@ dump_input(pm_string_t *input, const pm_options_t *options) { VALUE result = rb_str_new(pm_buffer_value(buffer), pm_buffer_length(buffer)); pm_buffer_free(buffer); pm_parser_free(parser); - pm_arena_free(&arena); + pm_arena_cleanup(&arena); return result; } @@ -828,7 +828,7 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod } pm_parser_free(parser); - pm_arena_free(&arena); + pm_arena_cleanup(&arena); return result; } @@ -902,7 +902,7 @@ parse_input(pm_string_t *input, const pm_options_t *options) { } pm_parser_free(parser); - pm_arena_free(&arena); + pm_arena_cleanup(&arena); return result; } @@ -1009,7 +1009,7 @@ profile_input(pm_string_t *input, const pm_options_t *options) { pm_parse(parser); pm_parser_free(parser); - pm_arena_free(&arena); + pm_arena_cleanup(&arena); } /** @@ -1117,7 +1117,7 @@ parse_stream(int argc, VALUE *argv, VALUE self) { pm_buffer_free(buffer); pm_parser_free(parser); - pm_arena_free(&arena); + pm_arena_cleanup(&arena); pm_options_free(options); return result; @@ -1138,7 +1138,7 @@ parse_input_comments(pm_string_t *input, const pm_options_t *options) { VALUE comments = parser_comments(parser, source, pm_options_freeze_get(options)); pm_parser_free(parser); - pm_arena_free(&arena); + pm_arena_cleanup(&arena); return comments; } @@ -1257,7 +1257,7 @@ parse_input_success_p(pm_string_t *input, const pm_options_t *options) { VALUE result = pm_parser_errors_size(parser) == 0 ? 
Qtrue : Qfalse; pm_parser_free(parser); - pm_arena_free(&arena); + pm_arena_cleanup(&arena); return result; } diff --git a/include/prism.h b/include/prism.h index 9af3e6cf6c..5a8b835978 100644 --- a/include/prism.h +++ b/include/prism.h @@ -260,7 +260,7 @@ PRISM_EXPORTED_FUNCTION void pm_dump_json(pm_buffer_t *buffer, const pm_parser_t * * `pm_parser_new()` - allocate and initialize a new parser * * `pm_parse()` - parse and return the root node * * `pm_parser_free()` - free the parser and its internal memory - * * `pm_arena_free()` - free all AST-lifetime memory + * * `pm_arena_cleanup()` - free all AST-lifetime memory * * Putting all of this together would look something like: * @@ -273,7 +273,7 @@ PRISM_EXPORTED_FUNCTION void pm_dump_json(pm_buffer_t *buffer, const pm_parser_t * printf("PARSED!\n"); * * pm_parser_free(parser); - * pm_arena_free(&arena); + * pm_arena_cleanup(&arena); * } * ``` * @@ -326,7 +326,7 @@ PRISM_EXPORTED_FUNCTION void pm_dump_json(pm_buffer_t *buffer, const pm_parser_t * * pm_buffer_free(buffer); * pm_parser_free(parser); - * pm_arena_free(&arena); + * pm_arena_cleanup(&arena); * } * ``` */ diff --git a/include/prism/arena.h b/include/prism/arena.h index 73f2fab900..303ccc554b 100644 --- a/include/prism/arena.h +++ b/include/prism/arena.h @@ -43,49 +43,12 @@ typedef struct { size_t block_count; } pm_arena_t; -/** - * Slow path for pm_arena_alloc: allocate a new block and return a pointer to - * the first `size` bytes. Do not call directly — use pm_arena_alloc instead. - * - * @param arena The arena to allocate from. - * @param size The number of bytes to allocate. - * @returns A pointer to the allocated memory. - */ -void * pm_arena_alloc_slow(pm_arena_t *arena, size_t size); - -/** - * Allocate memory from the arena. The returned memory is NOT zeroed. This - * function is infallible — it aborts on allocation failure. - * - * The fast path (bump pointer within the current block) is inlined at each - * call site. 
The slow path (new block allocation) is out-of-line. - * - * @param arena The arena to allocate from. - * @param size The number of bytes to allocate. - * @param alignment The required alignment (must be a power of 2). - * @returns A pointer to the allocated memory. - */ -static PRISM_FORCE_INLINE void * -pm_arena_alloc(pm_arena_t *arena, size_t size, size_t alignment) { - if (arena->current != NULL) { - size_t used_aligned = (arena->current->used + alignment - 1) & ~(alignment - 1); - size_t needed = used_aligned + size; - - if (used_aligned >= arena->current->used && needed >= used_aligned && needed <= arena->current->capacity) { - arena->current->used = needed; - return arena->current->data + used_aligned; - } - } - - return pm_arena_alloc_slow(arena, size); -} - /** * Free all blocks in the arena. After this call, all pointers returned by * pm_arena_alloc and pm_arena_zalloc are invalid. * - * @param arena The arena to free. + * @param arena The arena whose held memory should be freed. */ -PRISM_EXPORTED_FUNCTION void pm_arena_free(pm_arena_t *arena); +PRISM_EXPORTED_FUNCTION void pm_arena_cleanup(pm_arena_t *arena); #endif diff --git a/include/prism/internal/arena.h b/include/prism/internal/arena.h index 747484fa2f..1a393f7bcb 100644 --- a/include/prism/internal/arena.h +++ b/include/prism/internal/arena.h @@ -24,6 +24,43 @@ */ void pm_arena_reserve(pm_arena_t *arena, size_t capacity); +/** + * Slow path for pm_arena_alloc: allocate a new block and return a pointer to + * the first `size` bytes. Do not call directly — use pm_arena_alloc instead. + * + * @param arena The arena to allocate from. + * @param size The number of bytes to allocate. + * @returns A pointer to the allocated memory. + */ +void * pm_arena_alloc_slow(pm_arena_t *arena, size_t size); + +/** + * Allocate memory from the arena. The returned memory is NOT zeroed. This + * function is infallible — it aborts on allocation failure. 
+ * + * The fast path (bump pointer within the current block) is inlined at each + * call site. The slow path (new block allocation) is out-of-line. + * + * @param arena The arena to allocate from. + * @param size The number of bytes to allocate. + * @param alignment The required alignment (must be a power of 2). + * @returns A pointer to the allocated memory. + */ +static PRISM_FORCE_INLINE void * +pm_arena_alloc(pm_arena_t *arena, size_t size, size_t alignment) { + if (arena->current != NULL) { + size_t used_aligned = (arena->current->used + alignment - 1) & ~(alignment - 1); + size_t needed = used_aligned + size; + + if (used_aligned >= arena->current->used && needed >= used_aligned && needed <= arena->current->capacity) { + arena->current->used = needed; + return arena->current->data + used_aligned; + } + } + + return pm_arena_alloc_slow(arena, size); +} + /** * Allocate zero-initialized memory from the arena. This function is infallible * — it aborts on allocation failure. diff --git a/include/prism/node.h b/include/prism/node.h index 3d2ff3170a..bfda2a6a52 100644 --- a/include/prism/node.h +++ b/include/prism/node.h @@ -67,7 +67,7 @@ PRISM_EXPORTED_FUNCTION const char * pm_node_type_to_str(pm_node_type_t node_typ * * pm_parser_free(parser); * pm_options_free(options); - * pm_arena_free(&arena); + * pm_arena_cleanup(&arena); * * return EXIT_SUCCESS; * } diff --git a/rust/ruby-prism-sys/build/main.rs b/rust/ruby-prism-sys/build/main.rs index 4cf5add131..9c4d807030 100644 --- a/rust/ruby-prism-sys/build/main.rs +++ b/rust/ruby-prism-sys/build/main.rs @@ -144,7 +144,7 @@ fn generate_bindings(ruby_include_path: &Path) -> bindgen::Bindings { .rustified_non_exhaustive_enum(r"pm_\w+_flags") .rustified_non_exhaustive_enum("pm_node_type") // Functions - .allowlist_function("pm_arena_free") + .allowlist_function("pm_arena_cleanup") .allowlist_function("pm_line_offset_list_line_column") .allowlist_function("pm_options_command_line_set") 
.allowlist_function("pm_options_encoding_locked_set") diff --git a/rust/ruby-prism-sys/tests/node_tests.rs b/rust/ruby-prism-sys/tests/node_tests.rs index 0de0eabe49..73c8c04d30 100644 --- a/rust/ruby-prism-sys/tests/node_tests.rs +++ b/rust/ruby-prism-sys/tests/node_tests.rs @@ -1,6 +1,6 @@ use std::{ffi::CString, mem::MaybeUninit}; -use ruby_prism_sys::{pm_arena_free, pm_arena_t, pm_node_type}; +use ruby_prism_sys::{pm_arena_cleanup, pm_arena_t, pm_node_type}; use ruby_prism_sys::{pm_parse, pm_parser_cleanup, pm_parser_init, pm_parser_t}; #[test] @@ -24,6 +24,6 @@ fn node_test() { assert_eq!((*parsed_node).type_, pm_node_type::PM_PROGRAM_NODE as u16); pm_parser_cleanup(parser); - pm_arena_free(arena.as_mut_ptr()); + pm_arena_cleanup(arena.as_mut_ptr()); } } diff --git a/rust/ruby-prism-sys/tests/parser_tests.rs b/rust/ruby-prism-sys/tests/parser_tests.rs index 3d85ef06de..7efb374ae8 100644 --- a/rust/ruby-prism-sys/tests/parser_tests.rs +++ b/rust/ruby-prism-sys/tests/parser_tests.rs @@ -5,7 +5,7 @@ use std::{ }; use ruby_prism_sys::{ - pm_arena_free, pm_arena_t, pm_comment_t, pm_comment_type_t, pm_diagnostic_t, pm_parse, pm_parser_cleanup, + pm_arena_cleanup, pm_arena_t, pm_comment_t, pm_comment_type_t, pm_diagnostic_t, pm_parse, pm_parser_cleanup, pm_parser_init, pm_parser_t, }; @@ -30,7 +30,7 @@ fn init_test() { let parser = parser.assume_init_mut(); pm_parser_cleanup(parser); - pm_arena_free(arena.as_mut_ptr()); + pm_arena_cleanup(arena.as_mut_ptr()); } } @@ -63,7 +63,7 @@ fn comments_test() { assert_eq!(location, 0..7); pm_parser_cleanup(parser); - pm_arena_free(arena.as_mut_ptr()); + pm_arena_cleanup(arena.as_mut_ptr()); } } @@ -102,6 +102,6 @@ fn diagnostics_test() { assert_eq!(location, 10..10); pm_parser_cleanup(parser); - pm_arena_free(arena.as_mut_ptr()); + pm_arena_cleanup(arena.as_mut_ptr()); } } diff --git a/rust/ruby-prism/src/parse_result/mod.rs b/rust/ruby-prism/src/parse_result/mod.rs index 4ac843661e..5e071b515b 100644 --- 
a/rust/ruby-prism/src/parse_result/mod.rs +++ b/rust/ruby-prism/src/parse_result/mod.rs @@ -8,7 +8,7 @@ mod diagnostics; use std::ptr::NonNull; -use ruby_prism_sys::{pm_arena_free, pm_arena_t, pm_comment_t, pm_diagnostic_t, pm_line_offset_list_line_column, pm_location_t, pm_magic_comment_t, pm_node_t, pm_parser_cleanup, pm_parser_t}; +use ruby_prism_sys::{pm_arena_cleanup, pm_arena_t, pm_comment_t, pm_diagnostic_t, pm_line_offset_list_line_column, pm_location_t, pm_magic_comment_t, pm_node_t, pm_parser_cleanup, pm_parser_t}; pub use self::comments::{Comment, CommentType, Comments, MagicComment, MagicComments}; pub use self::diagnostics::{Diagnostic, Diagnostics}; @@ -262,7 +262,7 @@ impl Drop for ParseResult<'_> { unsafe { pm_parser_cleanup(self.parser.as_ptr()); drop(Box::from_raw(self.parser.as_ptr())); - pm_arena_free(self.arena.as_mut()); + pm_arena_cleanup(self.arena.as_mut()); } } } diff --git a/src/arena.c b/src/arena.c index d0bd6f139c..393725c90f 100644 --- a/src/arena.c +++ b/src/arena.c @@ -85,7 +85,7 @@ pm_arena_alloc_slow(pm_arena_t *arena, size_t size) { * Free all blocks in the arena. 
*/ void -pm_arena_free(pm_arena_t *arena) { +pm_arena_cleanup(pm_arena_t *arena) { pm_arena_block_t *block = arena->current; while (block != NULL) { diff --git a/src/prism.c b/src/prism.c index 4c497d74b6..fb2b67a779 100644 --- a/src/prism.c +++ b/src/prism.c @@ -22501,7 +22501,7 @@ pm_parser_new(pm_arena_t *arena, const uint8_t *source, size_t size, const pm_op void pm_parser_cleanup(pm_parser_t *parser) { pm_string_cleanup(&parser->filepath); - pm_arena_free(&parser->metadata_arena); + pm_arena_cleanup(&parser->metadata_arena); while (parser->current_scope != NULL) { // Normally, popping the scope doesn't free the locals since it is @@ -22768,7 +22768,7 @@ pm_parse_stream(pm_parser_t **parser, pm_arena_t *arena, pm_buffer_t *buffer, vo eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof); pm_parser_free(tmp); - pm_arena_free(arena); + pm_arena_cleanup(arena); tmp = pm_parser_new(arena, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options); node = pm_parse(tmp); @@ -22794,7 +22794,7 @@ pm_parse_success_p(const uint8_t *source, size_t size, const char *data) { bool result = parser.error_list.size == 0; pm_parser_cleanup(&parser); - pm_arena_free(&arena); + pm_arena_cleanup(&arena); pm_options_cleanup(&options); return result; @@ -22849,7 +22849,7 @@ pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, cons pm_buffer_append_byte(buffer, '\0'); pm_parser_cleanup(&parser); - pm_arena_free(&arena); + pm_arena_cleanup(&arena); pm_options_cleanup(&options); } @@ -22872,7 +22872,7 @@ pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fge pm_buffer_free(parser_buffer); pm_parser_free(parser); - pm_arena_free(&arena); + pm_arena_cleanup(&arena); pm_options_cleanup(&options); } @@ -22895,7 +22895,7 @@ pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t s pm_serialize_comment_list(&parser.comment_list, buffer); pm_parser_cleanup(&parser); - 
pm_arena_free(&arena); + pm_arena_cleanup(&arena); pm_options_cleanup(&options); } diff --git a/templates/src/serialize.c.erb b/templates/src/serialize.c.erb index 405332e339..c92300b33d 100644 --- a/templates/src/serialize.c.erb +++ b/templates/src/serialize.c.erb @@ -345,7 +345,7 @@ pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const pm_serialize_metadata(&parser, buffer); pm_parser_cleanup(&parser); - pm_arena_free(&arena); + pm_arena_cleanup(&arena); pm_options_cleanup(&options); } @@ -369,7 +369,7 @@ pm_serialize_parse_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, pm_serialize(&parser, node, buffer); pm_parser_cleanup(&parser); - pm_arena_free(&arena); + pm_arena_cleanup(&arena); pm_options_cleanup(&options); } From 93085a52d999d2e67168d0560f89ba5f75336c08 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 18 Mar 2026 13:12:15 -0400 Subject: [PATCH 070/100] Make arena fully opaque --- ext/prism/extension.c | 42 ++++++++++++++++---------------- include/prism/arena.h | 43 +++++++++------------------------ include/prism/internal/arena.h | 41 +++++++++++++++++++++++++++++++ include/prism/internal/parser.h | 2 +- src/arena.c | 21 +++++++++++++++- 5 files changed, 95 insertions(+), 54 deletions(-) diff --git a/ext/prism/extension.c b/ext/prism/extension.c index 0606b9b238..2c64ff14ed 100644 --- a/ext/prism/extension.c +++ b/ext/prism/extension.c @@ -372,8 +372,8 @@ dump_input(pm_string_t *input, const pm_options_t *options) { rb_raise(rb_eNoMemError, "failed to allocate memory"); } - pm_arena_t arena = { 0 }; - pm_parser_t *parser = pm_parser_new(&arena, pm_string_source(input), pm_string_length(input), options); + pm_arena_t *arena = pm_arena_new(); + pm_parser_t *parser = pm_parser_new(arena, pm_string_source(input), pm_string_length(input), options); pm_node_t *node = pm_parse(parser); pm_serialize(parser, node, buffer); @@ -381,7 +381,7 @@ dump_input(pm_string_t *input, const pm_options_t *options) { VALUE result = 
rb_str_new(pm_buffer_value(buffer), pm_buffer_length(buffer)); pm_buffer_free(buffer); pm_parser_free(parser); - pm_arena_cleanup(&arena); + pm_arena_free(arena); return result; } @@ -777,8 +777,8 @@ parse_lex_encoding_changed_callback(pm_parser_t *parser) { */ static VALUE parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nodes) { - pm_arena_t arena = { 0 }; - pm_parser_t *parser = pm_parser_new(&arena, pm_string_source(input), pm_string_length(input), options); + pm_arena_t *arena = pm_arena_new(); + pm_parser_t *parser = pm_parser_new(arena, pm_string_source(input), pm_string_length(input), options); pm_parser_encoding_changed_callback_set(parser, parse_lex_encoding_changed_callback); VALUE source_string = rb_str_new((const char *) pm_string_source(input), pm_string_length(input)); @@ -828,7 +828,7 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod } pm_parser_free(parser); - pm_arena_cleanup(&arena); + pm_arena_free(arena); return result; } @@ -886,8 +886,8 @@ lex_file(int argc, VALUE *argv, VALUE self) { */ static VALUE parse_input(pm_string_t *input, const pm_options_t *options) { - pm_arena_t arena = { 0 }; - pm_parser_t *parser = pm_parser_new(&arena, pm_string_source(input), pm_string_length(input), options); + pm_arena_t *arena = pm_arena_new(); + pm_parser_t *parser = pm_parser_new(arena, pm_string_source(input), pm_string_length(input), options); pm_node_t *node = pm_parse(parser); rb_encoding *encoding = rb_enc_find(pm_parser_encoding_name(parser)); @@ -902,7 +902,7 @@ parse_input(pm_string_t *input, const pm_options_t *options) { } pm_parser_free(parser); - pm_arena_cleanup(&arena); + pm_arena_free(arena); return result; } @@ -1004,12 +1004,12 @@ parse_file(int argc, VALUE *argv, VALUE self) { */ static void profile_input(pm_string_t *input, const pm_options_t *options) { - pm_arena_t arena = { 0 }; - pm_parser_t *parser = pm_parser_new(&arena, pm_string_source(input), 
pm_string_length(input), options); + pm_arena_t *arena = pm_arena_new(); + pm_parser_t *parser = pm_parser_new(arena, pm_string_source(input), pm_string_length(input), options); pm_parse(parser); pm_parser_free(parser); - pm_arena_cleanup(&arena); + pm_arena_free(arena); } /** @@ -1104,11 +1104,11 @@ parse_stream(int argc, VALUE *argv, VALUE self) { pm_options_t *options = pm_options_new(); extract_options(options, Qnil, keywords); - pm_arena_t arena = { 0 }; + pm_arena_t *arena = pm_arena_new(); pm_parser_t *parser; pm_buffer_t *buffer = pm_buffer_new(); - pm_node_t *node = pm_parse_stream(&parser, &arena, buffer, (void *) stream, parse_stream_fgets, parse_stream_eof, options); + pm_node_t *node = pm_parse_stream(&parser, arena, buffer, (void *) stream, parse_stream_fgets, parse_stream_eof, options); rb_encoding *encoding = rb_enc_find(pm_parser_encoding_name(parser)); VALUE source = pm_source_new(parser, encoding, pm_options_freeze_get(options)); @@ -1117,7 +1117,7 @@ parse_stream(int argc, VALUE *argv, VALUE self) { pm_buffer_free(buffer); pm_parser_free(parser); - pm_arena_cleanup(&arena); + pm_arena_free(arena); pm_options_free(options); return result; @@ -1128,8 +1128,8 @@ parse_stream(int argc, VALUE *argv, VALUE self) { */ static VALUE parse_input_comments(pm_string_t *input, const pm_options_t *options) { - pm_arena_t arena = { 0 }; - pm_parser_t *parser = pm_parser_new(&arena, pm_string_source(input), pm_string_length(input), options); + pm_arena_t *arena = pm_arena_new(); + pm_parser_t *parser = pm_parser_new(arena, pm_string_source(input), pm_string_length(input), options); pm_parse(parser); rb_encoding *encoding = rb_enc_find(pm_parser_encoding_name(parser)); @@ -1138,7 +1138,7 @@ parse_input_comments(pm_string_t *input, const pm_options_t *options) { VALUE comments = parser_comments(parser, source, pm_options_freeze_get(options)); pm_parser_free(parser); - pm_arena_cleanup(&arena); + pm_arena_free(arena); return comments; } @@ -1250,14 +1250,14 @@ 
parse_lex_file(int argc, VALUE *argv, VALUE self) { */ static VALUE parse_input_success_p(pm_string_t *input, const pm_options_t *options) { - pm_arena_t arena = { 0 }; - pm_parser_t *parser = pm_parser_new(&arena, pm_string_source(input), pm_string_length(input), options); + pm_arena_t *arena = pm_arena_new(); + pm_parser_t *parser = pm_parser_new(arena, pm_string_source(input), pm_string_length(input), options); pm_parse(parser); VALUE result = pm_parser_errors_size(parser) == 0 ? Qtrue : Qfalse; pm_parser_free(parser); - pm_arena_cleanup(&arena); + pm_arena_free(arena); return result; } diff --git a/include/prism/arena.h b/include/prism/arena.h index 303ccc554b..1b1729bb24 100644 --- a/include/prism/arena.h +++ b/include/prism/arena.h @@ -7,48 +7,29 @@ #define PRISM_ARENA_H #include "prism/compiler/exported.h" -#include "prism/compiler/flex_array.h" -#include "prism/compiler/force_inline.h" #include /** - * A single block of memory in the arena. Blocks are linked via prev pointers so - * they can be freed by walking the chain. + * An opaque pointer to an arena that is used for allocations. */ -typedef struct pm_arena_block { - /** The previous block in the chain (for freeing). */ - struct pm_arena_block *prev; - - /** The total usable bytes in data[]. */ - size_t capacity; - - /** The number of bytes consumed so far. */ - size_t used; - - /** The block's data. */ - char data[PM_FLEX_ARRAY_LENGTH]; -} pm_arena_block_t; +typedef struct pm_arena_t pm_arena_t; /** - * A bump allocator. Allocations are made by bumping a pointer within the - * current block. When a block is full, a new block is allocated and linked to - * the previous one. All blocks are freed at once by walking the chain. + * Returns a newly allocated and initialized arena. If the arena cannot be + * allocated, this function aborts the process. + * + * @return A pointer to the newly allocated arena. 
It is the responsibility of + * the caller to free the arena using pm_arena_free when it is no longer + * needed. */ -typedef struct { - /** The active block (allocate from here). */ - pm_arena_block_t *current; - - /** The number of blocks allocated. */ - size_t block_count; -} pm_arena_t; +PRISM_EXPORTED_FUNCTION pm_arena_t * pm_arena_new(void); /** - * Free all blocks in the arena. After this call, all pointers returned by - * pm_arena_alloc and pm_arena_zalloc are invalid. + * Frees both the held memory and the arena itself. * - * @param arena The arena whose held memory should be freed. + * @param arena The arena to free. */ -PRISM_EXPORTED_FUNCTION void pm_arena_cleanup(pm_arena_t *arena); +PRISM_EXPORTED_FUNCTION void pm_arena_free(pm_arena_t *arena); #endif diff --git a/include/prism/internal/arena.h b/include/prism/internal/arena.h index 1a393f7bcb..54bbead6bd 100644 --- a/include/prism/internal/arena.h +++ b/include/prism/internal/arena.h @@ -7,6 +7,8 @@ #define PRISM_INTERNAL_ARENA_H #include "prism/compiler/exported.h" +#include "prism/compiler/flex_array.h" +#include "prism/compiler/force_inline.h" #include "prism/compiler/inline.h" #include "prism/arena.h" @@ -14,6 +16,45 @@ #include #include +/** + * A single block of memory in the arena. Blocks are linked via prev pointers so + * they can be freed by walking the chain. + */ +typedef struct pm_arena_block { + /** The previous block in the chain (for freeing). */ + struct pm_arena_block *prev; + + /** The total usable bytes in data[]. */ + size_t capacity; + + /** The number of bytes consumed so far. */ + size_t used; + + /** The block's data. */ + char data[PM_FLEX_ARRAY_LENGTH]; +} pm_arena_block_t; + +/** + * A bump allocator. Allocations are made by bumping a pointer within the + * current block. When a block is full, a new block is allocated and linked to + * the previous one. All blocks are freed at once by walking the chain. + */ +struct pm_arena_t { + /** The active block (allocate from here). 
*/ + pm_arena_block_t *current; + + /** The number of blocks allocated. */ + size_t block_count; +}; + +/** + * Free all blocks in the arena. After this call, all pointers returned by + * pm_arena_alloc and pm_arena_zalloc are invalid. + * + * @param arena The arena whose held memory should be freed. + */ +void pm_arena_cleanup(pm_arena_t *arena); + /** * Ensure the arena has at least `capacity` bytes available in its current * block, allocating a new block if necessary. This allows callers to diff --git a/include/prism/internal/parser.h b/include/prism/internal/parser.h index f6618f67ed..7ea8e9c9cf 100644 --- a/include/prism/internal/parser.h +++ b/include/prism/internal/parser.h @@ -8,12 +8,12 @@ #include "prism/compiler/accel.h" +#include "prism/internal/arena.h" #include "prism/internal/encoding.h" #include "prism/internal/list.h" #include "prism/internal/options.h" #include "prism/internal/static_literals.h" -#include "prism/arena.h" #include "prism/ast.h" #include "prism/line_offset_list.h" #include "prism/parser.h" diff --git a/src/arena.c b/src/arena.c index 393725c90f..dd998402f3 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1,4 +1,4 @@ -#include "prism/arena.h" +#include "prism/internal/arena.h" #include "prism/internal/allocator.h" @@ -81,6 +81,16 @@ pm_arena_alloc_slow(pm_arena_t *arena, size_t size) { return block->data; } +/** + * Returns a newly allocated and initialized arena. + */ +pm_arena_t * +pm_arena_new(void) { + pm_arena_t *arena = (pm_arena_t *) xcalloc(1, sizeof(pm_arena_t)); + if (arena == NULL) abort(); + return arena; +} + /** * Free all blocks in the arena. */ @@ -96,3 +106,12 @@ pm_arena_cleanup(pm_arena_t *arena) { *arena = (pm_arena_t) { 0 }; } + +/** + * Frees both the held memory and the arena itself. 
+ */ +void +pm_arena_free(pm_arena_t *arena) { + pm_arena_cleanup(arena); + xfree(arena); +} From 250b2c9ac523a0d04ccaec9a81c0ddb4a85155c9 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 18 Mar 2026 13:30:51 -0400 Subject: [PATCH 071/100] Make the constant pool fully opaque --- include/prism/constant_pool.h | 72 +++++++------------------- include/prism/internal/constant_pool.h | 60 +++++++++++++++++++++ include/prism/internal/parser.h | 1 + include/prism/parser.h | 38 +++++++++++--- src/comments.c | 3 -- src/constant_pool.c | 14 +++++ src/magic_comments.c | 3 -- src/parser.c | 29 ++++++++--- templates/ext/prism/api_node.c.erb | 37 +++++++------ 9 files changed, 166 insertions(+), 91 deletions(-) diff --git a/include/prism/constant_pool.h b/include/prism/constant_pool.h index b1db33f8e3..08a06d1612 100644 --- a/include/prism/constant_pool.h +++ b/include/prism/constant_pool.h @@ -10,6 +10,8 @@ #ifndef PRISM_CONSTANT_POOL_H #define PRISM_CONSTANT_POOL_H +#include "prism/compiler/exported.h" + #include #include @@ -32,64 +34,26 @@ typedef struct { pm_constant_id_t *ids; } pm_constant_id_list_t; -/** - * The type of bucket in the constant pool hash map. This determines how the - * bucket should be freed. - */ -typedef unsigned int pm_constant_pool_bucket_type_t; - -/** By default, each constant is a slice of the source. */ -static const pm_constant_pool_bucket_type_t PM_CONSTANT_POOL_BUCKET_DEFAULT = 0; - -/** An owned constant is one for which memory has been allocated. */ -static const pm_constant_pool_bucket_type_t PM_CONSTANT_POOL_BUCKET_OWNED = 1; - -/** A constant constant is known at compile time. */ -static const pm_constant_pool_bucket_type_t PM_CONSTANT_POOL_BUCKET_CONSTANT = 2; - -/** A bucket in the hash map. */ -typedef struct { - /** The incremental ID used for indexing back into the pool. */ - unsigned int id: 30; - - /** The type of the bucket, which determines how to free it. 
*/ - pm_constant_pool_bucket_type_t type: 2; - - /** The hash of the bucket. */ - uint32_t hash; - - /** - * A pointer to the start of the string, stored directly in the bucket to - * avoid a pointer chase to the constants array during probing. - */ - const uint8_t *start; - - /** The length of the string. */ - size_t length; -} pm_constant_pool_bucket_t; - /** A constant in the pool which effectively stores a string. */ -typedef struct { - /** A pointer to the start of the string. */ - const uint8_t *start; - - /** The length of the string. */ - size_t length; -} pm_constant_t; +typedef struct pm_constant_t pm_constant_t; /** The overall constant pool, which stores constants found while parsing. */ -typedef struct { - /** The buckets in the hash map. */ - pm_constant_pool_bucket_t *buckets; - - /** The constants that are stored in the buckets. */ - pm_constant_t *constants; +typedef struct pm_constant_pool_t pm_constant_pool_t; - /** The number of buckets in the hash map. */ - uint32_t size; +/** + * Return a raw pointer to the start of a constant. + * + * @param constant The constant to get the start of. + * @return A raw pointer to the start of the constant. + */ +PRISM_EXPORTED_FUNCTION const uint8_t * pm_constant_start(const pm_constant_t *constant); - /** The number of buckets that have been allocated in the hash map. */ - uint32_t capacity; -} pm_constant_pool_t; +/** + * Return the length of a constant. + * + * @param constant The constant to get the length of. + * @return The length of the constant. + */ +PRISM_EXPORTED_FUNCTION size_t pm_constant_length(const pm_constant_t *constant); #endif diff --git a/include/prism/internal/constant_pool.h b/include/prism/internal/constant_pool.h index 9e7d3cd74b..68d7d63203 100644 --- a/include/prism/internal/constant_pool.h +++ b/include/prism/internal/constant_pool.h @@ -16,6 +16,66 @@ #include +/** A constant in the pool which effectively stores a string. 
*/ +struct pm_constant_t { + /** A pointer to the start of the string. */ + const uint8_t *start; + + /** The length of the string. */ + size_t length; +}; + +/** + * The type of bucket in the constant pool hash map. This determines how the + * bucket should be freed. + */ +typedef unsigned int pm_constant_pool_bucket_type_t; + +/** By default, each constant is a slice of the source. */ +static const pm_constant_pool_bucket_type_t PM_CONSTANT_POOL_BUCKET_DEFAULT = 0; + +/** An owned constant is one for which memory has been allocated. */ +static const pm_constant_pool_bucket_type_t PM_CONSTANT_POOL_BUCKET_OWNED = 1; + +/** A constant constant is known at compile time. */ +static const pm_constant_pool_bucket_type_t PM_CONSTANT_POOL_BUCKET_CONSTANT = 2; + +/** A bucket in the hash map. */ +typedef struct { + /** The incremental ID used for indexing back into the pool. */ + unsigned int id: 30; + + /** The type of the bucket, which determines how to free it. */ + pm_constant_pool_bucket_type_t type: 2; + + /** The hash of the bucket. */ + uint32_t hash; + + /** + * A pointer to the start of the string, stored directly in the bucket to + * avoid a pointer chase to the constants array during probing. + */ + const uint8_t *start; + + /** The length of the string. */ + size_t length; +} pm_constant_pool_bucket_t; + +/** The overall constant pool, which stores constants found while parsing. */ +struct pm_constant_pool_t { + /** The buckets in the hash map. */ + pm_constant_pool_bucket_t *buckets; + + /** The constants that are stored in the buckets. */ + pm_constant_t *constants; + + /** The number of buckets in the hash map. */ + uint32_t size; + + /** The number of buckets that have been allocated in the hash map. */ + uint32_t capacity; +}; + /** * When we allocate constants into the pool, we reserve 0 to mean that the slot * is not yet filled. 
This constant is reused in other places to indicate the diff --git a/include/prism/internal/parser.h b/include/prism/internal/parser.h index 7ea8e9c9cf..354b5adef4 100644 --- a/include/prism/internal/parser.h +++ b/include/prism/internal/parser.h @@ -9,6 +9,7 @@ #include "prism/compiler/accel.h" #include "prism/internal/arena.h" +#include "prism/internal/constant_pool.h" #include "prism/internal/encoding.h" #include "prism/internal/list.h" #include "prism/internal/options.h" diff --git a/include/prism/parser.h b/include/prism/parser.h index f15ddd8901..1f0b7fc0f3 100644 --- a/include/prism/parser.h +++ b/include/prism/parser.h @@ -99,14 +99,6 @@ PRISM_EXPORTED_FUNCTION const char * pm_parser_encoding_name(const pm_parser_t * */ PRISM_EXPORTED_FUNCTION const pm_line_offset_list_t * pm_parser_line_offsets(const pm_parser_t *parser); -/** - * Returns the constant pool associated with the given parser. - * - * @param parser the parser whose constant pool we want to get - * @return the constant pool associated with the given parser - */ -PRISM_EXPORTED_FUNCTION const pm_constant_pool_t * pm_parser_constant_pool(const pm_parser_t *parser); - /** * Returns the location of the __DATA__ section that is associated with the * given parser. @@ -237,4 +229,34 @@ PRISM_EXPORTED_FUNCTION void pm_parser_errors_each(const pm_parser_t *parser, pm */ PRISM_EXPORTED_FUNCTION void pm_parser_warnings_each(const pm_parser_t *parser, pm_diagnostic_callback_t callback, void *data); +/** + * Returns the number of constants in the constant pool associated with the + * given parser. + * + * @param parser the parser whose constant pool constants we want to get the + * size of + * @return the number of constants in the constant pool associated with the + * given parser + */ +PRISM_EXPORTED_FUNCTION size_t pm_parser_constants_size(const pm_parser_t *parser); + +/** + * A callback function that can be used to process constants found while + * parsing. 
+ */ +typedef void (*pm_constant_callback_t)(const pm_constant_t *constant, void *data); + +/** + * Iterates over the constants in the constant pool associated with the given + * parser and calls the given callback for each constant. + * + * @param parser the parser whose constants we want to iterate over + * @param callback the callback function to call for each constant. This function + * will be passed a pointer to the constant and the data parameter passed to + * this function. + * @param data the data to pass to the callback function for each constant. This + * can be NULL if no data needs to be passed to the callback function. + */ +PRISM_EXPORTED_FUNCTION void pm_parser_constants_each(const pm_parser_t *parser, pm_constant_callback_t callback, void *data); + #endif diff --git a/src/comments.c b/src/comments.c index be535d2f7a..5c2898a43f 100644 --- a/src/comments.c +++ b/src/comments.c @@ -1,10 +1,7 @@ #include "prism/internal/comments.h" -#include "prism/internal/allocator.h" #include "prism/internal/parser.h" -#include - /** * Returns the location associated with the given comment. */ diff --git a/src/constant_pool.c b/src/constant_pool.c index 3f0baac702..90201ebb8e 100644 --- a/src/constant_pool.c +++ b/src/constant_pool.c @@ -344,3 +344,17 @@ pm_constant_pool_insert_constant(pm_arena_t *arena, pm_constant_pool_t *pool, co return pm_constant_pool_insert(arena, pool, start, length, PM_CONSTANT_POOL_BUCKET_CONSTANT); } +/** + * Return a raw pointer to the start of a constant. + */ +const uint8_t * +pm_constant_start(const pm_constant_t *constant) { + return constant->start; +} + +/** + * Return the length of a constant. 
+ */ +size_t pm_constant_length(const pm_constant_t *constant) { + return constant->length; +} diff --git a/src/magic_comments.c b/src/magic_comments.c index 6648010061..0b2e1d3049 100644 --- a/src/magic_comments.c +++ b/src/magic_comments.c @@ -1,10 +1,7 @@ #include "prism/internal/magic_comments.h" -#include "prism/internal/allocator.h" #include "prism/internal/parser.h" -#include - /** * Returns the location associated with the given magic comment key. */ diff --git a/src/parser.c b/src/parser.c index 27c9641f83..68778487e8 100644 --- a/src/parser.c +++ b/src/parser.c @@ -80,14 +80,6 @@ pm_parser_line_offsets(const pm_parser_t *parser) { return &parser->line_offsets; } -/** - * Returns the constant pool associated with the given parser. - */ -const pm_constant_pool_t * -pm_parser_constant_pool(const pm_parser_t *parser) { - return &parser->constant_pool; -} - /** * Returns the location of the __DATA__ section that is associated with the * given parser, if it exists. @@ -204,3 +196,24 @@ void pm_parser_warnings_each(const pm_parser_t *parser, pm_diagnostic_callback_t callback, void *data) { pm_parser_diagnostics_each(&parser->warning_list, callback, data); } + +/** + * Returns the number of constants in the constant pool associated with the + * given parser. + */ +size_t +pm_parser_constants_size(const pm_parser_t *parser) { + return parser->constant_pool.size; +} + +/** + * Iterates over the constants in the constant pool associated with the given + * parser and calls the given callback for each constant. 
+ */ +void +pm_parser_constants_each(const pm_parser_t *parser, pm_constant_callback_t callback, void *data) { + for (uint32_t index = 0; index < parser->constant_pool.size; index++) { + const pm_constant_t *constant = &parser->constant_pool.constants[index]; + callback(constant, data); + } +} diff --git a/templates/ext/prism/api_node.c.erb b/templates/ext/prism/api_node.c.erb index fffb2bebb5..ca793b471c 100644 --- a/templates/ext/prism/api_node.c.erb +++ b/templates/ext/prism/api_node.c.erb @@ -122,26 +122,33 @@ pm_node_stack_pop(pm_node_stack_node_t **stack) { return visit; } -VALUE -pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encoding, VALUE source, bool freeze) { - const pm_constant_pool_t *constant_pool = pm_parser_constant_pool(parser); - VALUE constants = rb_ary_new_capa(constant_pool->size); - - for (uint32_t index = 0; index < constant_pool->size; index++) { - pm_constant_t *constant = &constant_pool->constants[index]; - int state = 0; +typedef struct { + VALUE constants; + rb_encoding *encoding; +} pm_ast_constants_each_data_t; - VALUE string = rb_enc_str_new((const char *) constant->start, constant->length, encoding); - VALUE value = rb_protect(rb_str_intern, string, &state); +static void +pm_ast_constants_each(const pm_constant_t *constant, void *data) { + pm_ast_constants_each_data_t *constants_data = (pm_ast_constants_each_data_t *) data; + int state = 0; - if (state != 0) { - value = ID2SYM(rb_intern_const("?")); - rb_set_errinfo(Qnil); - } + VALUE string = rb_enc_str_new((const char *) pm_constant_start(constant), pm_constant_length(constant), constants_data->encoding); + VALUE value = rb_protect(rb_str_intern, string, &state); - rb_ary_push(constants, value); + if (state != 0) { + value = ID2SYM(rb_intern_const("?")); + rb_set_errinfo(Qnil); } + rb_ary_push(constants_data->constants, value); +} + +VALUE +pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encoding, VALUE source, bool freeze) { + 
VALUE constants = rb_ary_new_capa(pm_parser_constants_size(parser)); + pm_ast_constants_each_data_t constants_data = { .constants = constants, .encoding = encoding }; + pm_parser_constants_each(parser, pm_ast_constants_each, &constants_data); + pm_node_stack_node_t *node_stack = NULL; pm_node_stack_push(&node_stack, node); VALUE value_stack = rb_ary_new(); From fb0d1369f423310fb2297de20a67faedd1ab22c8 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 18 Mar 2026 13:32:38 -0400 Subject: [PATCH 072/100] Inline comments and magic comments, they do not need their own TUs --- prism.gemspec | 2 -- src/comments.c | 19 ------------------- src/magic_comments.c | 19 ------------------- src/parser.c | 32 ++++++++++++++++++++++++++++++++ 4 files changed, 32 insertions(+), 40 deletions(-) delete mode 100644 src/comments.c delete mode 100644 src/magic_comments.c diff --git a/prism.gemspec b/prism.gemspec index 41eb9888ca..d3fd8da546 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -189,14 +189,12 @@ Gem::Specification.new do |spec| "src/arena.c", "src/buffer.c", "src/char.c", - "src/comments.c", "src/constant_pool.c", "src/diagnostic.c", "src/encoding.c", "src/integer.c", "src/line_offset_list.c", "src/list.c", - "src/magic_comments.c", "src/memchr.c", "src/node.c", "src/options.c", diff --git a/src/comments.c b/src/comments.c deleted file mode 100644 index 5c2898a43f..0000000000 --- a/src/comments.c +++ /dev/null @@ -1,19 +0,0 @@ -#include "prism/internal/comments.h" - -#include "prism/internal/parser.h" - -/** - * Returns the location associated with the given comment. - */ -pm_location_t -pm_comment_location(const pm_comment_t *comment) { - return comment->location; -} - -/** - * Returns the type associated with the given comment. 
- */ -pm_comment_type_t -pm_comment_type(const pm_comment_t *comment) { - return comment->type; -} diff --git a/src/magic_comments.c b/src/magic_comments.c deleted file mode 100644 index 0b2e1d3049..0000000000 --- a/src/magic_comments.c +++ /dev/null @@ -1,19 +0,0 @@ -#include "prism/internal/magic_comments.h" - -#include "prism/internal/parser.h" - -/** - * Returns the location associated with the given magic comment key. - */ -pm_location_t -pm_magic_comment_key(const pm_magic_comment_t *magic_comment) { - return magic_comment->key; -} - -/** - * Returns the location associated with the given magic comment value. - */ -pm_location_t -pm_magic_comment_value(const pm_magic_comment_t *magic_comment) { - return magic_comment->value; -} diff --git a/src/parser.c b/src/parser.c index 68778487e8..b61fb6ce03 100644 --- a/src/parser.c +++ b/src/parser.c @@ -109,6 +109,22 @@ pm_parser_lex_state(const pm_parser_t *parser) { return (int) parser->lex_state; } +/** + * Returns the location associated with the given comment. + */ +pm_location_t +pm_comment_location(const pm_comment_t *comment) { + return comment->location; +} + +/** + * Returns the type associated with the given comment. + */ +pm_comment_type_t +pm_comment_type(const pm_comment_t *comment) { + return comment->type; +} + /** * Returns the number of comments associated with the given parser. */ @@ -131,6 +147,22 @@ pm_parser_comments_each(const pm_parser_t *parser, pm_comment_callback_t callbac } } +/** + * Returns the location associated with the given magic comment key. + */ +pm_location_t +pm_magic_comment_key(const pm_magic_comment_t *magic_comment) { + return magic_comment->key; +} + +/** + * Returns the location associated with the given magic comment value. + */ +pm_location_t +pm_magic_comment_value(const pm_magic_comment_t *magic_comment) { + return magic_comment->value; +} + /** * Returns the number of magic comments associated with the given parser. 
*/ From aa84fd5f5311c02abc0203f0d963f361b963d493 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 18 Mar 2026 13:36:32 -0400 Subject: [PATCH 073/100] Cleanup --- cpp/test.cpp | 6 +++--- include/prism.h | 14 +++++++------- include/prism/magic_comments.h | 35 ---------------------------------- include/prism/node.h | 6 +++--- 4 files changed, 13 insertions(+), 48 deletions(-) diff --git a/cpp/test.cpp b/cpp/test.cpp index 916d10381d..3d21c99769 100644 --- a/cpp/test.cpp +++ b/cpp/test.cpp @@ -5,8 +5,8 @@ extern "C" { #include int main() { - pm_arena_t arena = { 0 }; - pm_parser_t *parser = pm_parser_new(&arena, reinterpret_cast("1 + 2"), 5, NULL); + pm_arena_t *arena = pm_arena_new(); + pm_parser_t *parser = pm_parser_new(arena, reinterpret_cast("1 + 2"), 5, NULL); pm_node_t *root = pm_parse(parser); pm_buffer_t *buffer = pm_buffer_new(); @@ -18,7 +18,7 @@ int main() { pm_buffer_free(buffer); pm_parser_free(parser); - pm_arena_cleanup(&arena); + pm_arena_free(arena); return 0; } diff --git a/include/prism.h b/include/prism.h index 5a8b835978..64c1657113 100644 --- a/include/prism.h +++ b/include/prism.h @@ -260,20 +260,20 @@ PRISM_EXPORTED_FUNCTION void pm_dump_json(pm_buffer_t *buffer, const pm_parser_t * * `pm_parser_new()` - allocate and initialize a new parser * * `pm_parse()` - parse and return the root node * * `pm_parser_free()` - free the parser and its internal memory - * * `pm_arena_cleanup()` - free all AST-lifetime memory + * * `pm_arena_free()` - free all AST-lifetime memory * * Putting all of this together would look something like: * * ```c * void parse(const uint8_t *source, size_t length) { - * pm_arena_t arena = { 0 }; - * pm_parser_t *parser = pm_parser_new(&arena, source, length, NULL); + * pm_arena_t *arena = pm_arena_new(); + * pm_parser_t *parser = pm_parser_new(arena, source, length, NULL); * * pm_node_t *root = pm_parse(parser); * printf("PARSED!\n"); * * pm_parser_free(parser); - * pm_arena_cleanup(&arena); + * pm_arena_free(arena); * } 
* ``` * @@ -315,8 +315,8 @@ PRISM_EXPORTED_FUNCTION void pm_dump_json(pm_buffer_t *buffer, const pm_parser_t * * ```c * void prettyprint(const uint8_t *source, size_t length) { - * pm_arena_t arena = { 0 }; - * pm_parser_t *parser = pm_parser_new(&arena, source, length, NULL); + * pm_arena_t *arena = pm_arena_new(); + * pm_parser_t *parser = pm_parser_new(arena, source, length, NULL); * * pm_node_t *root = pm_parse(parser); * pm_buffer_t *buffer = pm_buffer_new(); @@ -326,7 +326,7 @@ PRISM_EXPORTED_FUNCTION void pm_dump_json(pm_buffer_t *buffer, const pm_parser_t * * pm_buffer_free(buffer); * pm_parser_free(parser); - * pm_arena_cleanup(&arena); + * pm_arena_free(arena); * } * ``` */ diff --git a/include/prism/magic_comments.h b/include/prism/magic_comments.h index 10f9cfa32d..a19ecedb74 100644 --- a/include/prism/magic_comments.h +++ b/include/prism/magic_comments.h @@ -29,39 +29,4 @@ PRISM_EXPORTED_FUNCTION pm_location_t pm_magic_comment_key(const pm_magic_commen */ PRISM_EXPORTED_FUNCTION pm_location_t pm_magic_comment_value(const pm_magic_comment_t *comment); -/* An opaque pointer to an iterator that can be used to iterate over the - * magic comments associated with a parser. */ -typedef struct pm_magic_comments_iter_t pm_magic_comments_iter_t; - -/** - * Returns the number of magic comments associated with the magic comments iterator. - * - * @param iter the iterator to get the number of magic comments from - * @return the number of magic comments associated with the magic comments iterator - * - * \public \memberof pm_magic_comments_iter_t - */ -PRISM_EXPORTED_FUNCTION size_t pm_magic_comments_iter_size(const pm_magic_comments_iter_t *iter); - -/** - * Returns the next magic comment in the iteration, or NULL if there are no more - * magic comments. - * - * @param iter the iterator to get the next magic comment from - * @return the next magic comment in the iteration, or NULL if there are no more - * magic comments. 
- * - * \public \memberof pm_magic_comments_iter_t - */ -PRISM_EXPORTED_FUNCTION const pm_magic_comment_t * pm_magic_comments_iter_next(pm_magic_comments_iter_t *iter); - -/** - * Frees the memory associated with the given magic comments iterator. - * - * @param iter the iterator to free - * - * \public \memberof pm_magic_comments_iter_t - */ -PRISM_EXPORTED_FUNCTION void pm_magic_comments_iter_free(pm_magic_comments_iter_t *iter); - #endif diff --git a/include/prism/node.h b/include/prism/node.h index bfda2a6a52..5056bfbb26 100644 --- a/include/prism/node.h +++ b/include/prism/node.h @@ -54,10 +54,10 @@ PRISM_EXPORTED_FUNCTION const char * pm_node_type_to_str(pm_node_type_t node_typ * const char *source = "1 + 2; 3 + 4"; * size_t size = strlen(source); * - * pm_arena_t arena = { 0 }; + * pm_arena_t *arena = pm_arena_new(); * pm_options_t *options = pm_options_new(); * - * pm_parser_t *parser = pm_parser_new(&arena, (const uint8_t *) source, size, options); + * pm_parser_t *parser = pm_parser_new(arena, (const uint8_t *) source, size, options); * * size_t indent = 0; * pm_node_t *node = pm_parse(parser); @@ -67,7 +67,7 @@ PRISM_EXPORTED_FUNCTION const char * pm_node_type_to_str(pm_node_type_t node_typ * * pm_parser_free(parser); * pm_options_free(options); - * pm_arena_cleanup(&arena); + * pm_arena_free(arena); * * return EXIT_SUCCESS; * } From 879139efc9e2e153250b0a306752f40ff11e5263 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 18 Mar 2026 13:43:18 -0400 Subject: [PATCH 074/100] pm_parser_init and pm_parser_cleanup -> internal --- include/prism.h | 38 ++++++--------------------------- include/prism/internal/parser.h | 23 ++++++++++++++++++++ 2 files changed, 29 insertions(+), 32 deletions(-) diff --git a/include/prism.h b/include/prism.h index 64c1657113..2fdb048519 100644 --- a/include/prism.h +++ b/include/prism.h @@ -28,21 +28,6 @@ extern "C" { */ PRISM_EXPORTED_FUNCTION const char * pm_version(void); -/** - * Initialize a parser with the given start 
and end pointers. - * - * @param arena The arena to use for all AST-lifetime allocations. It is caller- - * owned and must outlive the parser. - * @param parser The parser to initialize. - * @param source The source to parse. - * @param size The size of the source. - * @param options The optional options to use when parsing. These options must - * live for the whole lifetime of this parser. - * - * \public \memberof pm_parser - */ -PRISM_EXPORTED_FUNCTION void pm_parser_init(pm_arena_t *arena, pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options); - /** * Allocate and initialize a parser with the given start and end pointers. * @@ -59,18 +44,6 @@ PRISM_EXPORTED_FUNCTION void pm_parser_init(pm_arena_t *arena, pm_parser_t *pars */ PRISM_EXPORTED_FUNCTION pm_parser_t * pm_parser_new(pm_arena_t *arena, const uint8_t *source, size_t size, const pm_options_t *options); -/** - * Free the memory held by the given parser. - * - * This does not free the `pm_options_t` object that was used to initialize the - * parser. - * - * @param parser The parser whose held memory should be freed. - * - * \public \memberof pm_parser - */ -PRISM_EXPORTED_FUNCTION void pm_parser_cleanup(pm_parser_t *parser); - /** * Free both the memory held by the given parser and the parser itself. * @@ -120,9 +93,9 @@ typedef int (pm_parse_stream_feof_t)(void *stream); */ PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_parser_t **parser, pm_arena_t *arena, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options); -// We optionally support serializing to a binary string. For systems that don't -// want or need this functionality, it can be turned off with the -// PRISM_EXCLUDE_SERIALIZATION define. +/* We optionally support serializing to a binary string. For systems that do not + * want or need this functionality, it can be turned off with the + * PRISM_EXCLUDE_SERIALIZATION define. 
*/ #ifndef PRISM_EXCLUDE_SERIALIZATION /** @@ -215,8 +188,9 @@ PRISM_EXPORTED_FUNCTION const char * pm_token_type_name(pm_token_type_t token_ty */ const char * pm_token_type_human(pm_token_type_t token_type); -// We optionally support dumping to JSON. For systems that don't want or need -// this functionality, it can be turned off with the PRISM_EXCLUDE_JSON define. +/* We optionally support dumping to JSON. For systems that don't want or need + * this functionality, it can be turned off with the PRISM_EXCLUDE_JSON define. + */ #ifndef PRISM_EXCLUDE_JSON /** diff --git a/include/prism/internal/parser.h b/include/prism/internal/parser.h index 354b5adef4..c731e629db 100644 --- a/include/prism/internal/parser.h +++ b/include/prism/internal/parser.h @@ -934,4 +934,27 @@ struct pm_parser_t { #endif }; +/** + * Initialize a parser with the given start and end pointers. + * + * @param arena The arena to use for all AST-lifetime allocations. It is caller- + * owned and must outlive the parser. + * @param parser The parser to initialize. + * @param source The source to parse. + * @param size The size of the source. + * @param options The optional options to use when parsing. These options must + * live for the whole lifetime of this parser. + */ +void pm_parser_init(pm_arena_t *arena, pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options); + +/** + * Free the memory held by the given parser. + * + * This does not free the `pm_options_t` object that was used to initialize the + * parser. + * + * @param parser The parser whose held memory should be freed. 
+ */ +void pm_parser_cleanup(pm_parser_t *parser); + #endif From 456167dccd910d92bb53ca139567bafcf41fbf86 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 18 Mar 2026 13:55:33 -0400 Subject: [PATCH 075/100] Move even more headers into their own spots --- include/prism.h | 172 ++---------------------------- include/prism/json.h | 31 ++++++ include/prism/parser.h | 26 +++++ include/prism/serialize.h | 86 +++++++++++++++ include/prism/stream.h | 44 ++++++++ src/prism.c | 4 +- templates/include/prism/ast.h.erb | 16 +++ 7 files changed, 213 insertions(+), 166 deletions(-) create mode 100644 include/prism/json.h create mode 100644 include/prism/serialize.h create mode 100644 include/prism/stream.h diff --git a/include/prism.h b/include/prism.h index 2fdb048519..64674d154a 100644 --- a/include/prism.h +++ b/include/prism.h @@ -14,10 +14,13 @@ extern "C" { #include "prism/ast.h" #include "prism/diagnostic.h" #include "prism/excludes.h" +#include "prism/json.h" #include "prism/node.h" #include "prism/options.h" #include "prism/parser.h" #include "prism/prettyprint.h" +#include "prism/serialize.h" +#include "prism/stream.h" #include "prism/string_query.h" #include "prism/version.h" @@ -28,31 +31,6 @@ extern "C" { */ PRISM_EXPORTED_FUNCTION const char * pm_version(void); -/** - * Allocate and initialize a parser with the given start and end pointers. - * - * @param arena The arena to use for all AST-lifetime allocations. It is caller- - * owned and must outlive the parser. - * @param source The source to parse. - * @param size The size of the source. - * @param options The optional options to use when parsing. These options must - * live for the whole lifetime of this parser. - * @return The initialized parser. It is the responsibility of the caller to - * free the parser with `pm_parser_free()`. 
- * - * \public \memberof pm_parser - */ -PRISM_EXPORTED_FUNCTION pm_parser_t * pm_parser_new(pm_arena_t *arena, const uint8_t *source, size_t size, const pm_options_t *options); - -/** - * Free both the memory held by the given parser and the parser itself. - * - * @param parser The parser to free. - * - * \public \memberof pm_parser - */ -PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser); - /** * Initiate the parser with the given parser. * @@ -63,105 +41,6 @@ PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser); */ PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser); -/** - * This function is used in pm_parse_stream() to retrieve a line of input from a - * stream. It closely mirrors that of fgets so that fgets can be used as the - * default implementation. - */ -typedef char * (pm_parse_stream_fgets_t)(char *string, int size, void *stream); - -/** - * This function is used in pm_parse_stream to check whether a stream is EOF. - * It closely mirrors that of feof so that feof can be used as the - * default implementation. - */ -typedef int (pm_parse_stream_feof_t)(void *stream); - -/** - * Parse a stream of Ruby source and return the tree. - * - * @param parser The out parameter to write the parser to. - * @param arena The arena to use for all AST-lifetime allocations. - * @param buffer The buffer to use. - * @param stream The stream to parse. - * @param stream_fgets The function to use to read from the stream. - * @param stream_feof The function to use to determine if the stream has hit eof. - * @param options The optional options to use when parsing. - * @return The AST representing the source. 
- * - * \public \memberof pm_parser - */ -PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_parser_t **parser, pm_arena_t *arena, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options); - -/* We optionally support serializing to a binary string. For systems that do not - * want or need this functionality, it can be turned off with the - * PRISM_EXCLUDE_SERIALIZATION define. */ -#ifndef PRISM_EXCLUDE_SERIALIZATION - -/** - * Parse and serialize the AST represented by the source that is read out of the - * given stream into to the given buffer. - * - * @param buffer The buffer to serialize to. - * @param stream The stream to parse. - * @param stream_fgets The function to use to read from the stream. - * @param stream_feof The function to use to tell if the stream has hit eof. - * @param data The optional data to pass to the parser. - */ -PRISM_EXPORTED_FUNCTION void pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const char *data); - -/** - * Serialize the AST represented by the given node to the given buffer. - * - * @param parser The parser to serialize. - * @param node The node to serialize. - * @param buffer The buffer to serialize to. - */ -PRISM_EXPORTED_FUNCTION void pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer); - -/** - * Parse the given source to the AST and dump the AST to the given buffer. - * - * @param buffer The buffer to serialize to. - * @param source The source to parse. - * @param size The size of the source. - * @param data The optional data to pass to the parser. - */ -PRISM_EXPORTED_FUNCTION void pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data); - -/** - * Parse and serialize the comments in the given source to the given buffer. - * - * @param buffer The buffer to serialize to. 
- * @param source The source to parse. - * @param size The size of the source. - * @param data The optional data to pass to the parser. - */ -PRISM_EXPORTED_FUNCTION void pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data); - -/** - * Lex the given source and serialize to the given buffer. - * - * @param source The source to lex. - * @param size The size of the source. - * @param buffer The buffer to serialize to. - * @param data The optional data to pass to the lexer. - */ -PRISM_EXPORTED_FUNCTION void pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data); - -/** - * Parse and serialize both the AST and the tokens represented by the given - * source to the given buffer. - * - * @param buffer The buffer to serialize to. - * @param source The source to parse. - * @param size The size of the source. - * @param data The optional data to pass to the parser. - */ -PRISM_EXPORTED_FUNCTION void pm_serialize_parse_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data); - -#endif - /** * Parse the source and return true if it parses without errors or warnings. * @@ -172,38 +51,6 @@ PRISM_EXPORTED_FUNCTION void pm_serialize_parse_lex(pm_buffer_t *buffer, const u */ PRISM_EXPORTED_FUNCTION bool pm_parse_success_p(const uint8_t *source, size_t size, const char *data); -/** - * Returns a string representation of the given token type. - * - * @param token_type The token type to convert to a string. - * @return A string representation of the given token type. - */ -PRISM_EXPORTED_FUNCTION const char * pm_token_type_name(pm_token_type_t token_type); - -/** - * Returns the human name of the given token type. - * - * @param token_type The token type to convert to a human name. - * @return The human name of the given token type. - */ -const char * pm_token_type_human(pm_token_type_t token_type); - -/* We optionally support dumping to JSON. 
For systems that don't want or need - * this functionality, it can be turned off with the PRISM_EXCLUDE_JSON define. - */ -#ifndef PRISM_EXCLUDE_JSON - -/** - * Dump JSON to the given buffer. - * - * @param buffer The buffer to serialize to. - * @param parser The parser that parsed the node. - * @param node The node to serialize. - */ -PRISM_EXPORTED_FUNCTION void pm_dump_json(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *node); - -#endif - /** * @mainpage * @@ -226,11 +73,10 @@ PRISM_EXPORTED_FUNCTION void pm_dump_json(pm_buffer_t *buffer, const pm_parser_t * * @section parsing Parsing * - * In order to parse Ruby code, the structures and functions that you're going - * to want to use and be aware of are: + * In order to parse Ruby code, the functions that you are going to want to use + * and be aware of are: * - * * `pm_arena_t` - the arena allocator for AST-lifetime memory - * * `pm_parser_t` - the main parser structure + * * `pm_arena_new()` - create a new arena to hold all AST-lifetime allocations * * `pm_parser_new()` - allocate and initialize a new parser * * `pm_parse()` - parse and return the root node * * `pm_parser_free()` - free the parser and its internal memory @@ -260,13 +106,11 @@ PRISM_EXPORTED_FUNCTION void pm_dump_json(pm_buffer_t *buffer, const pm_parser_t * Prism provides the ability to serialize the AST and its related metadata into * a binary format. This format is designed to be portable to different * languages and runtimes so that you only need to make one FFI call in order to - * parse Ruby code. The structures and functions that you're going to want to - * use and be aware of are: + * parse Ruby code. 
The functions that you are going to want to use and be + * aware of are: * - * * `pm_buffer_t` - an opaque buffer object that will hold the serialized AST * * `pm_buffer_new()` - create a new buffer * * `pm_buffer_free()` - free the buffer and its internal memory - * * `pm_serialize()` - serialize the AST into a buffer * * `pm_serialize_parse()` - parse and serialize the AST into a buffer * * Putting all of this together would look something like: diff --git a/include/prism/json.h b/include/prism/json.h new file mode 100644 index 0000000000..cc2f6f3bb1 --- /dev/null +++ b/include/prism/json.h @@ -0,0 +1,31 @@ +/** + * @file json.h + */ +#ifndef PRISM_JSON_H +#define PRISM_JSON_H + +#include "prism/excludes.h" + +/* We optionally support dumping to JSON. For systems that don't want or need + * this functionality, it can be turned off with the PRISM_EXCLUDE_JSON define. + */ +#ifndef PRISM_EXCLUDE_JSON + +#include "prism/compiler/exported.h" + +#include "prism/ast.h" +#include "prism/buffer.h" +#include "prism/parser.h" + +/** + * Dump JSON to the given buffer. + * + * @param buffer The buffer to serialize to. + * @param parser The parser that parsed the node. + * @param node The node to serialize. + */ +PRISM_EXPORTED_FUNCTION void pm_dump_json(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *node); + +#endif + +#endif diff --git a/include/prism/parser.h b/include/prism/parser.h index 1f0b7fc0f3..f49155799d 100644 --- a/include/prism/parser.h +++ b/include/prism/parser.h @@ -11,12 +11,38 @@ #include "prism/diagnostic.h" #include "prism/line_offset_list.h" #include "prism/magic_comments.h" +#include "prism/options.h" /** * The parser used to parse Ruby source. */ typedef struct pm_parser_t pm_parser_t; +/** + * Allocate and initialize a parser with the given start and end pointers. + * + * @param arena The arena to use for all AST-lifetime allocations. It is caller- + * owned and must outlive the parser. + * @param source The source to parse. 
+ * @param size The size of the source. + * @param options The optional options to use when parsing. These options must + * live for the whole lifetime of this parser. + * @return The initialized parser. It is the responsibility of the caller to + * free the parser with `pm_parser_free()`. + * + * \public \memberof pm_parser + */ +PRISM_EXPORTED_FUNCTION pm_parser_t * pm_parser_new(pm_arena_t *arena, const uint8_t *source, size_t size, const pm_options_t *options); + +/** + * Free both the memory held by the given parser and the parser itself. + * + * @param parser The parser to free. + * + * \public \memberof pm_parser + */ +PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser); + /** * When the encoding that is being used to parse the source is changed by prism, * we provide the ability here to call out to a user-defined function. diff --git a/include/prism/serialize.h b/include/prism/serialize.h new file mode 100644 index 0000000000..b2e93e5e9b --- /dev/null +++ b/include/prism/serialize.h @@ -0,0 +1,86 @@ +/** + * @file serialize.h + * + * The functions related to serializing the AST to a binary format. + */ +#ifndef PRISM_SERIALIZE_H +#define PRISM_SERIALIZE_H + +#include "prism/excludes.h" + +/* We optionally support serializing to a binary string. For systems that do not + * want or need this functionality, it can be turned off with the + * PRISM_EXCLUDE_SERIALIZATION define. */ +#ifndef PRISM_EXCLUDE_SERIALIZATION + +#include "prism/compiler/exported.h" + +#include "prism/buffer.h" +#include "prism/parser.h" +#include "prism/stream.h" + +/** + * Serialize the AST represented by the given node to the given buffer. + * + * @param parser The parser to serialize. + * @param node The node to serialize. + * @param buffer The buffer to serialize to. + */ +PRISM_EXPORTED_FUNCTION void pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer); + +/** + * Parse the given source to the AST and dump the AST to the given buffer. 
+ * + * @param buffer The buffer to serialize to. + * @param source The source to parse. + * @param size The size of the source. + * @param data The optional data to pass to the parser. + */ +PRISM_EXPORTED_FUNCTION void pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data); + +/** + * Parse and serialize the AST represented by the source that is read out of the + * given stream into to the given buffer. + * + * @param buffer The buffer to serialize to. + * @param stream The stream to parse. + * @param stream_fgets The function to use to read from the stream. + * @param stream_feof The function to use to tell if the stream has hit eof. + * @param data The optional data to pass to the parser. + */ +PRISM_EXPORTED_FUNCTION void pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const char *data); + +/** + * Parse and serialize the comments in the given source to the given buffer. + * + * @param buffer The buffer to serialize to. + * @param source The source to parse. + * @param size The size of the source. + * @param data The optional data to pass to the parser. + */ +PRISM_EXPORTED_FUNCTION void pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data); + +/** + * Lex the given source and serialize to the given buffer. + * + * @param source The source to lex. + * @param size The size of the source. + * @param buffer The buffer to serialize to. + * @param data The optional data to pass to the lexer. + */ +PRISM_EXPORTED_FUNCTION void pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data); + +/** + * Parse and serialize both the AST and the tokens represented by the given + * source to the given buffer. + * + * @param buffer The buffer to serialize to. + * @param source The source to parse. + * @param size The size of the source. 
+ * @param data The optional data to pass to the parser. + */ +PRISM_EXPORTED_FUNCTION void pm_serialize_parse_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data); + +#endif + +#endif diff --git a/include/prism/stream.h b/include/prism/stream.h new file mode 100644 index 0000000000..9ed94f58e9 --- /dev/null +++ b/include/prism/stream.h @@ -0,0 +1,44 @@ +/** + * @file stream.h + * + * Functions for parsing streams. + */ +#ifndef PRISM_STREAM_H +#define PRISM_STREAM_H + +#include "prism/compiler/exported.h" + +#include "prism/arena.h" +#include "prism/buffer.h" +#include "prism/options.h" +#include "prism/parser.h" + +/** + * This function is used in pm_parse_stream() to retrieve a line of input from a + * stream. It closely mirrors that of fgets so that fgets can be used as the + * default implementation. + */ +typedef char * (pm_parse_stream_fgets_t)(char *string, int size, void *stream); + +/** + * This function is used in pm_parse_stream to check whether a stream is EOF. + * It closely mirrors that of feof so that feof can be used as the + * default implementation. + */ +typedef int (pm_parse_stream_feof_t)(void *stream); + +/** + * Parse a stream of Ruby source and return the tree. + * + * @param parser The out parameter to write the parser to. + * @param arena The arena to use for all AST-lifetime allocations. + * @param buffer The buffer to use. + * @param stream The stream to parse. + * @param stream_fgets The function to use to read from the stream. + * @param stream_feof The function to use to determine if the stream has hit eof. + * @param options The optional options to use when parsing. + * @return The AST representing the source. 
+ */ +PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_parser_t **parser, pm_arena_t *arena, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options); + +#endif diff --git a/src/prism.c b/src/prism.c index fb2b67a779..3f8e3ad870 100644 --- a/src/prism.c +++ b/src/prism.c @@ -1,5 +1,3 @@ -#include "prism.h" - #include "prism/compiler/accel.h" #include "prism/compiler/fallthrough.h" #include "prism/compiler/unused.h" @@ -30,6 +28,8 @@ #include "prism/internal/strpbrk.h" #include "prism/excludes.h" +#include "prism/serialize.h" +#include "prism/version.h" #include #include diff --git a/templates/include/prism/ast.h.erb b/templates/include/prism/ast.h.erb index 3a949f1000..f08e017fc0 100644 --- a/templates/include/prism/ast.h.erb +++ b/templates/include/prism/ast.h.erb @@ -47,6 +47,22 @@ typedef struct { const uint8_t *end; } pm_token_t; +/** + * Returns a string representation of the given token type. + * + * @param token_type The token type to convert to a string. + * @return A string representation of the given token type. + */ +PRISM_EXPORTED_FUNCTION const char * pm_token_type_name(pm_token_type_t token_type); + +/** + * Returns the human name of the given token type. + * + * @param token_type The token type to convert to a human name. + * @return The human name of the given token type. + */ +const char * pm_token_type_human(pm_token_type_t token_type); + /** * This struct represents a slice in the source code, defined by an offset and * a length. 
Note that we have confirmation that we can represent all locations From 3b098866330fb8ae6175f01c5f160f6950ae5eb2 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 18 Mar 2026 14:04:05 -0400 Subject: [PATCH 076/100] Move JSON to its own TU --- include/prism.h | 2 +- include/prism/internal/encoding.h | 4 +- src/json.c | 5723 +++++++++++++++++++++++++++++ templates/src/json.c.erb | 130 + templates/src/node.c.erb | 127 - templates/src/prettyprint.c.erb | 14 +- templates/template.rb | 1 + 7 files changed, 5862 insertions(+), 139 deletions(-) create mode 100644 src/json.c create mode 100644 templates/src/json.c.erb diff --git a/include/prism.h b/include/prism.h index 64674d154a..b344a43691 100644 --- a/include/prism.h +++ b/include/prism.h @@ -12,8 +12,8 @@ extern "C" { #include "prism/arena.h" #include "prism/ast.h" +#include "prism/buffer.h" #include "prism/diagnostic.h" -#include "prism/excludes.h" #include "prism/json.h" #include "prism/node.h" #include "prism/options.h" diff --git a/include/prism/internal/encoding.h b/include/prism/internal/encoding.h index eb68ad6250..409345fd7f 100644 --- a/include/prism/internal/encoding.h +++ b/include/prism/internal/encoding.h @@ -136,8 +136,8 @@ typedef enum { PM_ENCODING_EUC_JP, PM_ENCODING_WINDOWS_31J, -// We optionally support excluding the full set of encodings to only support the -// minimum necessary to process Ruby code without encoding comments. +/* We optionally support excluding the full set of encodings to only support the + * minimum necessary to process Ruby code without encoding comments. */ #ifndef PRISM_ENCODING_EXCLUDE_FULL PM_ENCODING_BIG5, PM_ENCODING_BIG5_HKSCS, diff --git a/src/json.c b/src/json.c new file mode 100644 index 0000000000..0d72ca8368 --- /dev/null +++ b/src/json.c @@ -0,0 +1,5723 @@ +/*----------------------------------------------------------------------------*/ +/* This file is generated by the templates/template.rb script and should not */ +/* be modified manually. 
See */ +/* templates/src/json.c.erb */ +/* if you are looking to modify the */ +/* template */ +/*----------------------------------------------------------------------------*/ + +#include "prism/json.h" + +/* We optionally support dumping to JSON. For systems that do not want or need + * this functionality, it can be turned off with the PRISM_EXCLUDE_JSON define. + */ +#ifndef PRISM_EXCLUDE_JSON + +#include "prism/internal/buffer.h" +#include "prism/internal/constant_pool.h" +#include "prism/internal/integer.h" +#include "prism/internal/parser.h" + +#include + +static void +pm_dump_json_constant(pm_buffer_t *buffer, const pm_parser_t *parser, pm_constant_id_t constant_id) { + const pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, constant_id); + pm_buffer_append_byte(buffer, '"'); + pm_buffer_append_source(buffer, constant->start, constant->length, PM_BUFFER_ESCAPING_JSON); + pm_buffer_append_byte(buffer, '"'); +} + +static void +pm_dump_json_location(pm_buffer_t *buffer, const pm_location_t *location) { + pm_buffer_append_format(buffer, "{\"start\":%" PRIu32 ",\"length\":%" PRIu32 "}", location->start, location->length); +} + +/** + * Dump JSON to the given buffer.
+ */ +void +pm_dump_json(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *node) { + switch (PM_NODE_TYPE(node)) { + case PM_ALIAS_GLOBAL_VARIABLE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"AliasGlobalVariableNode\",\"location\":", 45); + + const pm_alias_global_variable_node_t *cast = (const pm_alias_global_variable_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the new_name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"new_name\":", 11); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->new_name); + + // Dump the old_name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"old_name\":", 11); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->old_name); + + // Dump the keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"keyword_loc\":", 14); + pm_dump_json_location(buffer, &cast->keyword_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_ALIAS_METHOD_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"AliasMethodNode\",\"location\":", 37); + + const pm_alias_method_node_t *cast = (const pm_alias_method_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the new_name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"new_name\":", 11); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->new_name); + + // Dump the old_name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"old_name\":", 11); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->old_name); + + // Dump the keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"keyword_loc\":", 14); + pm_dump_json_location(buffer, &cast->keyword_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_ALTERNATION_PATTERN_NODE: { + pm_buffer_append_string(buffer, 
"{\"type\":\"AlternationPatternNode\",\"location\":", 44); + + const pm_alternation_pattern_node_t *cast = (const pm_alternation_pattern_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the left field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"left\":", 7); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->left); + + // Dump the right field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"right\":", 8); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->right); + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + pm_dump_json_location(buffer, &cast->operator_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_AND_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"AndNode\",\"location\":", 29); + + const pm_and_node_t *cast = (const pm_and_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the left field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"left\":", 7); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->left); + + // Dump the right field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"right\":", 8); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->right); + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + pm_dump_json_location(buffer, &cast->operator_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_ARGUMENTS_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"ArgumentsNode\",\"location\":", 35); + + const pm_arguments_node_t *cast = (const pm_arguments_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the ArgumentsNodeFlags field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"flags\":", 
8); + size_t flags = 0; + pm_buffer_append_byte(buffer, '['); + if (PM_NODE_FLAG_P(cast, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_FORWARDING)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"CONTAINS_FORWARDING\"", 21); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"CONTAINS_KEYWORDS\"", 19); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"CONTAINS_KEYWORD_SPLAT\"", 24); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"CONTAINS_SPLAT\"", 16); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_MULTIPLE_SPLATS)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"CONTAINS_MULTIPLE_SPLATS\"", 26); + flags++; + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the arguments field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"arguments\":", 12); + const pm_node_list_t *arguments = &cast->arguments; + pm_buffer_append_byte(buffer, '['); + + for (size_t index = 0; index < arguments->size; index++) { + if (index != 0) pm_buffer_append_byte(buffer, ','); + pm_dump_json(buffer, parser, arguments->nodes[index]); + } + pm_buffer_append_byte(buffer, ']'); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_ARRAY_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"ArrayNode\",\"location\":", 31); + + const pm_array_node_t *cast = (const pm_array_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the ArrayNodeFlags field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"flags\":", 8); + 
size_t flags = 0; + pm_buffer_append_byte(buffer, '['); + if (PM_NODE_FLAG_P(cast, PM_ARRAY_NODE_FLAGS_CONTAINS_SPLAT)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"CONTAINS_SPLAT\"", 16); + flags++; + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the elements field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"elements\":", 11); + const pm_node_list_t *elements = &cast->elements; + pm_buffer_append_byte(buffer, '['); + + for (size_t index = 0; index < elements->size; index++) { + if (index != 0) pm_buffer_append_byte(buffer, ','); + pm_dump_json(buffer, parser, elements->nodes[index]); + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the opening_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"opening_loc\":", 14); + if (cast->opening_loc.length != 0) { + pm_dump_json_location(buffer, &cast->opening_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the closing_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"closing_loc\":", 14); + if (cast->closing_loc.length != 0) { + pm_dump_json_location(buffer, &cast->closing_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_ARRAY_PATTERN_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"ArrayPatternNode\",\"location\":", 38); + + const pm_array_pattern_node_t *cast = (const pm_array_pattern_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the constant field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"constant\":", 11); + if (cast->constant != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->constant); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the requireds field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, 
"\"requireds\":", 12); + const pm_node_list_t *requireds = &cast->requireds; + pm_buffer_append_byte(buffer, '['); + + for (size_t index = 0; index < requireds->size; index++) { + if (index != 0) pm_buffer_append_byte(buffer, ','); + pm_dump_json(buffer, parser, requireds->nodes[index]); + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the rest field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"rest\":", 7); + if (cast->rest != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->rest); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the posts field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"posts\":", 8); + const pm_node_list_t *posts = &cast->posts; + pm_buffer_append_byte(buffer, '['); + + for (size_t index = 0; index < posts->size; index++) { + if (index != 0) pm_buffer_append_byte(buffer, ','); + pm_dump_json(buffer, parser, posts->nodes[index]); + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the opening_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"opening_loc\":", 14); + if (cast->opening_loc.length != 0) { + pm_dump_json_location(buffer, &cast->opening_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the closing_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"closing_loc\":", 14); + if (cast->closing_loc.length != 0) { + pm_dump_json_location(buffer, &cast->closing_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_ASSOC_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"AssocNode\",\"location\":", 31); + + const pm_assoc_node_t *cast = (const pm_assoc_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the key field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"key\":", 6); + pm_dump_json(buffer, 
parser, (const pm_node_t *) cast->key); + + // Dump the value field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"value\":", 8); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->value); + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + if (cast->operator_loc.length != 0) { + pm_dump_json_location(buffer, &cast->operator_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_ASSOC_SPLAT_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"AssocSplatNode\",\"location\":", 36); + + const pm_assoc_splat_node_t *cast = (const pm_assoc_splat_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the value field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"value\":", 8); + if (cast->value != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->value); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + pm_dump_json_location(buffer, &cast->operator_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_BACK_REFERENCE_READ_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"BackReferenceReadNode\",\"location\":", 43); + + const pm_back_reference_read_node_t *cast = (const pm_back_reference_read_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name\":", 7); + pm_dump_json_constant(buffer, parser, cast->name); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_BEGIN_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"BeginNode\",\"location\":", 31); + + const pm_begin_node_t *cast = (const pm_begin_node_t *) node; + 
pm_dump_json_location(buffer, &cast->base.location); + + // Dump the begin_keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"begin_keyword_loc\":", 20); + if (cast->begin_keyword_loc.length != 0) { + pm_dump_json_location(buffer, &cast->begin_keyword_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the statements field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"statements\":", 13); + if (cast->statements != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->statements); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the rescue_clause field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"rescue_clause\":", 16); + if (cast->rescue_clause != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->rescue_clause); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the else_clause field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"else_clause\":", 14); + if (cast->else_clause != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->else_clause); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the ensure_clause field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"ensure_clause\":", 16); + if (cast->ensure_clause != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->ensure_clause); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the end_keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"end_keyword_loc\":", 18); + if (cast->end_keyword_loc.length != 0) { + pm_dump_json_location(buffer, &cast->end_keyword_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_BLOCK_ARGUMENT_NODE: { + pm_buffer_append_string(buffer, 
"{\"type\":\"BlockArgumentNode\",\"location\":", 39); + + const pm_block_argument_node_t *cast = (const pm_block_argument_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the expression field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"expression\":", 13); + if (cast->expression != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->expression); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + pm_dump_json_location(buffer, &cast->operator_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_BLOCK_LOCAL_VARIABLE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"BlockLocalVariableNode\",\"location\":", 44); + + const pm_block_local_variable_node_t *cast = (const pm_block_local_variable_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the ParameterFlags field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"flags\":", 8); + size_t flags = 0; + pm_buffer_append_byte(buffer, '['); + if (PM_NODE_FLAG_P(cast, PM_PARAMETER_FLAGS_REPEATED_PARAMETER)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"REPEATED_PARAMETER\"", 20); + flags++; + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name\":", 7); + pm_dump_json_constant(buffer, parser, cast->name); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_BLOCK_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"BlockNode\",\"location\":", 31); + + const pm_block_node_t *cast = (const pm_block_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the locals field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"locals\":", 
9); + const pm_constant_id_list_t *locals = &cast->locals; + pm_buffer_append_byte(buffer, '['); + + for (size_t index = 0; index < locals->size; index++) { + if (index != 0) pm_buffer_append_byte(buffer, ','); + pm_dump_json_constant(buffer, parser, locals->ids[index]); + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the parameters field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"parameters\":", 13); + if (cast->parameters != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->parameters); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the body field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"body\":", 7); + if (cast->body != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->body); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the opening_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"opening_loc\":", 14); + pm_dump_json_location(buffer, &cast->opening_loc); + + // Dump the closing_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"closing_loc\":", 14); + pm_dump_json_location(buffer, &cast->closing_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_BLOCK_PARAMETER_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"BlockParameterNode\",\"location\":", 40); + + const pm_block_parameter_node_t *cast = (const pm_block_parameter_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the ParameterFlags field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"flags\":", 8); + size_t flags = 0; + pm_buffer_append_byte(buffer, '['); + if (PM_NODE_FLAG_P(cast, PM_PARAMETER_FLAGS_REPEATED_PARAMETER)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"REPEATED_PARAMETER\"", 20); + flags++; + } + pm_buffer_append_byte(buffer, ']'); + + // Dump 
the name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name\":", 7); + if (cast->name != PM_CONSTANT_ID_UNSET) { + pm_dump_json_constant(buffer, parser, cast->name); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the name_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name_loc\":", 11); + if (cast->name_loc.length != 0) { + pm_dump_json_location(buffer, &cast->name_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + pm_dump_json_location(buffer, &cast->operator_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_BLOCK_PARAMETERS_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"BlockParametersNode\",\"location\":", 41); + + const pm_block_parameters_node_t *cast = (const pm_block_parameters_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the parameters field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"parameters\":", 13); + if (cast->parameters != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->parameters); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the locals field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"locals\":", 9); + const pm_node_list_t *locals = &cast->locals; + pm_buffer_append_byte(buffer, '['); + + for (size_t index = 0; index < locals->size; index++) { + if (index != 0) pm_buffer_append_byte(buffer, ','); + pm_dump_json(buffer, parser, locals->nodes[index]); + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the opening_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"opening_loc\":", 14); + if (cast->opening_loc.length != 0) { + pm_dump_json_location(buffer, &cast->opening_loc); + } else { + 
pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the closing_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"closing_loc\":", 14); + if (cast->closing_loc.length != 0) { + pm_dump_json_location(buffer, &cast->closing_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_BREAK_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"BreakNode\",\"location\":", 31); + + const pm_break_node_t *cast = (const pm_break_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the arguments field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"arguments\":", 12); + if (cast->arguments != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->arguments); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"keyword_loc\":", 14); + pm_dump_json_location(buffer, &cast->keyword_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_CALL_AND_WRITE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"CallAndWriteNode\",\"location\":", 38); + + const pm_call_and_write_node_t *cast = (const pm_call_and_write_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the CallNodeFlags field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"flags\":", 8); + size_t flags = 0; + pm_buffer_append_byte(buffer, '['); + if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"SAFE_NAVIGATION\"", 17); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"VARIABLE_CALL\"", 15); + flags++; + } + if (PM_NODE_FLAG_P(cast, 
PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"ATTRIBUTE_WRITE\"", 17); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"IGNORE_VISIBILITY\"", 19); + flags++; + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the receiver field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"receiver\":", 11); + if (cast->receiver != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->receiver); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the call_operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"call_operator_loc\":", 20); + if (cast->call_operator_loc.length != 0) { + pm_dump_json_location(buffer, &cast->call_operator_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the message_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"message_loc\":", 14); + if (cast->message_loc.length != 0) { + pm_dump_json_location(buffer, &cast->message_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the read_name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"read_name\":", 12); + pm_dump_json_constant(buffer, parser, cast->read_name); + + // Dump the write_name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"write_name\":", 13); + pm_dump_json_constant(buffer, parser, cast->write_name); + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + pm_dump_json_location(buffer, &cast->operator_loc); + + // Dump the value field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"value\":", 8); + pm_dump_json(buffer, parser, 
(const pm_node_t *) cast->value); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_CALL_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"CallNode\",\"location\":", 30); + + const pm_call_node_t *cast = (const pm_call_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the CallNodeFlags field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"flags\":", 8); + size_t flags = 0; + pm_buffer_append_byte(buffer, '['); + if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"SAFE_NAVIGATION\"", 17); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"VARIABLE_CALL\"", 15); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"ATTRIBUTE_WRITE\"", 17); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"IGNORE_VISIBILITY\"", 19); + flags++; + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the receiver field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"receiver\":", 11); + if (cast->receiver != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->receiver); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the call_operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"call_operator_loc\":", 20); + if (cast->call_operator_loc.length != 0) { + pm_dump_json_location(buffer, &cast->call_operator_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the name field + pm_buffer_append_byte(buffer, ','); + 
pm_buffer_append_string(buffer, "\"name\":", 7); + pm_dump_json_constant(buffer, parser, cast->name); + + // Dump the message_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"message_loc\":", 14); + if (cast->message_loc.length != 0) { + pm_dump_json_location(buffer, &cast->message_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the opening_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"opening_loc\":", 14); + if (cast->opening_loc.length != 0) { + pm_dump_json_location(buffer, &cast->opening_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the arguments field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"arguments\":", 12); + if (cast->arguments != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->arguments); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the closing_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"closing_loc\":", 14); + if (cast->closing_loc.length != 0) { + pm_dump_json_location(buffer, &cast->closing_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the equal_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"equal_loc\":", 12); + if (cast->equal_loc.length != 0) { + pm_dump_json_location(buffer, &cast->equal_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the block field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"block\":", 8); + if (cast->block != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->block); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_CALL_OPERATOR_WRITE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"CallOperatorWriteNode\",\"location\":", 43); + + const 
pm_call_operator_write_node_t *cast = (const pm_call_operator_write_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the CallNodeFlags field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"flags\":", 8); + size_t flags = 0; + pm_buffer_append_byte(buffer, '['); + if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"SAFE_NAVIGATION\"", 17); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"VARIABLE_CALL\"", 15); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"ATTRIBUTE_WRITE\"", 17); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"IGNORE_VISIBILITY\"", 19); + flags++; + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the receiver field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"receiver\":", 11); + if (cast->receiver != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->receiver); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the call_operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"call_operator_loc\":", 20); + if (cast->call_operator_loc.length != 0) { + pm_dump_json_location(buffer, &cast->call_operator_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the message_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"message_loc\":", 14); + if (cast->message_loc.length != 0) { + pm_dump_json_location(buffer, &cast->message_loc); + } else { + 
pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the read_name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"read_name\":", 12); + pm_dump_json_constant(buffer, parser, cast->read_name); + + // Dump the write_name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"write_name\":", 13); + pm_dump_json_constant(buffer, parser, cast->write_name); + + // Dump the binary_operator field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"binary_operator\":", 18); + pm_dump_json_constant(buffer, parser, cast->binary_operator); + + // Dump the binary_operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"binary_operator_loc\":", 22); + pm_dump_json_location(buffer, &cast->binary_operator_loc); + + // Dump the value field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"value\":", 8); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->value); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_CALL_OR_WRITE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"CallOrWriteNode\",\"location\":", 37); + + const pm_call_or_write_node_t *cast = (const pm_call_or_write_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the CallNodeFlags field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"flags\":", 8); + size_t flags = 0; + pm_buffer_append_byte(buffer, '['); + if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"SAFE_NAVIGATION\"", 17); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"VARIABLE_CALL\"", 15); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE)) { + if (flags != 0) 
pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"ATTRIBUTE_WRITE\"", 17); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"IGNORE_VISIBILITY\"", 19); + flags++; + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the receiver field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"receiver\":", 11); + if (cast->receiver != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->receiver); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the call_operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"call_operator_loc\":", 20); + if (cast->call_operator_loc.length != 0) { + pm_dump_json_location(buffer, &cast->call_operator_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the message_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"message_loc\":", 14); + if (cast->message_loc.length != 0) { + pm_dump_json_location(buffer, &cast->message_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the read_name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"read_name\":", 12); + pm_dump_json_constant(buffer, parser, cast->read_name); + + // Dump the write_name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"write_name\":", 13); + pm_dump_json_constant(buffer, parser, cast->write_name); + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + pm_dump_json_location(buffer, &cast->operator_loc); + + // Dump the value field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"value\":", 8); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->value); + + 
pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_CALL_TARGET_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"CallTargetNode\",\"location\":", 36); + + const pm_call_target_node_t *cast = (const pm_call_target_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the CallNodeFlags field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"flags\":", 8); + size_t flags = 0; + pm_buffer_append_byte(buffer, '['); + if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"SAFE_NAVIGATION\"", 17); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"VARIABLE_CALL\"", 15); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"ATTRIBUTE_WRITE\"", 17); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"IGNORE_VISIBILITY\"", 19); + flags++; + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the receiver field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"receiver\":", 11); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->receiver); + + // Dump the call_operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"call_operator_loc\":", 20); + pm_dump_json_location(buffer, &cast->call_operator_loc); + + // Dump the name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name\":", 7); + pm_dump_json_constant(buffer, parser, cast->name); + + // Dump the message_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"message_loc\":", 
14); + pm_dump_json_location(buffer, &cast->message_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_CAPTURE_PATTERN_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"CapturePatternNode\",\"location\":", 40); + + const pm_capture_pattern_node_t *cast = (const pm_capture_pattern_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the value field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"value\":", 8); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->value); + + // Dump the target field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"target\":", 9); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->target); + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + pm_dump_json_location(buffer, &cast->operator_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_CASE_MATCH_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"CaseMatchNode\",\"location\":", 35); + + const pm_case_match_node_t *cast = (const pm_case_match_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the predicate field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"predicate\":", 12); + if (cast->predicate != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->predicate); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the conditions field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"conditions\":", 13); + const pm_node_list_t *conditions = &cast->conditions; + pm_buffer_append_byte(buffer, '['); + + for (size_t index = 0; index < conditions->size; index++) { + if (index != 0) pm_buffer_append_byte(buffer, ','); + pm_dump_json(buffer, parser, conditions->nodes[index]); + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the else_clause field + 
pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"else_clause\":", 14); + if (cast->else_clause != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->else_clause); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the case_keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"case_keyword_loc\":", 19); + pm_dump_json_location(buffer, &cast->case_keyword_loc); + + // Dump the end_keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"end_keyword_loc\":", 18); + pm_dump_json_location(buffer, &cast->end_keyword_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_CASE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"CaseNode\",\"location\":", 30); + + const pm_case_node_t *cast = (const pm_case_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the predicate field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"predicate\":", 12); + if (cast->predicate != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->predicate); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the conditions field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"conditions\":", 13); + const pm_node_list_t *conditions = &cast->conditions; + pm_buffer_append_byte(buffer, '['); + + for (size_t index = 0; index < conditions->size; index++) { + if (index != 0) pm_buffer_append_byte(buffer, ','); + pm_dump_json(buffer, parser, conditions->nodes[index]); + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the else_clause field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"else_clause\":", 14); + if (cast->else_clause != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->else_clause); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the case_keyword_loc field 
+ pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"case_keyword_loc\":", 19); + pm_dump_json_location(buffer, &cast->case_keyword_loc); + + // Dump the end_keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"end_keyword_loc\":", 18); + pm_dump_json_location(buffer, &cast->end_keyword_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_CLASS_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"ClassNode\",\"location\":", 31); + + const pm_class_node_t *cast = (const pm_class_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the locals field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"locals\":", 9); + const pm_constant_id_list_t *locals = &cast->locals; + pm_buffer_append_byte(buffer, '['); + + for (size_t index = 0; index < locals->size; index++) { + if (index != 0) pm_buffer_append_byte(buffer, ','); + pm_dump_json_constant(buffer, parser, locals->ids[index]); + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the class_keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"class_keyword_loc\":", 20); + pm_dump_json_location(buffer, &cast->class_keyword_loc); + + // Dump the constant_path field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"constant_path\":", 16); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->constant_path); + + // Dump the inheritance_operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"inheritance_operator_loc\":", 27); + if (cast->inheritance_operator_loc.length != 0) { + pm_dump_json_location(buffer, &cast->inheritance_operator_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the superclass field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"superclass\":", 13); + if (cast->superclass != NULL) { + pm_dump_json(buffer, 
parser, (const pm_node_t *) cast->superclass); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the body field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"body\":", 7); + if (cast->body != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->body); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the end_keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"end_keyword_loc\":", 18); + pm_dump_json_location(buffer, &cast->end_keyword_loc); + + // Dump the name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name\":", 7); + pm_dump_json_constant(buffer, parser, cast->name); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_CLASS_VARIABLE_AND_WRITE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"ClassVariableAndWriteNode\",\"location\":", 47); + + const pm_class_variable_and_write_node_t *cast = (const pm_class_variable_and_write_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name\":", 7); + pm_dump_json_constant(buffer, parser, cast->name); + + // Dump the name_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name_loc\":", 11); + pm_dump_json_location(buffer, &cast->name_loc); + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + pm_dump_json_location(buffer, &cast->operator_loc); + + // Dump the value field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"value\":", 8); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->value); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_CLASS_VARIABLE_OPERATOR_WRITE_NODE: { + pm_buffer_append_string(buffer, 
"{\"type\":\"ClassVariableOperatorWriteNode\",\"location\":", 52); + + const pm_class_variable_operator_write_node_t *cast = (const pm_class_variable_operator_write_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name\":", 7); + pm_dump_json_constant(buffer, parser, cast->name); + + // Dump the name_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name_loc\":", 11); + pm_dump_json_location(buffer, &cast->name_loc); + + // Dump the binary_operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"binary_operator_loc\":", 22); + pm_dump_json_location(buffer, &cast->binary_operator_loc); + + // Dump the value field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"value\":", 8); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->value); + + // Dump the binary_operator field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"binary_operator\":", 18); + pm_dump_json_constant(buffer, parser, cast->binary_operator); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_CLASS_VARIABLE_OR_WRITE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"ClassVariableOrWriteNode\",\"location\":", 46); + + const pm_class_variable_or_write_node_t *cast = (const pm_class_variable_or_write_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name\":", 7); + pm_dump_json_constant(buffer, parser, cast->name); + + // Dump the name_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name_loc\":", 11); + pm_dump_json_location(buffer, &cast->name_loc); + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + 
pm_dump_json_location(buffer, &cast->operator_loc); + + // Dump the value field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"value\":", 8); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->value); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_CLASS_VARIABLE_READ_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"ClassVariableReadNode\",\"location\":", 43); + + const pm_class_variable_read_node_t *cast = (const pm_class_variable_read_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name\":", 7); + pm_dump_json_constant(buffer, parser, cast->name); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_CLASS_VARIABLE_TARGET_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"ClassVariableTargetNode\",\"location\":", 45); + + const pm_class_variable_target_node_t *cast = (const pm_class_variable_target_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name\":", 7); + pm_dump_json_constant(buffer, parser, cast->name); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_CLASS_VARIABLE_WRITE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"ClassVariableWriteNode\",\"location\":", 44); + + const pm_class_variable_write_node_t *cast = (const pm_class_variable_write_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name\":", 7); + pm_dump_json_constant(buffer, parser, cast->name); + + // Dump the name_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name_loc\":", 11); + pm_dump_json_location(buffer, &cast->name_loc); + + // Dump the value field + pm_buffer_append_byte(buffer, ','); + 
pm_buffer_append_string(buffer, "\"value\":", 8); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->value); + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + pm_dump_json_location(buffer, &cast->operator_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_CONSTANT_AND_WRITE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"ConstantAndWriteNode\",\"location\":", 42); + + const pm_constant_and_write_node_t *cast = (const pm_constant_and_write_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name\":", 7); + pm_dump_json_constant(buffer, parser, cast->name); + + // Dump the name_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name_loc\":", 11); + pm_dump_json_location(buffer, &cast->name_loc); + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + pm_dump_json_location(buffer, &cast->operator_loc); + + // Dump the value field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"value\":", 8); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->value); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_CONSTANT_OPERATOR_WRITE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"ConstantOperatorWriteNode\",\"location\":", 47); + + const pm_constant_operator_write_node_t *cast = (const pm_constant_operator_write_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name\":", 7); + pm_dump_json_constant(buffer, parser, cast->name); + + // Dump the name_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name_loc\":", 11); + 
pm_dump_json_location(buffer, &cast->name_loc); + + // Dump the binary_operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"binary_operator_loc\":", 22); + pm_dump_json_location(buffer, &cast->binary_operator_loc); + + // Dump the value field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"value\":", 8); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->value); + + // Dump the binary_operator field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"binary_operator\":", 18); + pm_dump_json_constant(buffer, parser, cast->binary_operator); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_CONSTANT_OR_WRITE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"ConstantOrWriteNode\",\"location\":", 41); + + const pm_constant_or_write_node_t *cast = (const pm_constant_or_write_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name\":", 7); + pm_dump_json_constant(buffer, parser, cast->name); + + // Dump the name_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name_loc\":", 11); + pm_dump_json_location(buffer, &cast->name_loc); + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + pm_dump_json_location(buffer, &cast->operator_loc); + + // Dump the value field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"value\":", 8); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->value); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_CONSTANT_PATH_AND_WRITE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"ConstantPathAndWriteNode\",\"location\":", 46); + + const pm_constant_path_and_write_node_t *cast = (const pm_constant_path_and_write_node_t *) node; + 
pm_dump_json_location(buffer, &cast->base.location); + + // Dump the target field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"target\":", 9); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->target); + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + pm_dump_json_location(buffer, &cast->operator_loc); + + // Dump the value field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"value\":", 8); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->value); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_CONSTANT_PATH_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"ConstantPathNode\",\"location\":", 38); + + const pm_constant_path_node_t *cast = (const pm_constant_path_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the parent field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"parent\":", 9); + if (cast->parent != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->parent); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name\":", 7); + if (cast->name != PM_CONSTANT_ID_UNSET) { + pm_dump_json_constant(buffer, parser, cast->name); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the delimiter_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"delimiter_loc\":", 16); + pm_dump_json_location(buffer, &cast->delimiter_loc); + + // Dump the name_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name_loc\":", 11); + pm_dump_json_location(buffer, &cast->name_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_CONSTANT_PATH_OPERATOR_WRITE_NODE: { + pm_buffer_append_string(buffer, 
"{\"type\":\"ConstantPathOperatorWriteNode\",\"location\":", 51); + + const pm_constant_path_operator_write_node_t *cast = (const pm_constant_path_operator_write_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the target field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"target\":", 9); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->target); + + // Dump the binary_operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"binary_operator_loc\":", 22); + pm_dump_json_location(buffer, &cast->binary_operator_loc); + + // Dump the value field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"value\":", 8); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->value); + + // Dump the binary_operator field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"binary_operator\":", 18); + pm_dump_json_constant(buffer, parser, cast->binary_operator); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_CONSTANT_PATH_OR_WRITE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"ConstantPathOrWriteNode\",\"location\":", 45); + + const pm_constant_path_or_write_node_t *cast = (const pm_constant_path_or_write_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the target field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"target\":", 9); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->target); + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + pm_dump_json_location(buffer, &cast->operator_loc); + + // Dump the value field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"value\":", 8); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->value); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case 
PM_CONSTANT_PATH_TARGET_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"ConstantPathTargetNode\",\"location\":", 44); + + const pm_constant_path_target_node_t *cast = (const pm_constant_path_target_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the parent field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"parent\":", 9); + if (cast->parent != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->parent); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name\":", 7); + if (cast->name != PM_CONSTANT_ID_UNSET) { + pm_dump_json_constant(buffer, parser, cast->name); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the delimiter_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"delimiter_loc\":", 16); + pm_dump_json_location(buffer, &cast->delimiter_loc); + + // Dump the name_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name_loc\":", 11); + pm_dump_json_location(buffer, &cast->name_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_CONSTANT_PATH_WRITE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"ConstantPathWriteNode\",\"location\":", 43); + + const pm_constant_path_write_node_t *cast = (const pm_constant_path_write_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the target field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"target\":", 9); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->target); + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + pm_dump_json_location(buffer, &cast->operator_loc); + + // Dump the value field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, 
"\"value\":", 8); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->value); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_CONSTANT_READ_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"ConstantReadNode\",\"location\":", 38); + + const pm_constant_read_node_t *cast = (const pm_constant_read_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name\":", 7); + pm_dump_json_constant(buffer, parser, cast->name); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_CONSTANT_TARGET_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"ConstantTargetNode\",\"location\":", 40); + + const pm_constant_target_node_t *cast = (const pm_constant_target_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name\":", 7); + pm_dump_json_constant(buffer, parser, cast->name); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_CONSTANT_WRITE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"ConstantWriteNode\",\"location\":", 39); + + const pm_constant_write_node_t *cast = (const pm_constant_write_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name\":", 7); + pm_dump_json_constant(buffer, parser, cast->name); + + // Dump the name_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name_loc\":", 11); + pm_dump_json_location(buffer, &cast->name_loc); + + // Dump the value field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"value\":", 8); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->value); + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, 
"\"operator_loc\":", 15); + pm_dump_json_location(buffer, &cast->operator_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_DEF_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"DefNode\",\"location\":", 29); + + const pm_def_node_t *cast = (const pm_def_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name\":", 7); + pm_dump_json_constant(buffer, parser, cast->name); + + // Dump the name_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name_loc\":", 11); + pm_dump_json_location(buffer, &cast->name_loc); + + // Dump the receiver field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"receiver\":", 11); + if (cast->receiver != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->receiver); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the parameters field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"parameters\":", 13); + if (cast->parameters != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->parameters); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the body field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"body\":", 7); + if (cast->body != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->body); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the locals field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"locals\":", 9); + const pm_constant_id_list_t *locals = &cast->locals; + pm_buffer_append_byte(buffer, '['); + + for (size_t index = 0; index < locals->size; index++) { + if (index != 0) pm_buffer_append_byte(buffer, ','); + pm_dump_json_constant(buffer, parser, locals->ids[index]); + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the 
def_keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"def_keyword_loc\":", 18); + pm_dump_json_location(buffer, &cast->def_keyword_loc); + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + if (cast->operator_loc.length != 0) { + pm_dump_json_location(buffer, &cast->operator_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the lparen_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"lparen_loc\":", 13); + if (cast->lparen_loc.length != 0) { + pm_dump_json_location(buffer, &cast->lparen_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the rparen_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"rparen_loc\":", 13); + if (cast->rparen_loc.length != 0) { + pm_dump_json_location(buffer, &cast->rparen_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the equal_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"equal_loc\":", 12); + if (cast->equal_loc.length != 0) { + pm_dump_json_location(buffer, &cast->equal_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the end_keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"end_keyword_loc\":", 18); + if (cast->end_keyword_loc.length != 0) { + pm_dump_json_location(buffer, &cast->end_keyword_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_DEFINED_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"DefinedNode\",\"location\":", 33); + + const pm_defined_node_t *cast = (const pm_defined_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the lparen_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, 
"\"lparen_loc\":", 13); + if (cast->lparen_loc.length != 0) { + pm_dump_json_location(buffer, &cast->lparen_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the value field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"value\":", 8); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->value); + + // Dump the rparen_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"rparen_loc\":", 13); + if (cast->rparen_loc.length != 0) { + pm_dump_json_location(buffer, &cast->rparen_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"keyword_loc\":", 14); + pm_dump_json_location(buffer, &cast->keyword_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_ELSE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"ElseNode\",\"location\":", 30); + + const pm_else_node_t *cast = (const pm_else_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the else_keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"else_keyword_loc\":", 19); + pm_dump_json_location(buffer, &cast->else_keyword_loc); + + // Dump the statements field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"statements\":", 13); + if (cast->statements != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->statements); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the end_keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"end_keyword_loc\":", 18); + if (cast->end_keyword_loc.length != 0) { + pm_dump_json_location(buffer, &cast->end_keyword_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_EMBEDDED_STATEMENTS_NODE: { + 
pm_buffer_append_string(buffer, "{\"type\":\"EmbeddedStatementsNode\",\"location\":", 44); + + const pm_embedded_statements_node_t *cast = (const pm_embedded_statements_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the opening_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"opening_loc\":", 14); + pm_dump_json_location(buffer, &cast->opening_loc); + + // Dump the statements field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"statements\":", 13); + if (cast->statements != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->statements); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the closing_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"closing_loc\":", 14); + pm_dump_json_location(buffer, &cast->closing_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_EMBEDDED_VARIABLE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"EmbeddedVariableNode\",\"location\":", 42); + + const pm_embedded_variable_node_t *cast = (const pm_embedded_variable_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + pm_dump_json_location(buffer, &cast->operator_loc); + + // Dump the variable field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"variable\":", 11); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->variable); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_ENSURE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"EnsureNode\",\"location\":", 32); + + const pm_ensure_node_t *cast = (const pm_ensure_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the ensure_keyword_loc field + pm_buffer_append_byte(buffer, ','); + 
pm_buffer_append_string(buffer, "\"ensure_keyword_loc\":", 21); + pm_dump_json_location(buffer, &cast->ensure_keyword_loc); + + // Dump the statements field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"statements\":", 13); + if (cast->statements != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->statements); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the end_keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"end_keyword_loc\":", 18); + pm_dump_json_location(buffer, &cast->end_keyword_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_FALSE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"FalseNode\",\"location\":", 31); + + const pm_false_node_t *cast = (const pm_false_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_FIND_PATTERN_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"FindPatternNode\",\"location\":", 37); + + const pm_find_pattern_node_t *cast = (const pm_find_pattern_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the constant field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"constant\":", 11); + if (cast->constant != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->constant); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the left field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"left\":", 7); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->left); + + // Dump the requireds field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"requireds\":", 12); + const pm_node_list_t *requireds = &cast->requireds; + pm_buffer_append_byte(buffer, '['); + + for (size_t index = 0; index < requireds->size; index++) { + if (index != 0) pm_buffer_append_byte(buffer, ','); 
+ pm_dump_json(buffer, parser, requireds->nodes[index]); + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the right field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"right\":", 8); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->right); + + // Dump the opening_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"opening_loc\":", 14); + if (cast->opening_loc.length != 0) { + pm_dump_json_location(buffer, &cast->opening_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the closing_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"closing_loc\":", 14); + if (cast->closing_loc.length != 0) { + pm_dump_json_location(buffer, &cast->closing_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_FLIP_FLOP_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"FlipFlopNode\",\"location\":", 34); + + const pm_flip_flop_node_t *cast = (const pm_flip_flop_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the RangeFlags field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"flags\":", 8); + size_t flags = 0; + pm_buffer_append_byte(buffer, '['); + if (PM_NODE_FLAG_P(cast, PM_RANGE_FLAGS_EXCLUDE_END)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"EXCLUDE_END\"", 13); + flags++; + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the left field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"left\":", 7); + if (cast->left != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->left); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the right field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"right\":", 8); + if (cast->right != NULL) { + pm_dump_json(buffer, parser, 
(const pm_node_t *) cast->right); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + pm_dump_json_location(buffer, &cast->operator_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_FLOAT_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"FloatNode\",\"location\":", 31); + + const pm_float_node_t *cast = (const pm_float_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the value field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"value\":", 8); + pm_buffer_append_format(buffer, "%f", cast->value); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_FOR_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"ForNode\",\"location\":", 29); + + const pm_for_node_t *cast = (const pm_for_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the index field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"index\":", 8); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->index); + + // Dump the collection field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"collection\":", 13); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->collection); + + // Dump the statements field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"statements\":", 13); + if (cast->statements != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->statements); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the for_keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"for_keyword_loc\":", 18); + pm_dump_json_location(buffer, &cast->for_keyword_loc); + + // Dump the in_keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, 
"\"in_keyword_loc\":", 17); + pm_dump_json_location(buffer, &cast->in_keyword_loc); + + // Dump the do_keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"do_keyword_loc\":", 17); + if (cast->do_keyword_loc.length != 0) { + pm_dump_json_location(buffer, &cast->do_keyword_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the end_keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"end_keyword_loc\":", 18); + pm_dump_json_location(buffer, &cast->end_keyword_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_FORWARDING_ARGUMENTS_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"ForwardingArgumentsNode\",\"location\":", 45); + + const pm_forwarding_arguments_node_t *cast = (const pm_forwarding_arguments_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_FORWARDING_PARAMETER_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"ForwardingParameterNode\",\"location\":", 45); + + const pm_forwarding_parameter_node_t *cast = (const pm_forwarding_parameter_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_FORWARDING_SUPER_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"ForwardingSuperNode\",\"location\":", 41); + + const pm_forwarding_super_node_t *cast = (const pm_forwarding_super_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the block field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"block\":", 8); + if (cast->block != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->block); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_GLOBAL_VARIABLE_AND_WRITE_NODE: { + pm_buffer_append_string(buffer, 
"{\"type\":\"GlobalVariableAndWriteNode\",\"location\":", 48); + + const pm_global_variable_and_write_node_t *cast = (const pm_global_variable_and_write_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name\":", 7); + pm_dump_json_constant(buffer, parser, cast->name); + + // Dump the name_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name_loc\":", 11); + pm_dump_json_location(buffer, &cast->name_loc); + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + pm_dump_json_location(buffer, &cast->operator_loc); + + // Dump the value field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"value\":", 8); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->value); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_GLOBAL_VARIABLE_OPERATOR_WRITE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"GlobalVariableOperatorWriteNode\",\"location\":", 53); + + const pm_global_variable_operator_write_node_t *cast = (const pm_global_variable_operator_write_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name\":", 7); + pm_dump_json_constant(buffer, parser, cast->name); + + // Dump the name_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name_loc\":", 11); + pm_dump_json_location(buffer, &cast->name_loc); + + // Dump the binary_operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"binary_operator_loc\":", 22); + pm_dump_json_location(buffer, &cast->binary_operator_loc); + + // Dump the value field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"value\":", 8); + 
pm_dump_json(buffer, parser, (const pm_node_t *) cast->value); + + // Dump the binary_operator field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"binary_operator\":", 18); + pm_dump_json_constant(buffer, parser, cast->binary_operator); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_GLOBAL_VARIABLE_OR_WRITE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"GlobalVariableOrWriteNode\",\"location\":", 47); + + const pm_global_variable_or_write_node_t *cast = (const pm_global_variable_or_write_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name\":", 7); + pm_dump_json_constant(buffer, parser, cast->name); + + // Dump the name_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name_loc\":", 11); + pm_dump_json_location(buffer, &cast->name_loc); + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + pm_dump_json_location(buffer, &cast->operator_loc); + + // Dump the value field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"value\":", 8); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->value); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_GLOBAL_VARIABLE_READ_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"GlobalVariableReadNode\",\"location\":", 44); + + const pm_global_variable_read_node_t *cast = (const pm_global_variable_read_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name\":", 7); + pm_dump_json_constant(buffer, parser, cast->name); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_GLOBAL_VARIABLE_TARGET_NODE: { + pm_buffer_append_string(buffer, 
"{\"type\":\"GlobalVariableTargetNode\",\"location\":", 46); + + const pm_global_variable_target_node_t *cast = (const pm_global_variable_target_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name\":", 7); + pm_dump_json_constant(buffer, parser, cast->name); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_GLOBAL_VARIABLE_WRITE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"GlobalVariableWriteNode\",\"location\":", 45); + + const pm_global_variable_write_node_t *cast = (const pm_global_variable_write_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name\":", 7); + pm_dump_json_constant(buffer, parser, cast->name); + + // Dump the name_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name_loc\":", 11); + pm_dump_json_location(buffer, &cast->name_loc); + + // Dump the value field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"value\":", 8); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->value); + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + pm_dump_json_location(buffer, &cast->operator_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_HASH_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"HashNode\",\"location\":", 30); + + const pm_hash_node_t *cast = (const pm_hash_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the opening_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"opening_loc\":", 14); + pm_dump_json_location(buffer, &cast->opening_loc); + + // Dump the elements field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, 
"\"elements\":", 11); + const pm_node_list_t *elements = &cast->elements; + pm_buffer_append_byte(buffer, '['); + + for (size_t index = 0; index < elements->size; index++) { + if (index != 0) pm_buffer_append_byte(buffer, ','); + pm_dump_json(buffer, parser, elements->nodes[index]); + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the closing_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"closing_loc\":", 14); + pm_dump_json_location(buffer, &cast->closing_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_HASH_PATTERN_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"HashPatternNode\",\"location\":", 37); + + const pm_hash_pattern_node_t *cast = (const pm_hash_pattern_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the constant field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"constant\":", 11); + if (cast->constant != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->constant); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the elements field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"elements\":", 11); + const pm_node_list_t *elements = &cast->elements; + pm_buffer_append_byte(buffer, '['); + + for (size_t index = 0; index < elements->size; index++) { + if (index != 0) pm_buffer_append_byte(buffer, ','); + pm_dump_json(buffer, parser, elements->nodes[index]); + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the rest field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"rest\":", 7); + if (cast->rest != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->rest); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the opening_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"opening_loc\":", 14); + if (cast->opening_loc.length != 0) { + 
pm_dump_json_location(buffer, &cast->opening_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the closing_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"closing_loc\":", 14); + if (cast->closing_loc.length != 0) { + pm_dump_json_location(buffer, &cast->closing_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_IF_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"IfNode\",\"location\":", 28); + + const pm_if_node_t *cast = (const pm_if_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the if_keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"if_keyword_loc\":", 17); + if (cast->if_keyword_loc.length != 0) { + pm_dump_json_location(buffer, &cast->if_keyword_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the predicate field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"predicate\":", 12); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->predicate); + + // Dump the then_keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"then_keyword_loc\":", 19); + if (cast->then_keyword_loc.length != 0) { + pm_dump_json_location(buffer, &cast->then_keyword_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the statements field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"statements\":", 13); + if (cast->statements != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->statements); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the subsequent field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"subsequent\":", 13); + if (cast->subsequent != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->subsequent); + } else { + 
pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the end_keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"end_keyword_loc\":", 18); + if (cast->end_keyword_loc.length != 0) { + pm_dump_json_location(buffer, &cast->end_keyword_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_IMAGINARY_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"ImaginaryNode\",\"location\":", 35); + + const pm_imaginary_node_t *cast = (const pm_imaginary_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the numeric field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"numeric\":", 10); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->numeric); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_IMPLICIT_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"ImplicitNode\",\"location\":", 34); + + const pm_implicit_node_t *cast = (const pm_implicit_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the value field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"value\":", 8); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->value); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_IMPLICIT_REST_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"ImplicitRestNode\",\"location\":", 38); + + const pm_implicit_rest_node_t *cast = (const pm_implicit_rest_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_IN_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"InNode\",\"location\":", 28); + + const pm_in_node_t *cast = (const pm_in_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the pattern field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"pattern\":", 
10); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->pattern); + + // Dump the statements field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"statements\":", 13); + if (cast->statements != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->statements); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the in_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"in_loc\":", 9); + pm_dump_json_location(buffer, &cast->in_loc); + + // Dump the then_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"then_loc\":", 11); + if (cast->then_loc.length != 0) { + pm_dump_json_location(buffer, &cast->then_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_INDEX_AND_WRITE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"IndexAndWriteNode\",\"location\":", 39); + + const pm_index_and_write_node_t *cast = (const pm_index_and_write_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the CallNodeFlags field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"flags\":", 8); + size_t flags = 0; + pm_buffer_append_byte(buffer, '['); + if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"SAFE_NAVIGATION\"", 17); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"VARIABLE_CALL\"", 15); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"ATTRIBUTE_WRITE\"", 17); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY)) { + if (flags != 0) 
pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"IGNORE_VISIBILITY\"", 19); + flags++; + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the receiver field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"receiver\":", 11); + if (cast->receiver != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->receiver); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the call_operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"call_operator_loc\":", 20); + if (cast->call_operator_loc.length != 0) { + pm_dump_json_location(buffer, &cast->call_operator_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the opening_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"opening_loc\":", 14); + pm_dump_json_location(buffer, &cast->opening_loc); + + // Dump the arguments field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"arguments\":", 12); + if (cast->arguments != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->arguments); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the closing_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"closing_loc\":", 14); + pm_dump_json_location(buffer, &cast->closing_loc); + + // Dump the block field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"block\":", 8); + if (cast->block != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->block); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + pm_dump_json_location(buffer, &cast->operator_loc); + + // Dump the value field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"value\":", 8); + 
pm_dump_json(buffer, parser, (const pm_node_t *) cast->value); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_INDEX_OPERATOR_WRITE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"IndexOperatorWriteNode\",\"location\":", 44); + + const pm_index_operator_write_node_t *cast = (const pm_index_operator_write_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the CallNodeFlags field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"flags\":", 8); + size_t flags = 0; + pm_buffer_append_byte(buffer, '['); + if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"SAFE_NAVIGATION\"", 17); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"VARIABLE_CALL\"", 15); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"ATTRIBUTE_WRITE\"", 17); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"IGNORE_VISIBILITY\"", 19); + flags++; + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the receiver field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"receiver\":", 11); + if (cast->receiver != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->receiver); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the call_operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"call_operator_loc\":", 20); + if (cast->call_operator_loc.length != 0) { + pm_dump_json_location(buffer, &cast->call_operator_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); 
+ } + + // Dump the opening_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"opening_loc\":", 14); + pm_dump_json_location(buffer, &cast->opening_loc); + + // Dump the arguments field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"arguments\":", 12); + if (cast->arguments != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->arguments); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the closing_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"closing_loc\":", 14); + pm_dump_json_location(buffer, &cast->closing_loc); + + // Dump the block field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"block\":", 8); + if (cast->block != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->block); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the binary_operator field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"binary_operator\":", 18); + pm_dump_json_constant(buffer, parser, cast->binary_operator); + + // Dump the binary_operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"binary_operator_loc\":", 22); + pm_dump_json_location(buffer, &cast->binary_operator_loc); + + // Dump the value field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"value\":", 8); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->value); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_INDEX_OR_WRITE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"IndexOrWriteNode\",\"location\":", 38); + + const pm_index_or_write_node_t *cast = (const pm_index_or_write_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the CallNodeFlags field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"flags\":", 8); + size_t flags = 0; + 
pm_buffer_append_byte(buffer, '['); + if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"SAFE_NAVIGATION\"", 17); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"VARIABLE_CALL\"", 15); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"ATTRIBUTE_WRITE\"", 17); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"IGNORE_VISIBILITY\"", 19); + flags++; + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the receiver field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"receiver\":", 11); + if (cast->receiver != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->receiver); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the call_operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"call_operator_loc\":", 20); + if (cast->call_operator_loc.length != 0) { + pm_dump_json_location(buffer, &cast->call_operator_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the opening_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"opening_loc\":", 14); + pm_dump_json_location(buffer, &cast->opening_loc); + + // Dump the arguments field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"arguments\":", 12); + if (cast->arguments != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->arguments); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the closing_loc field + pm_buffer_append_byte(buffer, 
','); + pm_buffer_append_string(buffer, "\"closing_loc\":", 14); + pm_dump_json_location(buffer, &cast->closing_loc); + + // Dump the block field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"block\":", 8); + if (cast->block != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->block); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + pm_dump_json_location(buffer, &cast->operator_loc); + + // Dump the value field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"value\":", 8); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->value); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_INDEX_TARGET_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"IndexTargetNode\",\"location\":", 37); + + const pm_index_target_node_t *cast = (const pm_index_target_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the CallNodeFlags field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"flags\":", 8); + size_t flags = 0; + pm_buffer_append_byte(buffer, '['); + if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"SAFE_NAVIGATION\"", 17); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"VARIABLE_CALL\"", 15); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"ATTRIBUTE_WRITE\"", 17); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, 
"\"IGNORE_VISIBILITY\"", 19); + flags++; + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the receiver field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"receiver\":", 11); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->receiver); + + // Dump the opening_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"opening_loc\":", 14); + pm_dump_json_location(buffer, &cast->opening_loc); + + // Dump the arguments field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"arguments\":", 12); + if (cast->arguments != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->arguments); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the closing_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"closing_loc\":", 14); + pm_dump_json_location(buffer, &cast->closing_loc); + + // Dump the block field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"block\":", 8); + if (cast->block != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->block); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_INSTANCE_VARIABLE_AND_WRITE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"InstanceVariableAndWriteNode\",\"location\":", 50); + + const pm_instance_variable_and_write_node_t *cast = (const pm_instance_variable_and_write_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name\":", 7); + pm_dump_json_constant(buffer, parser, cast->name); + + // Dump the name_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name_loc\":", 11); + pm_dump_json_location(buffer, &cast->name_loc); + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + 
pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + pm_dump_json_location(buffer, &cast->operator_loc); + + // Dump the value field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"value\":", 8); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->value); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_INSTANCE_VARIABLE_OPERATOR_WRITE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"InstanceVariableOperatorWriteNode\",\"location\":", 55); + + const pm_instance_variable_operator_write_node_t *cast = (const pm_instance_variable_operator_write_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name\":", 7); + pm_dump_json_constant(buffer, parser, cast->name); + + // Dump the name_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name_loc\":", 11); + pm_dump_json_location(buffer, &cast->name_loc); + + // Dump the binary_operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"binary_operator_loc\":", 22); + pm_dump_json_location(buffer, &cast->binary_operator_loc); + + // Dump the value field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"value\":", 8); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->value); + + // Dump the binary_operator field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"binary_operator\":", 18); + pm_dump_json_constant(buffer, parser, cast->binary_operator); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_INSTANCE_VARIABLE_OR_WRITE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"InstanceVariableOrWriteNode\",\"location\":", 49); + + const pm_instance_variable_or_write_node_t *cast = (const pm_instance_variable_or_write_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the name 
field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name\":", 7); + pm_dump_json_constant(buffer, parser, cast->name); + + // Dump the name_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name_loc\":", 11); + pm_dump_json_location(buffer, &cast->name_loc); + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + pm_dump_json_location(buffer, &cast->operator_loc); + + // Dump the value field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"value\":", 8); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->value); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_INSTANCE_VARIABLE_READ_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"InstanceVariableReadNode\",\"location\":", 46); + + const pm_instance_variable_read_node_t *cast = (const pm_instance_variable_read_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name\":", 7); + pm_dump_json_constant(buffer, parser, cast->name); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_INSTANCE_VARIABLE_TARGET_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"InstanceVariableTargetNode\",\"location\":", 48); + + const pm_instance_variable_target_node_t *cast = (const pm_instance_variable_target_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name\":", 7); + pm_dump_json_constant(buffer, parser, cast->name); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_INSTANCE_VARIABLE_WRITE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"InstanceVariableWriteNode\",\"location\":", 47); + + const pm_instance_variable_write_node_t *cast = (const 
pm_instance_variable_write_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name\":", 7); + pm_dump_json_constant(buffer, parser, cast->name); + + // Dump the name_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name_loc\":", 11); + pm_dump_json_location(buffer, &cast->name_loc); + + // Dump the value field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"value\":", 8); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->value); + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + pm_dump_json_location(buffer, &cast->operator_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_INTEGER_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"IntegerNode\",\"location\":", 33); + + const pm_integer_node_t *cast = (const pm_integer_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the IntegerBaseFlags field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"flags\":", 8); + size_t flags = 0; + pm_buffer_append_byte(buffer, '['); + if (PM_NODE_FLAG_P(cast, PM_INTEGER_BASE_FLAGS_BINARY)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"BINARY\"", 8); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_INTEGER_BASE_FLAGS_DECIMAL)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"DECIMAL\"", 9); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_INTEGER_BASE_FLAGS_OCTAL)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"OCTAL\"", 7); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_INTEGER_BASE_FLAGS_HEXADECIMAL)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"HEXADECIMAL\"", 
13); + flags++; + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the value field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"value\":", 8); + pm_integer_string(buffer, &cast->value); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_INTERPOLATED_MATCH_LAST_LINE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"InterpolatedMatchLastLineNode\",\"location\":", 51); + + const pm_interpolated_match_last_line_node_t *cast = (const pm_interpolated_match_last_line_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the RegularExpressionFlags field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"flags\":", 8); + size_t flags = 0; + pm_buffer_append_byte(buffer, '['); + if (PM_NODE_FLAG_P(cast, PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"IGNORE_CASE\"", 13); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"EXTENDED\"", 10); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"MULTI_LINE\"", 12); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_REGULAR_EXPRESSION_FLAGS_ONCE)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"ONCE\"", 6); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_REGULAR_EXPRESSION_FLAGS_EUC_JP)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"EUC_JP\"", 8); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"ASCII_8BIT\"", 12); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J)) 
{ + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"WINDOWS_31J\"", 13); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_REGULAR_EXPRESSION_FLAGS_UTF_8)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"UTF_8\"", 7); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_REGULAR_EXPRESSION_FLAGS_FORCED_UTF8_ENCODING)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"FORCED_UTF8_ENCODING\"", 22); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_REGULAR_EXPRESSION_FLAGS_FORCED_BINARY_ENCODING)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"FORCED_BINARY_ENCODING\"", 24); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"FORCED_US_ASCII_ENCODING\"", 26); + flags++; + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the opening_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"opening_loc\":", 14); + pm_dump_json_location(buffer, &cast->opening_loc); + + // Dump the parts field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"parts\":", 8); + const pm_node_list_t *parts = &cast->parts; + pm_buffer_append_byte(buffer, '['); + + for (size_t index = 0; index < parts->size; index++) { + if (index != 0) pm_buffer_append_byte(buffer, ','); + pm_dump_json(buffer, parser, parts->nodes[index]); + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the closing_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"closing_loc\":", 14); + pm_dump_json_location(buffer, &cast->closing_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"InterpolatedRegularExpressionNode\",\"location\":", 55); + 
+ const pm_interpolated_regular_expression_node_t *cast = (const pm_interpolated_regular_expression_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the RegularExpressionFlags field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"flags\":", 8); + size_t flags = 0; + pm_buffer_append_byte(buffer, '['); + if (PM_NODE_FLAG_P(cast, PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"IGNORE_CASE\"", 13); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"EXTENDED\"", 10); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"MULTI_LINE\"", 12); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_REGULAR_EXPRESSION_FLAGS_ONCE)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"ONCE\"", 6); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_REGULAR_EXPRESSION_FLAGS_EUC_JP)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"EUC_JP\"", 8); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"ASCII_8BIT\"", 12); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"WINDOWS_31J\"", 13); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_REGULAR_EXPRESSION_FLAGS_UTF_8)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"UTF_8\"", 7); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_REGULAR_EXPRESSION_FLAGS_FORCED_UTF8_ENCODING)) { + if (flags != 0) 
pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"FORCED_UTF8_ENCODING\"", 22); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_REGULAR_EXPRESSION_FLAGS_FORCED_BINARY_ENCODING)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"FORCED_BINARY_ENCODING\"", 24); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"FORCED_US_ASCII_ENCODING\"", 26); + flags++; + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the opening_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"opening_loc\":", 14); + pm_dump_json_location(buffer, &cast->opening_loc); + + // Dump the parts field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"parts\":", 8); + const pm_node_list_t *parts = &cast->parts; + pm_buffer_append_byte(buffer, '['); + + for (size_t index = 0; index < parts->size; index++) { + if (index != 0) pm_buffer_append_byte(buffer, ','); + pm_dump_json(buffer, parser, parts->nodes[index]); + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the closing_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"closing_loc\":", 14); + pm_dump_json_location(buffer, &cast->closing_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_INTERPOLATED_STRING_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"InterpolatedStringNode\",\"location\":", 44); + + const pm_interpolated_string_node_t *cast = (const pm_interpolated_string_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the InterpolatedStringNodeFlags field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"flags\":", 8); + size_t flags = 0; + pm_buffer_append_byte(buffer, '['); + if (PM_NODE_FLAG_P(cast, PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN)) { + if (flags != 0) 
pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"FROZEN\"", 8); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"MUTABLE\"", 9); + flags++; + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the opening_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"opening_loc\":", 14); + if (cast->opening_loc.length != 0) { + pm_dump_json_location(buffer, &cast->opening_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the parts field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"parts\":", 8); + const pm_node_list_t *parts = &cast->parts; + pm_buffer_append_byte(buffer, '['); + + for (size_t index = 0; index < parts->size; index++) { + if (index != 0) pm_buffer_append_byte(buffer, ','); + pm_dump_json(buffer, parser, parts->nodes[index]); + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the closing_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"closing_loc\":", 14); + if (cast->closing_loc.length != 0) { + pm_dump_json_location(buffer, &cast->closing_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_INTERPOLATED_SYMBOL_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"InterpolatedSymbolNode\",\"location\":", 44); + + const pm_interpolated_symbol_node_t *cast = (const pm_interpolated_symbol_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the opening_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"opening_loc\":", 14); + if (cast->opening_loc.length != 0) { + pm_dump_json_location(buffer, &cast->opening_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the parts field + pm_buffer_append_byte(buffer, ','); + 
pm_buffer_append_string(buffer, "\"parts\":", 8); + const pm_node_list_t *parts = &cast->parts; + pm_buffer_append_byte(buffer, '['); + + for (size_t index = 0; index < parts->size; index++) { + if (index != 0) pm_buffer_append_byte(buffer, ','); + pm_dump_json(buffer, parser, parts->nodes[index]); + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the closing_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"closing_loc\":", 14); + if (cast->closing_loc.length != 0) { + pm_dump_json_location(buffer, &cast->closing_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_INTERPOLATED_X_STRING_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"InterpolatedXStringNode\",\"location\":", 45); + + const pm_interpolated_x_string_node_t *cast = (const pm_interpolated_x_string_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the opening_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"opening_loc\":", 14); + pm_dump_json_location(buffer, &cast->opening_loc); + + // Dump the parts field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"parts\":", 8); + const pm_node_list_t *parts = &cast->parts; + pm_buffer_append_byte(buffer, '['); + + for (size_t index = 0; index < parts->size; index++) { + if (index != 0) pm_buffer_append_byte(buffer, ','); + pm_dump_json(buffer, parser, parts->nodes[index]); + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the closing_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"closing_loc\":", 14); + pm_dump_json_location(buffer, &cast->closing_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_IT_LOCAL_VARIABLE_READ_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"ItLocalVariableReadNode\",\"location\":", 45); + + const pm_it_local_variable_read_node_t *cast = (const 
pm_it_local_variable_read_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_IT_PARAMETERS_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"ItParametersNode\",\"location\":", 38); + + const pm_it_parameters_node_t *cast = (const pm_it_parameters_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_KEYWORD_HASH_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"KeywordHashNode\",\"location\":", 37); + + const pm_keyword_hash_node_t *cast = (const pm_keyword_hash_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the KeywordHashNodeFlags field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"flags\":", 8); + size_t flags = 0; + pm_buffer_append_byte(buffer, '['); + if (PM_NODE_FLAG_P(cast, PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"SYMBOL_KEYS\"", 13); + flags++; + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the elements field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"elements\":", 11); + const pm_node_list_t *elements = &cast->elements; + pm_buffer_append_byte(buffer, '['); + + for (size_t index = 0; index < elements->size; index++) { + if (index != 0) pm_buffer_append_byte(buffer, ','); + pm_dump_json(buffer, parser, elements->nodes[index]); + } + pm_buffer_append_byte(buffer, ']'); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_KEYWORD_REST_PARAMETER_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"KeywordRestParameterNode\",\"location\":", 46); + + const pm_keyword_rest_parameter_node_t *cast = (const pm_keyword_rest_parameter_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the ParameterFlags field + pm_buffer_append_byte(buffer, ','); + 
pm_buffer_append_string(buffer, "\"flags\":", 8); + size_t flags = 0; + pm_buffer_append_byte(buffer, '['); + if (PM_NODE_FLAG_P(cast, PM_PARAMETER_FLAGS_REPEATED_PARAMETER)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"REPEATED_PARAMETER\"", 20); + flags++; + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name\":", 7); + if (cast->name != PM_CONSTANT_ID_UNSET) { + pm_dump_json_constant(buffer, parser, cast->name); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the name_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name_loc\":", 11); + if (cast->name_loc.length != 0) { + pm_dump_json_location(buffer, &cast->name_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + pm_dump_json_location(buffer, &cast->operator_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_LAMBDA_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"LambdaNode\",\"location\":", 32); + + const pm_lambda_node_t *cast = (const pm_lambda_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the locals field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"locals\":", 9); + const pm_constant_id_list_t *locals = &cast->locals; + pm_buffer_append_byte(buffer, '['); + + for (size_t index = 0; index < locals->size; index++) { + if (index != 0) pm_buffer_append_byte(buffer, ','); + pm_dump_json_constant(buffer, parser, locals->ids[index]); + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + pm_dump_json_location(buffer, &cast->operator_loc); + + // Dump the 
opening_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"opening_loc\":", 14); + pm_dump_json_location(buffer, &cast->opening_loc); + + // Dump the closing_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"closing_loc\":", 14); + pm_dump_json_location(buffer, &cast->closing_loc); + + // Dump the parameters field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"parameters\":", 13); + if (cast->parameters != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->parameters); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the body field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"body\":", 7); + if (cast->body != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->body); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_LOCAL_VARIABLE_AND_WRITE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"LocalVariableAndWriteNode\",\"location\":", 47); + + const pm_local_variable_and_write_node_t *cast = (const pm_local_variable_and_write_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the name_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name_loc\":", 11); + pm_dump_json_location(buffer, &cast->name_loc); + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + pm_dump_json_location(buffer, &cast->operator_loc); + + // Dump the value field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"value\":", 8); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->value); + + // Dump the name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name\":", 7); + pm_dump_json_constant(buffer, parser, cast->name); + + // Dump the 
depth field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"depth\":", 8); + pm_buffer_append_format(buffer, "%" PRIu32, cast->depth); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_LOCAL_VARIABLE_OPERATOR_WRITE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"LocalVariableOperatorWriteNode\",\"location\":", 52); + + const pm_local_variable_operator_write_node_t *cast = (const pm_local_variable_operator_write_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the name_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name_loc\":", 11); + pm_dump_json_location(buffer, &cast->name_loc); + + // Dump the binary_operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"binary_operator_loc\":", 22); + pm_dump_json_location(buffer, &cast->binary_operator_loc); + + // Dump the value field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"value\":", 8); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->value); + + // Dump the name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name\":", 7); + pm_dump_json_constant(buffer, parser, cast->name); + + // Dump the binary_operator field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"binary_operator\":", 18); + pm_dump_json_constant(buffer, parser, cast->binary_operator); + + // Dump the depth field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"depth\":", 8); + pm_buffer_append_format(buffer, "%" PRIu32, cast->depth); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_LOCAL_VARIABLE_OR_WRITE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"LocalVariableOrWriteNode\",\"location\":", 46); + + const pm_local_variable_or_write_node_t *cast = (const pm_local_variable_or_write_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + 
// Dump the name_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name_loc\":", 11); + pm_dump_json_location(buffer, &cast->name_loc); + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + pm_dump_json_location(buffer, &cast->operator_loc); + + // Dump the value field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"value\":", 8); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->value); + + // Dump the name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name\":", 7); + pm_dump_json_constant(buffer, parser, cast->name); + + // Dump the depth field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"depth\":", 8); + pm_buffer_append_format(buffer, "%" PRIu32, cast->depth); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_LOCAL_VARIABLE_READ_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"LocalVariableReadNode\",\"location\":", 43); + + const pm_local_variable_read_node_t *cast = (const pm_local_variable_read_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name\":", 7); + pm_dump_json_constant(buffer, parser, cast->name); + + // Dump the depth field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"depth\":", 8); + pm_buffer_append_format(buffer, "%" PRIu32, cast->depth); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_LOCAL_VARIABLE_TARGET_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"LocalVariableTargetNode\",\"location\":", 45); + + const pm_local_variable_target_node_t *cast = (const pm_local_variable_target_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the name field + pm_buffer_append_byte(buffer, ','); + 
pm_buffer_append_string(buffer, "\"name\":", 7); + pm_dump_json_constant(buffer, parser, cast->name); + + // Dump the depth field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"depth\":", 8); + pm_buffer_append_format(buffer, "%" PRIu32, cast->depth); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_LOCAL_VARIABLE_WRITE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"LocalVariableWriteNode\",\"location\":", 44); + + const pm_local_variable_write_node_t *cast = (const pm_local_variable_write_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name\":", 7); + pm_dump_json_constant(buffer, parser, cast->name); + + // Dump the depth field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"depth\":", 8); + pm_buffer_append_format(buffer, "%" PRIu32, cast->depth); + + // Dump the name_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name_loc\":", 11); + pm_dump_json_location(buffer, &cast->name_loc); + + // Dump the value field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"value\":", 8); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->value); + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + pm_dump_json_location(buffer, &cast->operator_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_MATCH_LAST_LINE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"MatchLastLineNode\",\"location\":", 39); + + const pm_match_last_line_node_t *cast = (const pm_match_last_line_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the RegularExpressionFlags field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"flags\":", 8); + size_t flags = 0; + 
pm_buffer_append_byte(buffer, '['); + if (PM_NODE_FLAG_P(cast, PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"IGNORE_CASE\"", 13); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"EXTENDED\"", 10); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"MULTI_LINE\"", 12); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_REGULAR_EXPRESSION_FLAGS_ONCE)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"ONCE\"", 6); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_REGULAR_EXPRESSION_FLAGS_EUC_JP)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"EUC_JP\"", 8); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"ASCII_8BIT\"", 12); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"WINDOWS_31J\"", 13); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_REGULAR_EXPRESSION_FLAGS_UTF_8)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"UTF_8\"", 7); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_REGULAR_EXPRESSION_FLAGS_FORCED_UTF8_ENCODING)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"FORCED_UTF8_ENCODING\"", 22); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_REGULAR_EXPRESSION_FLAGS_FORCED_BINARY_ENCODING)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"FORCED_BINARY_ENCODING\"", 24); + 
flags++; + } + if (PM_NODE_FLAG_P(cast, PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"FORCED_US_ASCII_ENCODING\"", 26); + flags++; + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the opening_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"opening_loc\":", 14); + pm_dump_json_location(buffer, &cast->opening_loc); + + // Dump the content_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"content_loc\":", 14); + pm_dump_json_location(buffer, &cast->content_loc); + + // Dump the closing_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"closing_loc\":", 14); + pm_dump_json_location(buffer, &cast->closing_loc); + + // Dump the unescaped field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"unescaped\":", 12); + const pm_string_t *unescaped = &cast->unescaped; + pm_buffer_append_byte(buffer, '"'); + pm_buffer_append_source(buffer, pm_string_source(unescaped), pm_string_length(unescaped), PM_BUFFER_ESCAPING_JSON); + pm_buffer_append_byte(buffer, '"'); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_MATCH_PREDICATE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"MatchPredicateNode\",\"location\":", 40); + + const pm_match_predicate_node_t *cast = (const pm_match_predicate_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the value field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"value\":", 8); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->value); + + // Dump the pattern field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"pattern\":", 10); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->pattern); + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, 
"\"operator_loc\":", 15); + pm_dump_json_location(buffer, &cast->operator_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_MATCH_REQUIRED_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"MatchRequiredNode\",\"location\":", 39); + + const pm_match_required_node_t *cast = (const pm_match_required_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the value field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"value\":", 8); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->value); + + // Dump the pattern field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"pattern\":", 10); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->pattern); + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + pm_dump_json_location(buffer, &cast->operator_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_MATCH_WRITE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"MatchWriteNode\",\"location\":", 36); + + const pm_match_write_node_t *cast = (const pm_match_write_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the call field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"call\":", 7); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->call); + + // Dump the targets field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"targets\":", 10); + const pm_node_list_t *targets = &cast->targets; + pm_buffer_append_byte(buffer, '['); + + for (size_t index = 0; index < targets->size; index++) { + if (index != 0) pm_buffer_append_byte(buffer, ','); + pm_dump_json(buffer, parser, targets->nodes[index]); + } + pm_buffer_append_byte(buffer, ']'); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_MISSING_NODE: { + pm_buffer_append_string(buffer, 
"{\"type\":\"MissingNode\",\"location\":", 33); + + const pm_missing_node_t *cast = (const pm_missing_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_MODULE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"ModuleNode\",\"location\":", 32); + + const pm_module_node_t *cast = (const pm_module_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the locals field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"locals\":", 9); + const pm_constant_id_list_t *locals = &cast->locals; + pm_buffer_append_byte(buffer, '['); + + for (size_t index = 0; index < locals->size; index++) { + if (index != 0) pm_buffer_append_byte(buffer, ','); + pm_dump_json_constant(buffer, parser, locals->ids[index]); + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the module_keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"module_keyword_loc\":", 21); + pm_dump_json_location(buffer, &cast->module_keyword_loc); + + // Dump the constant_path field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"constant_path\":", 16); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->constant_path); + + // Dump the body field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"body\":", 7); + if (cast->body != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->body); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the end_keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"end_keyword_loc\":", 18); + pm_dump_json_location(buffer, &cast->end_keyword_loc); + + // Dump the name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name\":", 7); + pm_dump_json_constant(buffer, parser, cast->name); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case 
PM_MULTI_TARGET_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"MultiTargetNode\",\"location\":", 37); + + const pm_multi_target_node_t *cast = (const pm_multi_target_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the lefts field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"lefts\":", 8); + const pm_node_list_t *lefts = &cast->lefts; + pm_buffer_append_byte(buffer, '['); + + for (size_t index = 0; index < lefts->size; index++) { + if (index != 0) pm_buffer_append_byte(buffer, ','); + pm_dump_json(buffer, parser, lefts->nodes[index]); + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the rest field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"rest\":", 7); + if (cast->rest != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->rest); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the rights field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"rights\":", 9); + const pm_node_list_t *rights = &cast->rights; + pm_buffer_append_byte(buffer, '['); + + for (size_t index = 0; index < rights->size; index++) { + if (index != 0) pm_buffer_append_byte(buffer, ','); + pm_dump_json(buffer, parser, rights->nodes[index]); + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the lparen_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"lparen_loc\":", 13); + if (cast->lparen_loc.length != 0) { + pm_dump_json_location(buffer, &cast->lparen_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the rparen_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"rparen_loc\":", 13); + if (cast->rparen_loc.length != 0) { + pm_dump_json_location(buffer, &cast->rparen_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_MULTI_WRITE_NODE: { + 
pm_buffer_append_string(buffer, "{\"type\":\"MultiWriteNode\",\"location\":", 36); + + const pm_multi_write_node_t *cast = (const pm_multi_write_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the lefts field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"lefts\":", 8); + const pm_node_list_t *lefts = &cast->lefts; + pm_buffer_append_byte(buffer, '['); + + for (size_t index = 0; index < lefts->size; index++) { + if (index != 0) pm_buffer_append_byte(buffer, ','); + pm_dump_json(buffer, parser, lefts->nodes[index]); + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the rest field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"rest\":", 7); + if (cast->rest != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->rest); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the rights field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"rights\":", 9); + const pm_node_list_t *rights = &cast->rights; + pm_buffer_append_byte(buffer, '['); + + for (size_t index = 0; index < rights->size; index++) { + if (index != 0) pm_buffer_append_byte(buffer, ','); + pm_dump_json(buffer, parser, rights->nodes[index]); + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the lparen_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"lparen_loc\":", 13); + if (cast->lparen_loc.length != 0) { + pm_dump_json_location(buffer, &cast->lparen_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the rparen_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"rparen_loc\":", 13); + if (cast->rparen_loc.length != 0) { + pm_dump_json_location(buffer, &cast->rparen_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"operator_loc\":", 
15); + pm_dump_json_location(buffer, &cast->operator_loc); + + // Dump the value field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"value\":", 8); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->value); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_NEXT_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"NextNode\",\"location\":", 30); + + const pm_next_node_t *cast = (const pm_next_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the arguments field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"arguments\":", 12); + if (cast->arguments != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->arguments); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"keyword_loc\":", 14); + pm_dump_json_location(buffer, &cast->keyword_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_NIL_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"NilNode\",\"location\":", 29); + + const pm_nil_node_t *cast = (const pm_nil_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_NO_BLOCK_PARAMETER_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"NoBlockParameterNode\",\"location\":", 42); + + const pm_no_block_parameter_node_t *cast = (const pm_no_block_parameter_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + pm_dump_json_location(buffer, &cast->operator_loc); + + // Dump the keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"keyword_loc\":", 14); + pm_dump_json_location(buffer, &cast->keyword_loc); + + pm_buffer_append_byte(buffer, '}'); 
+ break; + } + case PM_NO_KEYWORDS_PARAMETER_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"NoKeywordsParameterNode\",\"location\":", 45); + + const pm_no_keywords_parameter_node_t *cast = (const pm_no_keywords_parameter_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + pm_dump_json_location(buffer, &cast->operator_loc); + + // Dump the keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"keyword_loc\":", 14); + pm_dump_json_location(buffer, &cast->keyword_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_NUMBERED_PARAMETERS_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"NumberedParametersNode\",\"location\":", 44); + + const pm_numbered_parameters_node_t *cast = (const pm_numbered_parameters_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the maximum field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"maximum\":", 10); + pm_buffer_append_format(buffer, "%" PRIu8, cast->maximum); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_NUMBERED_REFERENCE_READ_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"NumberedReferenceReadNode\",\"location\":", 47); + + const pm_numbered_reference_read_node_t *cast = (const pm_numbered_reference_read_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the number field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"number\":", 9); + pm_buffer_append_format(buffer, "%" PRIu32, cast->number); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_OPTIONAL_KEYWORD_PARAMETER_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"OptionalKeywordParameterNode\",\"location\":", 50); + + const pm_optional_keyword_parameter_node_t *cast = (const 
pm_optional_keyword_parameter_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the ParameterFlags field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"flags\":", 8); + size_t flags = 0; + pm_buffer_append_byte(buffer, '['); + if (PM_NODE_FLAG_P(cast, PM_PARAMETER_FLAGS_REPEATED_PARAMETER)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"REPEATED_PARAMETER\"", 20); + flags++; + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name\":", 7); + pm_dump_json_constant(buffer, parser, cast->name); + + // Dump the name_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name_loc\":", 11); + pm_dump_json_location(buffer, &cast->name_loc); + + // Dump the value field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"value\":", 8); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->value); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_OPTIONAL_PARAMETER_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"OptionalParameterNode\",\"location\":", 43); + + const pm_optional_parameter_node_t *cast = (const pm_optional_parameter_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the ParameterFlags field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"flags\":", 8); + size_t flags = 0; + pm_buffer_append_byte(buffer, '['); + if (PM_NODE_FLAG_P(cast, PM_PARAMETER_FLAGS_REPEATED_PARAMETER)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"REPEATED_PARAMETER\"", 20); + flags++; + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name\":", 7); + pm_dump_json_constant(buffer, parser, cast->name); + + // Dump the 
name_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name_loc\":", 11); + pm_dump_json_location(buffer, &cast->name_loc); + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + pm_dump_json_location(buffer, &cast->operator_loc); + + // Dump the value field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"value\":", 8); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->value); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_OR_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"OrNode\",\"location\":", 28); + + const pm_or_node_t *cast = (const pm_or_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the left field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"left\":", 7); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->left); + + // Dump the right field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"right\":", 8); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->right); + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + pm_dump_json_location(buffer, &cast->operator_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_PARAMETERS_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"ParametersNode\",\"location\":", 36); + + const pm_parameters_node_t *cast = (const pm_parameters_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the requireds field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"requireds\":", 12); + const pm_node_list_t *requireds = &cast->requireds; + pm_buffer_append_byte(buffer, '['); + + for (size_t index = 0; index < requireds->size; index++) { + if (index != 0) pm_buffer_append_byte(buffer, ','); + 
pm_dump_json(buffer, parser, requireds->nodes[index]); + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the optionals field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"optionals\":", 12); + const pm_node_list_t *optionals = &cast->optionals; + pm_buffer_append_byte(buffer, '['); + + for (size_t index = 0; index < optionals->size; index++) { + if (index != 0) pm_buffer_append_byte(buffer, ','); + pm_dump_json(buffer, parser, optionals->nodes[index]); + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the rest field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"rest\":", 7); + if (cast->rest != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->rest); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the posts field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"posts\":", 8); + const pm_node_list_t *posts = &cast->posts; + pm_buffer_append_byte(buffer, '['); + + for (size_t index = 0; index < posts->size; index++) { + if (index != 0) pm_buffer_append_byte(buffer, ','); + pm_dump_json(buffer, parser, posts->nodes[index]); + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the keywords field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"keywords\":", 11); + const pm_node_list_t *keywords = &cast->keywords; + pm_buffer_append_byte(buffer, '['); + + for (size_t index = 0; index < keywords->size; index++) { + if (index != 0) pm_buffer_append_byte(buffer, ','); + pm_dump_json(buffer, parser, keywords->nodes[index]); + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the keyword_rest field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"keyword_rest\":", 15); + if (cast->keyword_rest != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->keyword_rest); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the block field + 
pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"block\":", 8); + if (cast->block != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->block); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_PARENTHESES_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"ParenthesesNode\",\"location\":", 37); + + const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the ParenthesesNodeFlags field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"flags\":", 8); + size_t flags = 0; + pm_buffer_append_byte(buffer, '['); + if (PM_NODE_FLAG_P(cast, PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"MULTIPLE_STATEMENTS\"", 21); + flags++; + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the body field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"body\":", 7); + if (cast->body != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->body); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the opening_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"opening_loc\":", 14); + pm_dump_json_location(buffer, &cast->opening_loc); + + // Dump the closing_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"closing_loc\":", 14); + pm_dump_json_location(buffer, &cast->closing_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_PINNED_EXPRESSION_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"PinnedExpressionNode\",\"location\":", 42); + + const pm_pinned_expression_node_t *cast = (const pm_pinned_expression_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the expression field + 
pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"expression\":", 13); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->expression); + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + pm_dump_json_location(buffer, &cast->operator_loc); + + // Dump the lparen_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"lparen_loc\":", 13); + pm_dump_json_location(buffer, &cast->lparen_loc); + + // Dump the rparen_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"rparen_loc\":", 13); + pm_dump_json_location(buffer, &cast->rparen_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_PINNED_VARIABLE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"PinnedVariableNode\",\"location\":", 40); + + const pm_pinned_variable_node_t *cast = (const pm_pinned_variable_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the variable field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"variable\":", 11); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->variable); + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + pm_dump_json_location(buffer, &cast->operator_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_POST_EXECUTION_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"PostExecutionNode\",\"location\":", 39); + + const pm_post_execution_node_t *cast = (const pm_post_execution_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the statements field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"statements\":", 13); + if (cast->statements != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->statements); + } else { + 
pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"keyword_loc\":", 14); + pm_dump_json_location(buffer, &cast->keyword_loc); + + // Dump the opening_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"opening_loc\":", 14); + pm_dump_json_location(buffer, &cast->opening_loc); + + // Dump the closing_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"closing_loc\":", 14); + pm_dump_json_location(buffer, &cast->closing_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_PRE_EXECUTION_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"PreExecutionNode\",\"location\":", 38); + + const pm_pre_execution_node_t *cast = (const pm_pre_execution_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the statements field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"statements\":", 13); + if (cast->statements != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->statements); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"keyword_loc\":", 14); + pm_dump_json_location(buffer, &cast->keyword_loc); + + // Dump the opening_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"opening_loc\":", 14); + pm_dump_json_location(buffer, &cast->opening_loc); + + // Dump the closing_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"closing_loc\":", 14); + pm_dump_json_location(buffer, &cast->closing_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_PROGRAM_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"ProgramNode\",\"location\":", 33); + + const pm_program_node_t *cast = (const pm_program_node_t *) node; + 
pm_dump_json_location(buffer, &cast->base.location); + + // Dump the locals field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"locals\":", 9); + const pm_constant_id_list_t *locals = &cast->locals; + pm_buffer_append_byte(buffer, '['); + + for (size_t index = 0; index < locals->size; index++) { + if (index != 0) pm_buffer_append_byte(buffer, ','); + pm_dump_json_constant(buffer, parser, locals->ids[index]); + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the statements field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"statements\":", 13); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->statements); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_RANGE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"RangeNode\",\"location\":", 31); + + const pm_range_node_t *cast = (const pm_range_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the RangeFlags field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"flags\":", 8); + size_t flags = 0; + pm_buffer_append_byte(buffer, '['); + if (PM_NODE_FLAG_P(cast, PM_RANGE_FLAGS_EXCLUDE_END)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"EXCLUDE_END\"", 13); + flags++; + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the left field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"left\":", 7); + if (cast->left != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->left); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the right field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"right\":", 8); + if (cast->right != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->right); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + 
pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + pm_dump_json_location(buffer, &cast->operator_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_RATIONAL_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"RationalNode\",\"location\":", 34); + + const pm_rational_node_t *cast = (const pm_rational_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the IntegerBaseFlags field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"flags\":", 8); + size_t flags = 0; + pm_buffer_append_byte(buffer, '['); + if (PM_NODE_FLAG_P(cast, PM_INTEGER_BASE_FLAGS_BINARY)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"BINARY\"", 8); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_INTEGER_BASE_FLAGS_DECIMAL)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"DECIMAL\"", 9); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_INTEGER_BASE_FLAGS_OCTAL)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"OCTAL\"", 7); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_INTEGER_BASE_FLAGS_HEXADECIMAL)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"HEXADECIMAL\"", 13); + flags++; + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the numerator field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"numerator\":", 12); + pm_integer_string(buffer, &cast->numerator); + + // Dump the denominator field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"denominator\":", 14); + pm_integer_string(buffer, &cast->denominator); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_REDO_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"RedoNode\",\"location\":", 30); + + const pm_redo_node_t *cast = (const pm_redo_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + 
pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_REGULAR_EXPRESSION_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"RegularExpressionNode\",\"location\":", 43); + + const pm_regular_expression_node_t *cast = (const pm_regular_expression_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the RegularExpressionFlags field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"flags\":", 8); + size_t flags = 0; + pm_buffer_append_byte(buffer, '['); + if (PM_NODE_FLAG_P(cast, PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"IGNORE_CASE\"", 13); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"EXTENDED\"", 10); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"MULTI_LINE\"", 12); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_REGULAR_EXPRESSION_FLAGS_ONCE)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"ONCE\"", 6); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_REGULAR_EXPRESSION_FLAGS_EUC_JP)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"EUC_JP\"", 8); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"ASCII_8BIT\"", 12); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"WINDOWS_31J\"", 13); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_REGULAR_EXPRESSION_FLAGS_UTF_8)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + 
pm_buffer_append_string(buffer, "\"UTF_8\"", 7); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_REGULAR_EXPRESSION_FLAGS_FORCED_UTF8_ENCODING)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"FORCED_UTF8_ENCODING\"", 22); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_REGULAR_EXPRESSION_FLAGS_FORCED_BINARY_ENCODING)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"FORCED_BINARY_ENCODING\"", 24); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"FORCED_US_ASCII_ENCODING\"", 26); + flags++; + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the opening_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"opening_loc\":", 14); + pm_dump_json_location(buffer, &cast->opening_loc); + + // Dump the content_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"content_loc\":", 14); + pm_dump_json_location(buffer, &cast->content_loc); + + // Dump the closing_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"closing_loc\":", 14); + pm_dump_json_location(buffer, &cast->closing_loc); + + // Dump the unescaped field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"unescaped\":", 12); + const pm_string_t *unescaped = &cast->unescaped; + pm_buffer_append_byte(buffer, '"'); + pm_buffer_append_source(buffer, pm_string_source(unescaped), pm_string_length(unescaped), PM_BUFFER_ESCAPING_JSON); + pm_buffer_append_byte(buffer, '"'); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_REQUIRED_KEYWORD_PARAMETER_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"RequiredKeywordParameterNode\",\"location\":", 50); + + const pm_required_keyword_parameter_node_t *cast = (const pm_required_keyword_parameter_node_t *) node; + 
pm_dump_json_location(buffer, &cast->base.location); + + // Dump the ParameterFlags field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"flags\":", 8); + size_t flags = 0; + pm_buffer_append_byte(buffer, '['); + if (PM_NODE_FLAG_P(cast, PM_PARAMETER_FLAGS_REPEATED_PARAMETER)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"REPEATED_PARAMETER\"", 20); + flags++; + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name\":", 7); + pm_dump_json_constant(buffer, parser, cast->name); + + // Dump the name_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name_loc\":", 11); + pm_dump_json_location(buffer, &cast->name_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_REQUIRED_PARAMETER_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"RequiredParameterNode\",\"location\":", 43); + + const pm_required_parameter_node_t *cast = (const pm_required_parameter_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the ParameterFlags field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"flags\":", 8); + size_t flags = 0; + pm_buffer_append_byte(buffer, '['); + if (PM_NODE_FLAG_P(cast, PM_PARAMETER_FLAGS_REPEATED_PARAMETER)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"REPEATED_PARAMETER\"", 20); + flags++; + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name\":", 7); + pm_dump_json_constant(buffer, parser, cast->name); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_RESCUE_MODIFIER_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"RescueModifierNode\",\"location\":", 40); + + const pm_rescue_modifier_node_t *cast = (const pm_rescue_modifier_node_t 
*) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the expression field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"expression\":", 13); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->expression); + + // Dump the keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"keyword_loc\":", 14); + pm_dump_json_location(buffer, &cast->keyword_loc); + + // Dump the rescue_expression field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"rescue_expression\":", 20); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->rescue_expression); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_RESCUE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"RescueNode\",\"location\":", 32); + + const pm_rescue_node_t *cast = (const pm_rescue_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"keyword_loc\":", 14); + pm_dump_json_location(buffer, &cast->keyword_loc); + + // Dump the exceptions field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"exceptions\":", 13); + const pm_node_list_t *exceptions = &cast->exceptions; + pm_buffer_append_byte(buffer, '['); + + for (size_t index = 0; index < exceptions->size; index++) { + if (index != 0) pm_buffer_append_byte(buffer, ','); + pm_dump_json(buffer, parser, exceptions->nodes[index]); + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + if (cast->operator_loc.length != 0) { + pm_dump_json_location(buffer, &cast->operator_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the reference field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"reference\":", 12); + if 
(cast->reference != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->reference); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the then_keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"then_keyword_loc\":", 19); + if (cast->then_keyword_loc.length != 0) { + pm_dump_json_location(buffer, &cast->then_keyword_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the statements field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"statements\":", 13); + if (cast->statements != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->statements); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the subsequent field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"subsequent\":", 13); + if (cast->subsequent != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->subsequent); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_REST_PARAMETER_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"RestParameterNode\",\"location\":", 39); + + const pm_rest_parameter_node_t *cast = (const pm_rest_parameter_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the ParameterFlags field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"flags\":", 8); + size_t flags = 0; + pm_buffer_append_byte(buffer, '['); + if (PM_NODE_FLAG_P(cast, PM_PARAMETER_FLAGS_REPEATED_PARAMETER)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"REPEATED_PARAMETER\"", 20); + flags++; + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the name field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name\":", 7); + if (cast->name != PM_CONSTANT_ID_UNSET) { + pm_dump_json_constant(buffer, parser, 
cast->name); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the name_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"name_loc\":", 11); + if (cast->name_loc.length != 0) { + pm_dump_json_location(buffer, &cast->name_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + pm_dump_json_location(buffer, &cast->operator_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_RETRY_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"RetryNode\",\"location\":", 31); + + const pm_retry_node_t *cast = (const pm_retry_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_RETURN_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"ReturnNode\",\"location\":", 32); + + const pm_return_node_t *cast = (const pm_return_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"keyword_loc\":", 14); + pm_dump_json_location(buffer, &cast->keyword_loc); + + // Dump the arguments field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"arguments\":", 12); + if (cast->arguments != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->arguments); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_SELF_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"SelfNode\",\"location\":", 30); + + const pm_self_node_t *cast = (const pm_self_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_SHAREABLE_CONSTANT_NODE: { + pm_buffer_append_string(buffer, 
"{\"type\":\"ShareableConstantNode\",\"location\":", 43); + + const pm_shareable_constant_node_t *cast = (const pm_shareable_constant_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the ShareableConstantNodeFlags field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"flags\":", 8); + size_t flags = 0; + pm_buffer_append_byte(buffer, '['); + if (PM_NODE_FLAG_P(cast, PM_SHAREABLE_CONSTANT_NODE_FLAGS_LITERAL)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"LITERAL\"", 9); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_EVERYTHING)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"EXPERIMENTAL_EVERYTHING\"", 25); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_COPY)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"EXPERIMENTAL_COPY\"", 19); + flags++; + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the write field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"write\":", 8); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->write); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_SINGLETON_CLASS_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"SingletonClassNode\",\"location\":", 40); + + const pm_singleton_class_node_t *cast = (const pm_singleton_class_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the locals field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"locals\":", 9); + const pm_constant_id_list_t *locals = &cast->locals; + pm_buffer_append_byte(buffer, '['); + + for (size_t index = 0; index < locals->size; index++) { + if (index != 0) pm_buffer_append_byte(buffer, ','); + pm_dump_json_constant(buffer, parser, locals->ids[index]); + } + pm_buffer_append_byte(buffer, 
']'); + + // Dump the class_keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"class_keyword_loc\":", 20); + pm_dump_json_location(buffer, &cast->class_keyword_loc); + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + pm_dump_json_location(buffer, &cast->operator_loc); + + // Dump the expression field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"expression\":", 13); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->expression); + + // Dump the body field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"body\":", 7); + if (cast->body != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->body); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the end_keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"end_keyword_loc\":", 18); + pm_dump_json_location(buffer, &cast->end_keyword_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_SOURCE_ENCODING_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"SourceEncodingNode\",\"location\":", 40); + + const pm_source_encoding_node_t *cast = (const pm_source_encoding_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_SOURCE_FILE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"SourceFileNode\",\"location\":", 36); + + const pm_source_file_node_t *cast = (const pm_source_file_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the StringFlags field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"flags\":", 8); + size_t flags = 0; + pm_buffer_append_byte(buffer, '['); + if (PM_NODE_FLAG_P(cast, PM_STRING_FLAGS_FORCED_UTF8_ENCODING)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + 
pm_buffer_append_string(buffer, "\"FORCED_UTF8_ENCODING\"", 22); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_STRING_FLAGS_FORCED_BINARY_ENCODING)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"FORCED_BINARY_ENCODING\"", 24); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_STRING_FLAGS_FROZEN)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"FROZEN\"", 8); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_STRING_FLAGS_MUTABLE)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"MUTABLE\"", 9); + flags++; + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the filepath field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"filepath\":", 11); + const pm_string_t *filepath = &cast->filepath; + pm_buffer_append_byte(buffer, '"'); + pm_buffer_append_source(buffer, pm_string_source(filepath), pm_string_length(filepath), PM_BUFFER_ESCAPING_JSON); + pm_buffer_append_byte(buffer, '"'); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_SOURCE_LINE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"SourceLineNode\",\"location\":", 36); + + const pm_source_line_node_t *cast = (const pm_source_line_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_SPLAT_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"SplatNode\",\"location\":", 31); + + const pm_splat_node_t *cast = (const pm_splat_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the operator_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"operator_loc\":", 15); + pm_dump_json_location(buffer, &cast->operator_loc); + + // Dump the expression field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"expression\":", 13); + if (cast->expression != NULL) { + pm_dump_json(buffer, 
parser, (const pm_node_t *) cast->expression); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_STATEMENTS_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"StatementsNode\",\"location\":", 36); + + const pm_statements_node_t *cast = (const pm_statements_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the body field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"body\":", 7); + const pm_node_list_t *body = &cast->body; + pm_buffer_append_byte(buffer, '['); + + for (size_t index = 0; index < body->size; index++) { + if (index != 0) pm_buffer_append_byte(buffer, ','); + pm_dump_json(buffer, parser, body->nodes[index]); + } + pm_buffer_append_byte(buffer, ']'); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_STRING_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"StringNode\",\"location\":", 32); + + const pm_string_node_t *cast = (const pm_string_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the StringFlags field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"flags\":", 8); + size_t flags = 0; + pm_buffer_append_byte(buffer, '['); + if (PM_NODE_FLAG_P(cast, PM_STRING_FLAGS_FORCED_UTF8_ENCODING)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"FORCED_UTF8_ENCODING\"", 22); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_STRING_FLAGS_FORCED_BINARY_ENCODING)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"FORCED_BINARY_ENCODING\"", 24); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_STRING_FLAGS_FROZEN)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"FROZEN\"", 8); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_STRING_FLAGS_MUTABLE)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + 
pm_buffer_append_string(buffer, "\"MUTABLE\"", 9); + flags++; + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the opening_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"opening_loc\":", 14); + if (cast->opening_loc.length != 0) { + pm_dump_json_location(buffer, &cast->opening_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the content_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"content_loc\":", 14); + pm_dump_json_location(buffer, &cast->content_loc); + + // Dump the closing_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"closing_loc\":", 14); + if (cast->closing_loc.length != 0) { + pm_dump_json_location(buffer, &cast->closing_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the unescaped field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"unescaped\":", 12); + const pm_string_t *unescaped = &cast->unescaped; + pm_buffer_append_byte(buffer, '"'); + pm_buffer_append_source(buffer, pm_string_source(unescaped), pm_string_length(unescaped), PM_BUFFER_ESCAPING_JSON); + pm_buffer_append_byte(buffer, '"'); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_SUPER_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"SuperNode\",\"location\":", 31); + + const pm_super_node_t *cast = (const pm_super_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"keyword_loc\":", 14); + pm_dump_json_location(buffer, &cast->keyword_loc); + + // Dump the lparen_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"lparen_loc\":", 13); + if (cast->lparen_loc.length != 0) { + pm_dump_json_location(buffer, &cast->lparen_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the arguments 
field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"arguments\":", 12); + if (cast->arguments != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->arguments); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the rparen_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"rparen_loc\":", 13); + if (cast->rparen_loc.length != 0) { + pm_dump_json_location(buffer, &cast->rparen_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the block field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"block\":", 8); + if (cast->block != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->block); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_SYMBOL_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"SymbolNode\",\"location\":", 32); + + const pm_symbol_node_t *cast = (const pm_symbol_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the SymbolFlags field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"flags\":", 8); + size_t flags = 0; + pm_buffer_append_byte(buffer, '['); + if (PM_NODE_FLAG_P(cast, PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"FORCED_UTF8_ENCODING\"", 22); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"FORCED_BINARY_ENCODING\"", 24); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"FORCED_US_ASCII_ENCODING\"", 26); + flags++; + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the opening_loc field + 
pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"opening_loc\":", 14); + if (cast->opening_loc.length != 0) { + pm_dump_json_location(buffer, &cast->opening_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the value_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"value_loc\":", 12); + if (cast->value_loc.length != 0) { + pm_dump_json_location(buffer, &cast->value_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the closing_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"closing_loc\":", 14); + if (cast->closing_loc.length != 0) { + pm_dump_json_location(buffer, &cast->closing_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the unescaped field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"unescaped\":", 12); + const pm_string_t *unescaped = &cast->unescaped; + pm_buffer_append_byte(buffer, '"'); + pm_buffer_append_source(buffer, pm_string_source(unescaped), pm_string_length(unescaped), PM_BUFFER_ESCAPING_JSON); + pm_buffer_append_byte(buffer, '"'); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_TRUE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"TrueNode\",\"location\":", 30); + + const pm_true_node_t *cast = (const pm_true_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_UNDEF_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"UndefNode\",\"location\":", 31); + + const pm_undef_node_t *cast = (const pm_undef_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the names field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"names\":", 8); + const pm_node_list_t *names = &cast->names; + pm_buffer_append_byte(buffer, '['); + + for (size_t index = 0; index < names->size; index++) { + if 
(index != 0) pm_buffer_append_byte(buffer, ','); + pm_dump_json(buffer, parser, names->nodes[index]); + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"keyword_loc\":", 14); + pm_dump_json_location(buffer, &cast->keyword_loc); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_UNLESS_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"UnlessNode\",\"location\":", 32); + + const pm_unless_node_t *cast = (const pm_unless_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"keyword_loc\":", 14); + pm_dump_json_location(buffer, &cast->keyword_loc); + + // Dump the predicate field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"predicate\":", 12); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->predicate); + + // Dump the then_keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"then_keyword_loc\":", 19); + if (cast->then_keyword_loc.length != 0) { + pm_dump_json_location(buffer, &cast->then_keyword_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the statements field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"statements\":", 13); + if (cast->statements != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->statements); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the else_clause field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"else_clause\":", 14); + if (cast->else_clause != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->else_clause); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the end_keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, 
"\"end_keyword_loc\":", 18); + if (cast->end_keyword_loc.length != 0) { + pm_dump_json_location(buffer, &cast->end_keyword_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_UNTIL_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"UntilNode\",\"location\":", 31); + + const pm_until_node_t *cast = (const pm_until_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the LoopFlags field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"flags\":", 8); + size_t flags = 0; + pm_buffer_append_byte(buffer, '['); + if (PM_NODE_FLAG_P(cast, PM_LOOP_FLAGS_BEGIN_MODIFIER)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"BEGIN_MODIFIER\"", 16); + flags++; + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"keyword_loc\":", 14); + pm_dump_json_location(buffer, &cast->keyword_loc); + + // Dump the do_keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"do_keyword_loc\":", 17); + if (cast->do_keyword_loc.length != 0) { + pm_dump_json_location(buffer, &cast->do_keyword_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the closing_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"closing_loc\":", 14); + if (cast->closing_loc.length != 0) { + pm_dump_json_location(buffer, &cast->closing_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the predicate field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"predicate\":", 12); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->predicate); + + // Dump the statements field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"statements\":", 13); + if (cast->statements != NULL) { 
+ pm_dump_json(buffer, parser, (const pm_node_t *) cast->statements); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_WHEN_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"WhenNode\",\"location\":", 30); + + const pm_when_node_t *cast = (const pm_when_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"keyword_loc\":", 14); + pm_dump_json_location(buffer, &cast->keyword_loc); + + // Dump the conditions field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"conditions\":", 13); + const pm_node_list_t *conditions = &cast->conditions; + pm_buffer_append_byte(buffer, '['); + + for (size_t index = 0; index < conditions->size; index++) { + if (index != 0) pm_buffer_append_byte(buffer, ','); + pm_dump_json(buffer, parser, conditions->nodes[index]); + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the then_keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"then_keyword_loc\":", 19); + if (cast->then_keyword_loc.length != 0) { + pm_dump_json_location(buffer, &cast->then_keyword_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the statements field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"statements\":", 13); + if (cast->statements != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->statements); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_WHILE_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"WhileNode\",\"location\":", 31); + + const pm_while_node_t *cast = (const pm_while_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the LoopFlags field + pm_buffer_append_byte(buffer, ','); + 
pm_buffer_append_string(buffer, "\"flags\":", 8); + size_t flags = 0; + pm_buffer_append_byte(buffer, '['); + if (PM_NODE_FLAG_P(cast, PM_LOOP_FLAGS_BEGIN_MODIFIER)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"BEGIN_MODIFIER\"", 16); + flags++; + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"keyword_loc\":", 14); + pm_dump_json_location(buffer, &cast->keyword_loc); + + // Dump the do_keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"do_keyword_loc\":", 17); + if (cast->do_keyword_loc.length != 0) { + pm_dump_json_location(buffer, &cast->do_keyword_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the closing_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"closing_loc\":", 14); + if (cast->closing_loc.length != 0) { + pm_dump_json_location(buffer, &cast->closing_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the predicate field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"predicate\":", 12); + pm_dump_json(buffer, parser, (const pm_node_t *) cast->predicate); + + // Dump the statements field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"statements\":", 13); + if (cast->statements != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->statements); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_X_STRING_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"XStringNode\",\"location\":", 33); + + const pm_x_string_node_t *cast = (const pm_x_string_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the EncodingFlags field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, 
"\"flags\":", 8); + size_t flags = 0; + pm_buffer_append_byte(buffer, '['); + if (PM_NODE_FLAG_P(cast, PM_ENCODING_FLAGS_FORCED_UTF8_ENCODING)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"FORCED_UTF8_ENCODING\"", 22); + flags++; + } + if (PM_NODE_FLAG_P(cast, PM_ENCODING_FLAGS_FORCED_BINARY_ENCODING)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"FORCED_BINARY_ENCODING\"", 24); + flags++; + } + pm_buffer_append_byte(buffer, ']'); + + // Dump the opening_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"opening_loc\":", 14); + pm_dump_json_location(buffer, &cast->opening_loc); + + // Dump the content_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"content_loc\":", 14); + pm_dump_json_location(buffer, &cast->content_loc); + + // Dump the closing_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"closing_loc\":", 14); + pm_dump_json_location(buffer, &cast->closing_loc); + + // Dump the unescaped field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"unescaped\":", 12); + const pm_string_t *unescaped = &cast->unescaped; + pm_buffer_append_byte(buffer, '"'); + pm_buffer_append_source(buffer, pm_string_source(unescaped), pm_string_length(unescaped), PM_BUFFER_ESCAPING_JSON); + pm_buffer_append_byte(buffer, '"'); + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_YIELD_NODE: { + pm_buffer_append_string(buffer, "{\"type\":\"YieldNode\",\"location\":", 31); + + const pm_yield_node_t *cast = (const pm_yield_node_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + + // Dump the keyword_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"keyword_loc\":", 14); + pm_dump_json_location(buffer, &cast->keyword_loc); + + // Dump the lparen_loc field + pm_buffer_append_byte(buffer, ','); + 
pm_buffer_append_string(buffer, "\"lparen_loc\":", 13); + if (cast->lparen_loc.length != 0) { + pm_dump_json_location(buffer, &cast->lparen_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the arguments field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"arguments\":", 12); + if (cast->arguments != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast->arguments); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + // Dump the rparen_loc field + pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"rparen_loc\":", 13); + if (cast->rparen_loc.length != 0) { + pm_dump_json_location(buffer, &cast->rparen_loc); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + + pm_buffer_append_byte(buffer, '}'); + break; + } + case PM_SCOPE_NODE: + break; + } +} + +#endif diff --git a/templates/src/json.c.erb b/templates/src/json.c.erb new file mode 100644 index 0000000000..66397b8fcd --- /dev/null +++ b/templates/src/json.c.erb @@ -0,0 +1,130 @@ +#include "prism/json.h" + +/* We optionally support dumping to JSON. For systems that don't want or need + * this functionality, it can be turned off with the PRISM_EXCLUDE_JSON define.
+ */ +#ifndef PRISM_EXCLUDE_JSON + +#include "prism/internal/buffer.h" +#include "prism/internal/constant_pool.h" +#include "prism/internal/integer.h" +#include "prism/internal/parser.h" + +#include + +static void +pm_dump_json_constant(pm_buffer_t *buffer, const pm_parser_t *parser, pm_constant_id_t constant_id) { + const pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, constant_id); + pm_buffer_append_byte(buffer, '"'); + pm_buffer_append_source(buffer, constant->start, constant->length, PM_BUFFER_ESCAPING_JSON); + pm_buffer_append_byte(buffer, '"'); +} + +static void +pm_dump_json_location(pm_buffer_t *buffer, const pm_location_t *location) { + pm_buffer_append_format(buffer, "{\"start\":%" PRIu32 ",\"length\":%" PRIu32 "}", location->start, location->length); +} + +/** + * Dump JSON to the given buffer. + */ +void +pm_dump_json(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *node) { + switch (PM_NODE_TYPE(node)) { + <%- nodes.each do |node| -%> + case <%= node.type %>: { + pm_buffer_append_string(buffer, "{\"type\":\"<%= node.name %>\",\"location\":", <%= node.name.bytesize + 22 %>); + + const pm_<%= node.human %>_t *cast = (const pm_<%= node.human %>_t *) node; + pm_dump_json_location(buffer, &cast->base.location); + <%- [*node.flags, *node.fields].each_with_index do |field, index| -%> + + // Dump the <%= field.name %> field + pm_buffer_append_byte(buffer, ','); + <%- if field.is_a?(Prism::Template::Flags) -%> + pm_buffer_append_string(buffer, "\"flags\":", 8); + <%- else -%> + pm_buffer_append_string(buffer, "\"<%= field.name %>\":", <%= field.name.bytesize + 3 %>); + <%- end -%> + <%- case field -%> + <%- when Prism::Template::NodeField -%> + pm_dump_json(buffer, parser, (const pm_node_t *) cast-><%= field.name %>); + <%- when Prism::Template::OptionalNodeField -%> + if (cast-><%= field.name %> != NULL) { + pm_dump_json(buffer, parser, (const pm_node_t *) cast-><%= field.name %>); + } else { + 
pm_buffer_append_string(buffer, "null", 4); + } + <%- when Prism::Template::NodeListField -%> + const pm_node_list_t *<%= field.name %> = &cast-><%= field.name %>; + pm_buffer_append_byte(buffer, '['); + + for (size_t index = 0; index < <%= field.name %>->size; index++) { + if (index != 0) pm_buffer_append_byte(buffer, ','); + pm_dump_json(buffer, parser, <%= field.name %>->nodes[index]); + } + pm_buffer_append_byte(buffer, ']'); + <%- when Prism::Template::StringField -%> + const pm_string_t *<%= field.name %> = &cast-><%= field.name %>; + pm_buffer_append_byte(buffer, '"'); + pm_buffer_append_source(buffer, pm_string_source(<%= field.name %>), pm_string_length(<%= field.name %>), PM_BUFFER_ESCAPING_JSON); + pm_buffer_append_byte(buffer, '"'); + <%- when Prism::Template::ConstantField -%> + pm_dump_json_constant(buffer, parser, cast-><%= field.name %>); + <%- when Prism::Template::OptionalConstantField -%> + if (cast-><%= field.name %> != PM_CONSTANT_ID_UNSET) { + pm_dump_json_constant(buffer, parser, cast-><%= field.name %>); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + <%- when Prism::Template::ConstantListField -%> + const pm_constant_id_list_t *<%= field.name %> = &cast-><%= field.name %>; + pm_buffer_append_byte(buffer, '['); + + for (size_t index = 0; index < <%= field.name %>->size; index++) { + if (index != 0) pm_buffer_append_byte(buffer, ','); + pm_dump_json_constant(buffer, parser, <%= field.name %>->ids[index]); + } + pm_buffer_append_byte(buffer, ']'); + <%- when Prism::Template::LocationField -%> + pm_dump_json_location(buffer, &cast-><%= field.name %>); + <%- when Prism::Template::OptionalLocationField -%> + if (cast-><%= field.name %>.length != 0) { + pm_dump_json_location(buffer, &cast-><%= field.name %>); + } else { + pm_buffer_append_string(buffer, "null", 4); + } + <%- when Prism::Template::UInt8Field -%> + pm_buffer_append_format(buffer, "%" PRIu8, cast-><%= field.name %>); + <%- when Prism::Template::UInt32Field -%> + 
pm_buffer_append_format(buffer, "%" PRIu32, cast-><%= field.name %>); + <%- when Prism::Template::Flags -%> + size_t flags = 0; + pm_buffer_append_byte(buffer, '['); + <%- node.flags.values.each_with_index do |value, index| -%> + if (PM_NODE_FLAG_P(cast, PM_<%= node.flags.human.upcase %>_<%= value.name %>)) { + if (flags != 0) pm_buffer_append_byte(buffer, ','); + pm_buffer_append_string(buffer, "\"<%= value.name %>\"", <%= value.name.bytesize + 2 %>); + flags++; + } + <%- end -%> + pm_buffer_append_byte(buffer, ']'); + <%- when Prism::Template::IntegerField -%> + pm_integer_string(buffer, &cast-><%= field.name %>); + <%- when Prism::Template::DoubleField -%> + pm_buffer_append_format(buffer, "%f", cast-><%= field.name %>); + <%- else -%> + <%- raise %> + <%- end -%> + <%- end -%> + + pm_buffer_append_byte(buffer, '}'); + break; + } + <%- end -%> + case PM_SCOPE_NODE: + break; + } +} + +#endif diff --git a/templates/src/node.c.erb b/templates/src/node.c.erb index 695175d7c8..7b95200632 100644 --- a/templates/src/node.c.erb +++ b/templates/src/node.c.erb @@ -2,14 +2,8 @@ #include "prism/internal/node.h" #include "prism/internal/arena.h" -#include "prism/internal/buffer.h" -#include "prism/internal/constant_pool.h" -#include "prism/internal/integer.h" -#include "prism/internal/parser.h" -#include #include -#include /** * Attempts to grow the node list to the next size. If there is already @@ -150,127 +144,6 @@ pm_visit_child_nodes(const pm_node_t *node, bool (*visitor)(const pm_node_t *nod break; } } - -// We optionally support dumping to JSON. For systems that don't want or need -// this functionality, it can be turned off with the PRISM_EXCLUDE_JSON define. 
-#ifndef PRISM_EXCLUDE_JSON - -static void -pm_dump_json_constant(pm_buffer_t *buffer, const pm_parser_t *parser, pm_constant_id_t constant_id) { - const pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, constant_id); - pm_buffer_append_byte(buffer, '"'); - pm_buffer_append_source(buffer, constant->start, constant->length, PM_BUFFER_ESCAPING_JSON); - pm_buffer_append_byte(buffer, '"'); -} - -static void -pm_dump_json_location(pm_buffer_t *buffer, const pm_location_t *location) { - pm_buffer_append_format(buffer, "{\"start\":%" PRIu32 ",\"length\":%" PRIu32 "}", location->start, location->length); -} - -/** - * Dump JSON to the given buffer. - */ -void -pm_dump_json(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *node) { - switch (PM_NODE_TYPE(node)) { - <%- nodes.each do |node| -%> - case <%= node.type %>: { - pm_buffer_append_string(buffer, "{\"type\":\"<%= node.name %>\",\"location\":", <%= node.name.bytesize + 22 %>); - - const pm_<%= node.human %>_t *cast = (const pm_<%= node.human %>_t *) node; - pm_dump_json_location(buffer, &cast->base.location); - <%- [*node.flags, *node.fields].each_with_index do |field, index| -%> - - // Dump the <%= field.name %> field - pm_buffer_append_byte(buffer, ','); - <%- if field.is_a?(Prism::Template::Flags) -%> - pm_buffer_append_string(buffer, "\"flags\":", 8); - <%- else -%> - pm_buffer_append_string(buffer, "\"<%= field.name %>\":", <%= field.name.bytesize + 3 %>); - <%- end -%> - <%- case field -%> - <%- when Prism::Template::NodeField -%> - pm_dump_json(buffer, parser, (const pm_node_t *) cast-><%= field.name %>); - <%- when Prism::Template::OptionalNodeField -%> - if (cast-><%= field.name %> != NULL) { - pm_dump_json(buffer, parser, (const pm_node_t *) cast-><%= field.name %>); - } else { - pm_buffer_append_string(buffer, "null", 4); - } - <%- when Prism::Template::NodeListField -%> - const pm_node_list_t *<%= field.name %> = &cast-><%= field.name %>; - 
pm_buffer_append_byte(buffer, '['); - - for (size_t index = 0; index < <%= field.name %>->size; index++) { - if (index != 0) pm_buffer_append_byte(buffer, ','); - pm_dump_json(buffer, parser, <%= field.name %>->nodes[index]); - } - pm_buffer_append_byte(buffer, ']'); - <%- when Prism::Template::StringField -%> - const pm_string_t *<%= field.name %> = &cast-><%= field.name %>; - pm_buffer_append_byte(buffer, '"'); - pm_buffer_append_source(buffer, pm_string_source(<%= field.name %>), pm_string_length(<%= field.name %>), PM_BUFFER_ESCAPING_JSON); - pm_buffer_append_byte(buffer, '"'); - <%- when Prism::Template::ConstantField -%> - pm_dump_json_constant(buffer, parser, cast-><%= field.name %>); - <%- when Prism::Template::OptionalConstantField -%> - if (cast-><%= field.name %> != PM_CONSTANT_ID_UNSET) { - pm_dump_json_constant(buffer, parser, cast-><%= field.name %>); - } else { - pm_buffer_append_string(buffer, "null", 4); - } - <%- when Prism::Template::ConstantListField -%> - const pm_constant_id_list_t *<%= field.name %> = &cast-><%= field.name %>; - pm_buffer_append_byte(buffer, '['); - - for (size_t index = 0; index < <%= field.name %>->size; index++) { - if (index != 0) pm_buffer_append_byte(buffer, ','); - pm_dump_json_constant(buffer, parser, <%= field.name %>->ids[index]); - } - pm_buffer_append_byte(buffer, ']'); - <%- when Prism::Template::LocationField -%> - pm_dump_json_location(buffer, &cast-><%= field.name %>); - <%- when Prism::Template::OptionalLocationField -%> - if (cast-><%= field.name %>.length != 0) { - pm_dump_json_location(buffer, &cast-><%= field.name %>); - } else { - pm_buffer_append_string(buffer, "null", 4); - } - <%- when Prism::Template::UInt8Field -%> - pm_buffer_append_format(buffer, "%" PRIu8, cast-><%= field.name %>); - <%- when Prism::Template::UInt32Field -%> - pm_buffer_append_format(buffer, "%" PRIu32, cast-><%= field.name %>); - <%- when Prism::Template::Flags -%> - size_t flags = 0; - pm_buffer_append_byte(buffer, '['); - <%- 
node.flags.values.each_with_index do |value, index| -%> - if (PM_NODE_FLAG_P(cast, PM_<%= node.flags.human.upcase %>_<%= value.name %>)) { - if (flags != 0) pm_buffer_append_byte(buffer, ','); - pm_buffer_append_string(buffer, "\"<%= value.name %>\"", <%= value.name.bytesize + 2 %>); - flags++; - } - <%- end -%> - pm_buffer_append_byte(buffer, ']'); - <%- when Prism::Template::IntegerField -%> - pm_integer_string(buffer, &cast-><%= field.name %>); - <%- when Prism::Template::DoubleField -%> - pm_buffer_append_format(buffer, "%f", cast-><%= field.name %>); - <%- else -%> - <%- raise %> - <%- end -%> - <%- end -%> - - pm_buffer_append_byte(buffer, '}'); - break; - } - <%- end -%> - case PM_SCOPE_NODE: - break; - } -} - -#endif <%- nodes.each do |node| -%> <%- params = node.fields.map(&:c_param) -%> diff --git a/templates/src/prettyprint.c.erb b/templates/src/prettyprint.c.erb index ffe9b1f307..cede4b9d02 100644 --- a/templates/src/prettyprint.c.erb +++ b/templates/src/prettyprint.c.erb @@ -1,6 +1,11 @@ <%# encoding: ASCII -%> #include "prism/prettyprint.h" +/* We optionally support pretty printing nodes. For systems that don't want or + * need this functionality, it can be turned off with the + * PRISM_EXCLUDE_PRETTYPRINT define. */ +#ifndef PRISM_EXCLUDE_PRETTYPRINT + #include "prism/compiler/inline.h" #include "prism/internal/buffer.h" #include "prism/internal/constant_pool.h" @@ -10,15 +15,6 @@ #include -// We optionally support pretty printing nodes. For systems that don't want or -// need this functionality, it can be turned off with the -// PRISM_EXCLUDE_PRETTYPRINT define. 
-#ifdef PRISM_EXCLUDE_PRETTYPRINT - -void pm_prettyprint(void) {} - -#else - static PRISM_INLINE void prettyprint_location(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm_location_t *location) { pm_line_column_t start = pm_line_offset_list_line_column(&parser->line_offsets, location->start, parser->start_line); diff --git a/templates/template.rb b/templates/template.rb index fb778871b4..a84a912366 100755 --- a/templates/template.rb +++ b/templates/template.rb @@ -702,6 +702,7 @@ def locals "lib/prism/serialize.rb", "lib/prism/visitor.rb", "src/diagnostic.c", + "src/json.c", "src/node.c", "src/prettyprint.c", "src/serialize.c", From f02d270409d7e3033e628b2af9dd538cb804103f Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 18 Mar 2026 14:06:56 -0400 Subject: [PATCH 077/100] Move parse_success_p into serialization functions --- include/prism.h | 27 --------------------------- include/prism/parser.h | 10 ++++++++++ include/prism/serialize.h | 10 ++++++++++ include/prism/version.h | 7 +++++++ lib/prism/ffi.rb | 11 ++++++++--- src/prism.c | 22 ---------------------- templates/src/serialize.c.erb | 22 ++++++++++++++++++++++ 7 files changed, 57 insertions(+), 52 deletions(-) diff --git a/include/prism.h b/include/prism.h index b344a43691..d082e2f655 100644 --- a/include/prism.h +++ b/include/prism.h @@ -24,33 +24,6 @@ extern "C" { #include "prism/string_query.h" #include "prism/version.h" -/** - * The prism version and the serialization format. - * - * @returns The prism version as a constant string. - */ -PRISM_EXPORTED_FUNCTION const char * pm_version(void); - -/** - * Initiate the parser with the given parser. - * - * @param parser The parser to use. - * @return The AST representing the source. - * - * \public \memberof pm_parser - */ -PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser); - -/** - * Parse the source and return true if it parses without errors or warnings. - * - * @param source The source to parse. 
- * @param size The size of the source. - * @param data The optional data to pass to the parser. - * @return True if the source parses without errors or warnings. - */ -PRISM_EXPORTED_FUNCTION bool pm_parse_success_p(const uint8_t *source, size_t size, const char *data); - /** * @mainpage * diff --git a/include/prism/parser.h b/include/prism/parser.h index f49155799d..f3927d663f 100644 --- a/include/prism/parser.h +++ b/include/prism/parser.h @@ -285,4 +285,14 @@ typedef void (*pm_constant_callback_t)(const pm_constant_t *constant, void *data */ PRISM_EXPORTED_FUNCTION void pm_parser_constants_each(const pm_parser_t *parser, pm_constant_callback_t callback, void *data); +/** + * Initiate the parser with the given parser. + * + * @param parser The parser to use. + * @return The AST representing the source. + * + * \public \memberof pm_parser + */ +PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser); + #endif diff --git a/include/prism/serialize.h b/include/prism/serialize.h index b2e93e5e9b..775fa6fbfb 100644 --- a/include/prism/serialize.h +++ b/include/prism/serialize.h @@ -81,6 +81,16 @@ PRISM_EXPORTED_FUNCTION void pm_serialize_lex(pm_buffer_t *buffer, const uint8_t */ PRISM_EXPORTED_FUNCTION void pm_serialize_parse_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data); +/** + * Parse the source and return true if it parses without errors. + * + * @param source The source to parse. + * @param size The size of the source. + * @param data The optional data to pass to the parser. + * @return True if the source parses without errors.
+ */ +PRISM_EXPORTED_FUNCTION bool pm_serialize_parse_success_p(const uint8_t *source, size_t size, const char *data); + #endif #endif diff --git a/include/prism/version.h b/include/prism/version.h index b95611f96c..99cc99158e 100644 --- a/include/prism/version.h +++ b/include/prism/version.h @@ -26,4 +26,11 @@ */ #define PRISM_VERSION "1.9.0" +/** + * The prism version and the serialization format. + * + * @returns The prism version as a constant string. + */ +PRISM_EXPORTED_FUNCTION const char * pm_version(void); + #endif diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb index d4a9ad0302..7b22a1304c 100644 --- a/lib/prism/ffi.rb +++ b/lib/prism/ffi.rb @@ -91,14 +91,19 @@ def self.load_exported_functions_from(header, *functions, callbacks) enum :pm_string_query_t, [:PM_STRING_QUERY_ERROR, -1, :PM_STRING_QUERY_FALSE, :PM_STRING_QUERY_TRUE] load_exported_functions_from( - "prism.h", + "prism/version.h", "pm_version", + [] + ) + + load_exported_functions_from( + "prism/serialize.h", "pm_serialize_parse", "pm_serialize_parse_stream", "pm_serialize_parse_comments", "pm_serialize_lex", "pm_serialize_parse_lex", - "pm_parse_success_p", + "pm_serialize_parse_success_p", [:pm_parse_stream_fgets_t, :pm_parse_stream_feof_t] ) @@ -404,7 +409,7 @@ def parse_lex_common(string, code, options) # :nodoc: end def parse_file_success_common(string, options) # :nodoc: - LibRubyParser.pm_parse_success_p(string.pointer, string.length, dump_options(options)) + LibRubyParser.pm_serialize_parse_success_p(string.pointer, string.length, dump_options(options)) end # Return the value that should be dumped for the command_line option. diff --git a/src/prism.c b/src/prism.c index 3f8e3ad870..901b8971cd 100644 --- a/src/prism.c +++ b/src/prism.c @@ -22778,28 +22778,6 @@ pm_parse_stream(pm_parser_t **parser, pm_arena_t *arena, pm_buffer_t *buffer, vo return node; } -/** - * Parse the source and return true if it parses without errors or warnings. 
- */ -bool -pm_parse_success_p(const uint8_t *source, size_t size, const char *data) { - pm_options_t options = { 0 }; - pm_options_read(&options, data); - - pm_arena_t arena = { 0 }; - pm_parser_t parser; - pm_parser_init(&arena, &parser, source, size, &options); - - pm_parse(&parser); - - bool result = parser.error_list.size == 0; - pm_parser_cleanup(&parser); - pm_arena_cleanup(&arena); - pm_options_cleanup(&options); - - return result; -} - #undef PM_CASE_KEYWORD #undef PM_CASE_OPERATOR #undef PM_CASE_WRITABLE diff --git a/templates/src/serialize.c.erb b/templates/src/serialize.c.erb index c92300b33d..0263ef56cc 100644 --- a/templates/src/serialize.c.erb +++ b/templates/src/serialize.c.erb @@ -373,4 +373,26 @@ pm_serialize_parse_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, pm_options_cleanup(&options); } +/** + * Parse the source and return true if it parses without errors. + */ +bool +pm_serialize_parse_success_p(const uint8_t *source, size_t size, const char *data) { + pm_options_t options = { 0 }; + pm_options_read(&options, data); + + pm_arena_t arena = { 0 }; + pm_parser_t parser; + pm_parser_init(&arena, &parser, source, size, &options); + + pm_parse(&parser); + + bool result = parser.error_list.size == 0; + pm_parser_cleanup(&parser); + pm_arena_cleanup(&arena); + pm_options_cleanup(&options); + + return result; +} + #endif From d54885e7cccc6c4ae6e9781527f4572d23a69e16 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 18 Mar 2026 14:13:40 -0400 Subject: [PATCH 078/100] Naming conventions --- ext/prism/extension.c | 32 +++++++++++++++---------------- include/prism/options.h | 12 ++++++------ rust/ruby-prism-sys/build/main.rs | 4 ++-- rust/ruby-prism/src/lib.rs | 6 +++--- src/options.c | 12 ++++++------ src/prism.c | 4 ++-- 6 files changed, 35 insertions(+), 35 deletions(-) diff --git a/ext/prism/extension.c b/ext/prism/extension.c index 2c64ff14ed..0c9458e6cf 100644 --- a/ext/prism/extension.c +++ b/ext/prism/extension.c
@@ -148,7 +148,7 @@ build_options_scopes(pm_options_t *options, VALUE scopes) { // Initialize the scope array. size_t locals_count = RARRAY_LEN(locals); - pm_options_scope_t *options_scope = pm_options_scope_get_mut(options, scope_index); + pm_options_scope_t *options_scope = pm_options_scope_mut(options, scope_index); pm_options_scope_init(options_scope, locals_count); // Iterate over the locals and add them to the scope. @@ -162,7 +162,7 @@ build_options_scopes(pm_options_t *options, VALUE scopes) { } // Add the local to the scope. - pm_string_t *scope_local = pm_options_scope_local_get_mut(options_scope, local_index); + pm_string_t *scope_local = pm_options_scope_local_mut(options_scope, local_index); const char *name = rb_id2name(SYM2ID(local)); pm_string_constant_init(scope_local, name, strlen(name)); } @@ -327,7 +327,7 @@ file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options, V *encoded_filepath = rb_str_encode_ospath(filepath); extract_options(options, *encoded_filepath, keywords); - const char *source = (const char *) pm_string_source(pm_options_filepath_get(options)); + const char *source = (const char *) pm_string_source(pm_options_filepath(options)); pm_string_init_result_t result; switch (result = pm_string_file_init(input, source)) { @@ -408,7 +408,7 @@ dump(int argc, VALUE *argv, VALUE self) { #endif VALUE value = dump_input(&input, options); - if (pm_options_freeze_get(options)) rb_obj_freeze(value); + if (pm_options_freeze(options)) rb_obj_freeze(value); #ifdef PRISM_BUILD_DEBUG xfree_sized(dup, length); @@ -789,7 +789,7 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod .source = source, .tokens = rb_ary_new(), .encoding = rb_utf8_encoding(), - .freeze = pm_options_freeze_get(options), + .freeze = pm_options_freeze(options), }; parse_lex_data_t *data = &parse_lex_data; @@ -809,7 +809,7 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod rb_ary_push(offsets, 
ULONG2NUM(line_offsets->offsets[index])); } - if (pm_options_freeze_get(options)) { + if (pm_options_freeze(options)) { rb_obj_freeze(source_string); rb_obj_freeze(offsets); rb_obj_freeze(source); @@ -819,12 +819,12 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod VALUE result; if (return_nodes) { VALUE value = rb_ary_new_capa(2); - rb_ary_push(value, pm_ast_new(parser, node, parse_lex_data.encoding, source, pm_options_freeze_get(options))); + rb_ary_push(value, pm_ast_new(parser, node, parse_lex_data.encoding, source, pm_options_freeze(options))); rb_ary_push(value, parse_lex_data.tokens); - if (pm_options_freeze_get(options)) rb_obj_freeze(value); - result = parse_result_create(rb_cPrismParseLexResult, parser, value, parse_lex_data.encoding, source, pm_options_freeze_get(options)); + if (pm_options_freeze(options)) rb_obj_freeze(value); + result = parse_result_create(rb_cPrismParseLexResult, parser, value, parse_lex_data.encoding, source, pm_options_freeze(options)); } else { - result = parse_result_create(rb_cPrismLexResult, parser, parse_lex_data.tokens, parse_lex_data.encoding, source, pm_options_freeze_get(options)); + result = parse_result_create(rb_cPrismLexResult, parser, parse_lex_data.tokens, parse_lex_data.encoding, source, pm_options_freeze(options)); } pm_parser_free(parser); @@ -892,7 +892,7 @@ parse_input(pm_string_t *input, const pm_options_t *options) { pm_node_t *node = pm_parse(parser); rb_encoding *encoding = rb_enc_find(pm_parser_encoding_name(parser)); - bool freeze = pm_options_freeze_get(options); + bool freeze = pm_options_freeze(options); VALUE source = pm_source_new(parser, encoding, freeze); VALUE value = pm_ast_new(parser, node, encoding, source, freeze); VALUE result = parse_result_create(rb_cPrismParseResult, parser, value, encoding, source, freeze); @@ -1111,9 +1111,9 @@ parse_stream(int argc, VALUE *argv, VALUE self) { pm_node_t *node = pm_parse_stream(&parser, arena, buffer, (void *) stream, 
parse_stream_fgets, parse_stream_eof, options); rb_encoding *encoding = rb_enc_find(pm_parser_encoding_name(parser)); - VALUE source = pm_source_new(parser, encoding, pm_options_freeze_get(options)); - VALUE value = pm_ast_new(parser, node, encoding, source, pm_options_freeze_get(options)); - VALUE result = parse_result_create(rb_cPrismParseResult, parser, value, encoding, source, pm_options_freeze_get(options)); + VALUE source = pm_source_new(parser, encoding, pm_options_freeze(options)); + VALUE value = pm_ast_new(parser, node, encoding, source, pm_options_freeze(options)); + VALUE result = parse_result_create(rb_cPrismParseResult, parser, value, encoding, source, pm_options_freeze(options)); pm_buffer_free(buffer); pm_parser_free(parser); @@ -1134,8 +1134,8 @@ parse_input_comments(pm_string_t *input, const pm_options_t *options) { pm_parse(parser); rb_encoding *encoding = rb_enc_find(pm_parser_encoding_name(parser)); - VALUE source = pm_source_new(parser, encoding, pm_options_freeze_get(options)); - VALUE comments = parser_comments(parser, source, pm_options_freeze_get(options)); + VALUE source = pm_source_new(parser, encoding, pm_options_freeze(options)); + VALUE comments = parser_comments(parser, source, pm_options_freeze(options)); pm_parser_free(parser); pm_arena_free(arena); diff --git a/include/prism/options.h b/include/prism/options.h index 61afc0df92..c1df928e06 100644 --- a/include/prism/options.h +++ b/include/prism/options.h @@ -143,7 +143,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_shebang_callback_set(pm_options_t *optio * * \public \memberof pm_options */ -PRISM_EXPORTED_FUNCTION const pm_string_t * pm_options_filepath_get(const pm_options_t *options); +PRISM_EXPORTED_FUNCTION const pm_string_t * pm_options_filepath(const pm_options_t *options); /** * Set the filepath option on the given options struct. 
@@ -266,7 +266,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_partial_script_set(pm_options_t *options * * \public \memberof pm_options */ -PRISM_EXPORTED_FUNCTION bool pm_options_freeze_get(const pm_options_t *options); +PRISM_EXPORTED_FUNCTION bool pm_options_freeze(const pm_options_t *options); /** * Set the freeze option on the given options struct. @@ -299,7 +299,7 @@ PRISM_EXPORTED_FUNCTION bool pm_options_scopes_init(pm_options_t *options, size_ * * \public \memberof pm_options */ -PRISM_EXPORTED_FUNCTION const pm_options_scope_t * pm_options_scope_get(const pm_options_t *options, size_t index); +PRISM_EXPORTED_FUNCTION const pm_options_scope_t * pm_options_scope(const pm_options_t *options, size_t index); /** * Return a mutable pointer to the scope at the given index within the given @@ -311,7 +311,7 @@ PRISM_EXPORTED_FUNCTION const pm_options_scope_t * pm_options_scope_get(const pm * * \public \memberof pm_options */ -PRISM_EXPORTED_FUNCTION pm_options_scope_t * pm_options_scope_get_mut(pm_options_t *options, size_t index); +PRISM_EXPORTED_FUNCTION pm_options_scope_t * pm_options_scope_mut(pm_options_t *options, size_t index); /** * Create a new options scope struct. 
This will hold a set of locals that are in @@ -335,7 +335,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_scope_init(pm_options_scope_t *scope, si * * \public \memberof pm_options */ -PRISM_EXPORTED_FUNCTION const pm_string_t * pm_options_scope_local_get(const pm_options_scope_t *scope, size_t index); +PRISM_EXPORTED_FUNCTION const pm_string_t * pm_options_scope_local(const pm_options_scope_t *scope, size_t index); /** * Return a mutable pointer to the local at the given index within the given @@ -347,7 +347,7 @@ PRISM_EXPORTED_FUNCTION const pm_string_t * pm_options_scope_local_get(const pm_ * * \public \memberof pm_options */ -PRISM_EXPORTED_FUNCTION pm_string_t * pm_options_scope_local_get_mut(pm_options_scope_t *scope, size_t index); +PRISM_EXPORTED_FUNCTION pm_string_t * pm_options_scope_local_mut(pm_options_scope_t *scope, size_t index); /** * Set the forwarding option on the given scope struct. diff --git a/rust/ruby-prism-sys/build/main.rs b/rust/ruby-prism-sys/build/main.rs index 9c4d807030..bf9c8e2db7 100644 --- a/rust/ruby-prism-sys/build/main.rs +++ b/rust/ruby-prism-sys/build/main.rs @@ -157,9 +157,9 @@ fn generate_bindings(ruby_include_path: &Path) -> bindgen::Bindings { .allowlist_function("pm_options_new") .allowlist_function("pm_options_partial_script_set") .allowlist_function("pm_options_scope_forwarding_set") - .allowlist_function("pm_options_scope_get_mut") + .allowlist_function("pm_options_scope_mut") .allowlist_function("pm_options_scope_init") - .allowlist_function("pm_options_scope_local_get_mut") + .allowlist_function("pm_options_scope_local_mut") .allowlist_function("pm_options_scopes_init") .allowlist_function("pm_options_version_set") .allowlist_function("pm_parse") diff --git a/rust/ruby-prism/src/lib.rs b/rust/ruby-prism/src/lib.rs index 7c678e1a67..b841af7807 100644 --- a/rust/ruby-prism/src/lib.rs +++ b/rust/ruby-prism/src/lib.rs @@ -28,7 +28,7 @@ pub use self::parse_result::{Comment, CommentType, Comments, Diagnostic, Diagnos use 
ruby_prism_sys::{ pm_arena_t, pm_options_command_line_set, pm_options_encoding_locked_set, pm_options_encoding_set, pm_options_filepath_set, pm_options_free, pm_options_frozen_string_literal_set, pm_options_line_set, pm_options_main_script_set, pm_options_new, pm_options_partial_script_set, pm_options_scope_forwarding_set, - pm_options_scope_get_mut, pm_options_scope_init, pm_options_scope_local_get_mut, pm_options_scopes_init, pm_options_t, pm_options_version_set, pm_parse, pm_parser_init, pm_parser_t, pm_string_constant_init, + pm_options_scope_mut, pm_options_scope_init, pm_options_scope_local_mut, pm_options_scopes_init, pm_options_t, pm_options_version_set, pm_parse, pm_parser_init, pm_parser_t, pm_string_constant_init, }; /// The version of Ruby syntax to parse with. @@ -287,11 +287,11 @@ impl Options { unsafe { pm_options_scopes_init(opts, self.scopes.len()) }; for (scope_index, scope) in self.scopes.iter().enumerate() { - let pm_scope = unsafe { pm_options_scope_get_mut(opts, scope_index) }; + let pm_scope = unsafe { pm_options_scope_mut(opts, scope_index) }; unsafe { pm_options_scope_init(pm_scope, scope.locals.len()) }; for (local_index, local) in scope.locals.iter().enumerate() { - let pm_local = unsafe { pm_options_scope_local_get_mut(pm_scope, local_index) }; + let pm_local = unsafe { pm_options_scope_local_mut(pm_scope, local_index) }; unsafe { pm_string_constant_init(pm_local, local.as_ptr().cast::(), local.len()) }; } diff --git a/src/options.c b/src/options.c index 9dbafdf63c..59f1dd4f17 100644 --- a/src/options.c +++ b/src/options.c @@ -64,7 +64,7 @@ pm_options_shebang_callback_set(pm_options_t *options, pm_options_shebang_callba * Get the filepath option on the given options struct. 
*/ const pm_string_t * -pm_options_filepath_get(const pm_options_t *options) { +pm_options_filepath(const pm_options_t *options) { return &options->filepath; } @@ -230,7 +230,7 @@ pm_options_partial_script_set(pm_options_t *options, bool partial_script) { * Get the freeze option on the given options struct. */ bool -pm_options_freeze_get(const pm_options_t *options) { +pm_options_freeze(const pm_options_t *options) { return options->freeze; } @@ -265,7 +265,7 @@ pm_options_scopes_init(pm_options_t *options, size_t scopes_count) { * options. */ const pm_options_scope_t * -pm_options_scope_get(const pm_options_t *options, size_t index) { +pm_options_scope(const pm_options_t *options, size_t index) { return &options->scopes[index]; } @@ -274,7 +274,7 @@ pm_options_scope_get(const pm_options_t *options, size_t index) { * options. */ pm_options_scope_t * -pm_options_scope_get_mut(pm_options_t *options, size_t index) { +pm_options_scope_mut(pm_options_t *options, size_t index) { return &options->scopes[index]; } @@ -295,7 +295,7 @@ pm_options_scope_init(pm_options_scope_t *scope, size_t locals_count) { * scope. */ const pm_string_t * -pm_options_scope_local_get(const pm_options_scope_t *scope, size_t index) { +pm_options_scope_local(const pm_options_scope_t *scope, size_t index) { return &scope->locals[index]; } @@ -304,7 +304,7 @@ pm_options_scope_local_get(const pm_options_scope_t *scope, size_t index) { * scope. 
*/ pm_string_t * -pm_options_scope_local_get_mut(pm_options_scope_t *scope, size_t index) { +pm_options_scope_local_mut(pm_options_scope_t *scope, size_t index) { return &scope->locals[index]; } diff --git a/src/prism.c b/src/prism.c index 901b8971cd..4bafdf3bb8 100644 --- a/src/prism.c +++ b/src/prism.c @@ -22339,7 +22339,7 @@ pm_parser_init(pm_arena_t *arena, pm_parser_t *parser, const uint8_t *source, si if (parser->parsing_eval) parser->warn_mismatched_indentation = false; for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) { - const pm_options_scope_t *scope = pm_options_scope_get(options, scope_index); + const pm_options_scope_t *scope = pm_options_scope(options, scope_index); pm_parser_scope_push(parser, scope_index == 0); // Scopes given from the outside are not allowed to have numbered @@ -22347,7 +22347,7 @@ pm_parser_init(pm_arena_t *arena, pm_parser_t *parser, const uint8_t *source, si parser->current_scope->parameters = ((pm_scope_parameters_t) scope->forwarding) | PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED; for (size_t local_index = 0; local_index < scope->locals_count; local_index++) { - const pm_string_t *local = pm_options_scope_local_get(scope, local_index); + const pm_string_t *local = pm_options_scope_local(scope, local_index); const uint8_t *source = pm_string_source(local); size_t length = pm_string_length(local); From 06a944a08fa1c1c8f24d7417c17b2e9bb9db4d16 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 18 Mar 2026 14:23:02 -0400 Subject: [PATCH 079/100] Make some token logic internal --- .gitignore | 2 +- include/prism/internal/tokens.h | 17 +++++ prism.gemspec | 2 +- src/prism.c | 71 ++++++++++--------- templates/ext/prism/api_node.c.erb | 2 +- templates/include/prism/ast.h.erb | 15 ++-- .../src/{token_type.c.erb => tokens.c.erb} | 8 +-- templates/template.rb | 2 +- 8 files changed, 65 insertions(+), 54 deletions(-) create mode 100644 include/prism/internal/tokens.h rename templates/src/{token_type.c.erb 
=> tokens.c.erb} (98%) diff --git a/.gitignore b/.gitignore index 7df40e7ba6..5bce199577 100644 --- a/.gitignore +++ b/.gitignore @@ -57,7 +57,7 @@ out.svg /src/node.c /src/prettyprint.c /src/serialize.c -/src/token_type.c +/src/tokens.c /src/**/*.o /rbi/prism/dsl.rbi /rbi/prism/node.rbi diff --git a/include/prism/internal/tokens.h b/include/prism/internal/tokens.h new file mode 100644 index 0000000000..05651bf5c8 --- /dev/null +++ b/include/prism/internal/tokens.h @@ -0,0 +1,17 @@ +/** + * @file internal/tokens.h + */ +#ifndef PRISM_INTERNAL_TOKENS_H +#define PRISM_INTERNAL_TOKENS_H + +#include "prism/ast.h" + +/** + * Returns the human name of the given token type. + * + * @param token_type The token type to convert to a human name. + * @return The human name of the given token type. + */ +const char * pm_token_str(pm_token_type_t token_type); + +#endif diff --git a/prism.gemspec b/prism.gemspec index d3fd8da546..4c9b685427 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -208,7 +208,7 @@ Gem::Specification.new do |spec| "src/strings.c", "src/strncasecmp.c", "src/strpbrk.c", - "src/token_type.c" + "src/tokens.c" ] spec.extensions = ["ext/prism/extconf.rb"] diff --git a/src/prism.c b/src/prism.c index 4bafdf3bb8..18249acc71 100644 --- a/src/prism.c +++ b/src/prism.c @@ -26,6 +26,7 @@ #include "prism/internal/strings.h" #include "prism/internal/strncasecmp.h" #include "prism/internal/strpbrk.h" +#include "prism/internal/tokens.h" #include "prism/excludes.h" #include "prism/serialize.h" @@ -10348,7 +10349,7 @@ parser_lex(pm_parser_t *parser) { // , case ',': if ((parser->previous.type == PM_TOKEN_COMMA) && (parser->enclosure_nesting > 0)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_ARRAY_TERM, pm_token_str(parser->current.type)); } lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL); @@ -13487,7 +13488,7 @@ 
parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) { // This is an inlined version of accept1 because the error that we // want to add has varargs. If this happens again, we should // probably extract a helper function. - PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type)); parser->previous.start = parser->previous.end; parser->previous.type = 0; } @@ -15102,7 +15103,7 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT, (uint8_t) (flags & ~PM_PARSE_ACCEPTS_DO_BLOCK), (uint16_t) (depth + 1)); if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_str(parser->current.type)); parser->previous.start = parser->previous.end; parser->previous.type = 0; } @@ -15124,7 +15125,7 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept // then we have a trailing comma where we need to check whether it is // allowed or not. 
if (parser->previous.type == PM_TOKEN_COMMA && !match1(parser, PM_TOKEN_SEMICOLON)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_EXPECT_ARGUMENT, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_EXPECT_ARGUMENT, pm_token_str(parser->current.type)); } pm_accepts_block_stack_pop(parser); @@ -16101,7 +16102,7 @@ parse_method_definition_name(pm_parser_t *parser) { parser_lex(parser); return parser->previous; default: - PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_DEF_NAME, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_DEF_NAME, pm_token_str(parser->current.type)); return (pm_token_t) { .type = 0, .start = parser->current.start, .end = parser->current.end }; } } @@ -16303,7 +16304,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1 } else if (accept1(parser, PM_TOKEN_STRING_END)) { node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped)); } else { - PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_str(parser->previous.type)); parser->previous.start = parser->previous.end; parser->previous.type = 0; node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped)); @@ -16940,7 +16941,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm first_node = parse_expression(parser, PM_BINDING_POWER_MAX, PM_PARSE_ACCEPTS_DO_BLOCK | PM_PARSE_ACCEPTS_LABEL, PM_ERR_PATTERN_HASH_KEY_LABEL, (uint16_t) (depth + 1)); break; default: { - PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, 
PM_ERR_PATTERN_HASH_KEY, pm_token_str(parser->current.type)); parser_lex(parser); first_node = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous))); @@ -17413,22 +17414,22 @@ static void pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) { switch (diag_id) { case PM_ERR_HASH_KEY: { - PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, diag_id, pm_token_type_human(parser->previous.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, diag_id, pm_token_str(parser->previous.type)); break; } case PM_ERR_HASH_VALUE: case PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR: { - PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, diag_id, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, diag_id, pm_token_str(parser->current.type)); break; } case PM_ERR_UNARY_RECEIVER: { - const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type)); + const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_str(parser->current.type)); PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, diag_id, human, parser->previous.start[0]); break; } case PM_ERR_UNARY_DISALLOWED: case PM_ERR_EXPECT_ARGUMENT: { - PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, diag_id, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, diag_id, pm_token_str(parser->current.type)); break; } default: @@ -17751,7 +17752,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, u } else { // If there was no comma, then we need to add a syntax // error. 
- PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_END(parser, &parser->previous), 0, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_END(parser, &parser->previous), 0, PM_ERR_ARRAY_SEPARATOR, pm_token_str(parser->current.type)); parser->previous.start = parser->previous.end; parser->previous.type = 0; } @@ -17827,7 +17828,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, u accept1(parser, PM_TOKEN_NEWLINE); if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_ARRAY_TERM, pm_token_str(parser->current.type)); parser->previous.start = parser->previous.end; parser->previous.type = 0; } @@ -17970,7 +17971,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, u // If we didn't find a terminator and we didn't find a right // parenthesis, then this is a syntax error. if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type)); } // Parse each statement within the parentheses. @@ -18001,7 +18002,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, u } else if (!match1(parser, PM_TOKEN_EOF)) { // If we're at the end of the file, then we're going to add // an error after this for the ) anyway. 
- PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type)); } } @@ -18760,7 +18761,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, u // Reject `foo && return bar`. if (!(flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && arguments.arguments != NULL) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, &next, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(next.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &next, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(next.type)); } } } @@ -18841,7 +18842,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, u pm_parser_scope_push(parser, true); if (!match2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER, pm_token_str(parser->current.type)); } pm_node_t *statements = NULL; @@ -19046,7 +19047,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, u name = parse_method_definition_name(parser); } else { if (!valid_name) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, &identifier, PM_ERR_DEF_NAME, pm_token_type_human(identifier.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &identifier, PM_ERR_DEF_NAME, pm_token_str(identifier.type)); } name = identifier; @@ -19120,7 +19121,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, u context_pop(parser); if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, 
PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_str(parser->current.type)); parser->previous.start = parser->previous.end; parser->previous.type = 0; } @@ -19232,7 +19233,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, u // `def f = def g = foo bar`) is a command assignment and // cannot appear as a def body. if (PM_NODE_TYPE_P(statement, PM_DEF_NODE) && pm_command_call_value_p(statement)) { - PM_PARSER_ERR_NODE_FORMAT(parser, statement, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_NODE_FORMAT(parser, statement, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type)); } pm_statements_node_body_append(parser, (pm_statements_node_t *) statements, statement, false); @@ -19407,7 +19408,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, u do_keyword = parser->previous; } else { if (!match2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_FOR_DELIMITER, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_FOR_DELIMITER, pm_token_str(parser->current.type)); } } @@ -20439,12 +20440,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, u // If we get here, then we are assuming this token is closing a // parent context, so we'll indicate that to the user so that // they know how we behaved. 
- PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_type_human(parser->current.type), context_human(recoverable)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_str(parser->current.type), context_human(recoverable)); } else if (diag_id == PM_ERR_CANNOT_PARSE_EXPRESSION) { // We're going to make a special case here, because "cannot // parse expression" is pretty generic, and we know here that we // have an unexpected token. - PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_str(parser->current.type)); } else { pm_parser_err_prefix(parser, diag_id); } @@ -20472,7 +20473,7 @@ parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_ // operators with higher binding power. If we find one, emit an error // and skip the operator and its right-hand side. if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER && (pm_command_call_value_p(value) || pm_block_call_p(value))) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type)); parser_lex(parser); parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1)); } @@ -20578,7 +20579,7 @@ parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding // operators with higher binding power. If we find one, emit an error // and skip the operator and its right-hand side. 
if (single_value && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER && (pm_command_call_value_p(value) || pm_block_call_p(value))) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type)); parser_lex(parser); parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1)); } @@ -21306,7 +21307,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t // In this case we have an operator but we don't know what it's for. // We need to treat it as an error. For now, we'll mark it as an error // and just skip right past it. - PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_str(parser->current.type)); return node; } } @@ -21428,21 +21429,21 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t case PM_RESCUE_MODIFIER_NODE: { pm_rescue_modifier_node_t *cast = (pm_rescue_modifier_node_t *) node; if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type)); } break; } case PM_AND_NODE: { pm_and_node_t *cast = (pm_and_node_t *) node; if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, 
&operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type)); } break; } case PM_OR_NODE: { pm_or_node_t *cast = (pm_or_node_t *) node; if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type)); } break; } @@ -21481,21 +21482,21 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t case PM_RESCUE_MODIFIER_NODE: { pm_rescue_modifier_node_t *cast = (pm_rescue_modifier_node_t *) node; if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type)); } break; } case PM_AND_NODE: { pm_and_node_t *cast = (pm_and_node_t *) node; if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type)); } break; } case PM_OR_NODE: { pm_or_node_t *cast = (pm_or_node_t *) node; if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, 
PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type)); } break; } @@ -21516,7 +21517,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t break; } default: { - PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_MESSAGE, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_MESSAGE, pm_token_str(parser->current.type)); message = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end }; } } @@ -21930,7 +21931,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t // If this is a non-assoc operator and we are about to parse the // exact same operator, then we need to add an error. if (match1(parser, current_token_type)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_str(parser->current.type), pm_token_str(current_token_type)); break; } @@ -21943,7 +21944,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t // if (PM_NODE_TYPE_P(node, PM_RANGE_NODE) && ((pm_range_node_t *) node)->right == NULL) { if (match4(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_DOT, PM_TOKEN_AMPERSAND_DOT)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_str(parser->current.type), pm_token_str(current_token_type)); break; } diff --git a/templates/ext/prism/api_node.c.erb b/templates/ext/prism/api_node.c.erb index ca793b471c..6dd3a59372 100644 --- a/templates/ext/prism/api_node.c.erb +++ b/templates/ext/prism/api_node.c.erb @@ -25,7 +25,7 @@ 
pm_location_new(const uint32_t start, const uint32_t length, VALUE source, bool VALUE pm_token_new(const pm_parser_t *parser, const pm_token_t *token, rb_encoding *encoding, VALUE source, bool freeze) { - ID type = rb_intern(pm_token_type_name(token->type)); + ID type = rb_intern(pm_token_type(token->type)); VALUE location = pm_location_new((uint32_t) (token->start - pm_parser_start(parser)), (uint32_t) (token->end - token->start), source, freeze); VALUE slice = rb_enc_str_new((const char *) token->start, token->end - token->start, encoding); diff --git a/templates/include/prism/ast.h.erb b/templates/include/prism/ast.h.erb index f08e017fc0..e1b233918d 100644 --- a/templates/include/prism/ast.h.erb +++ b/templates/include/prism/ast.h.erb @@ -50,18 +50,11 @@ typedef struct { /** * Returns a string representation of the given token type. * - * @param token_type The token type to convert to a string. - * @return A string representation of the given token type. + * @param token_type The type of the token to get the string representation of. + * @return A string representation of the given token type. This is meant for + * debugging purposes and is not guaranteed to be stable across versions. */ -PRISM_EXPORTED_FUNCTION const char * pm_token_type_name(pm_token_type_t token_type); - -/** - * Returns the human name of the given token type. - * - * @param token_type The token type to convert to a human name. - * @return The human name of the given token type. 
- */ -const char * pm_token_type_human(pm_token_type_t token_type); +PRISM_EXPORTED_FUNCTION const char * pm_token_type(pm_token_type_t token_type); /** * This struct represents a slice in the source code, defined by an offset and diff --git a/templates/src/token_type.c.erb b/templates/src/tokens.c.erb similarity index 98% rename from templates/src/token_type.c.erb rename to templates/src/tokens.c.erb index cdf4e73658..1e82954738 100644 --- a/templates/src/token_type.c.erb +++ b/templates/src/tokens.c.erb @@ -6,7 +6,7 @@ * Returns a string representation of the given token type. */ const char * -pm_token_type_name(pm_token_type_t token_type) { +pm_token_type(pm_token_type_t token_type) { switch (token_type) { <%- tokens.each do |token| -%> case PM_TOKEN_<%= token.name %>: @@ -27,7 +27,7 @@ pm_token_type_name(pm_token_type_t token_type) { * Returns the human name of the given token type. */ const char * -pm_token_type_human(pm_token_type_t token_type) { +pm_token_str(pm_token_type_t token_type) { switch (token_type) { case PM_TOKEN_EOF: return "end-of-input"; @@ -360,8 +360,8 @@ pm_token_type_human(pm_token_type_t token_type) { return ""; } - // Provide a default, because some compilers can't determine that the above - // switch is exhaustive. + /* Provide a default, because some compilers cannot determine that the above + * switch is exhaustive. 
*/ assert(false && "unreachable"); return ""; } diff --git a/templates/template.rb b/templates/template.rb index a84a912366..78c8ac1954 100755 --- a/templates/template.rb +++ b/templates/template.rb @@ -706,7 +706,7 @@ def locals "src/node.c", "src/prettyprint.c", "src/serialize.c", - "src/token_type.c" + "src/tokens.c" ] end end From 7dde21044733961f93903ca082dca3de2d518f4e Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 18 Mar 2026 14:35:08 -0400 Subject: [PATCH 080/100] Documentation on public API functions --- include/prism/arena.h | 6 ++-- include/prism/buffer.h | 10 +++--- include/prism/comments.h | 6 ++-- include/prism/compiler/nodiscard.h | 22 +++++++++++++ include/prism/compiler/nonnull.h | 18 +++++++++++ include/prism/constant_pool.h | 6 ++-- include/prism/diagnostic.h | 12 ++++--- include/prism/json.h | 3 +- include/prism/line_offset_list.h | 3 +- include/prism/magic_comments.h | 5 +-- include/prism/node.h | 9 +++--- include/prism/options.h | 51 ++++++++++++++++-------------- include/prism/parser.h | 51 ++++++++++++++++-------------- include/prism/prettyprint.h | 3 +- include/prism/serialize.h | 15 +++++---- include/prism/stream.h | 3 +- include/prism/string_query.h | 7 ++-- include/prism/strings.h | 13 ++++---- templates/src/node.c.erb | 2 +- 19 files changed, 155 insertions(+), 90 deletions(-) create mode 100644 include/prism/compiler/nodiscard.h create mode 100644 include/prism/compiler/nonnull.h diff --git a/include/prism/arena.h b/include/prism/arena.h index 1b1729bb24..890c98a7b4 100644 --- a/include/prism/arena.h +++ b/include/prism/arena.h @@ -7,6 +7,8 @@ #define PRISM_ARENA_H #include "prism/compiler/exported.h" +#include "prism/compiler/nodiscard.h" +#include "prism/compiler/nonnull.h" #include @@ -23,13 +25,13 @@ typedef struct pm_arena_t pm_arena_t; * the caller to free the arena using pm_arena_free when it is no longer * needed. 
*/ -PRISM_EXPORTED_FUNCTION pm_arena_t * pm_arena_new(void); +PRISM_EXPORTED_FUNCTION pm_arena_t * pm_arena_new(void) PRISM_NODISCARD; /** * Frees both the held memory and the arena itself. * * @param arena The arena to free. */ -PRISM_EXPORTED_FUNCTION void pm_arena_free(pm_arena_t *arena); +PRISM_EXPORTED_FUNCTION void pm_arena_free(pm_arena_t *arena) PRISM_NONNULL(1); #endif diff --git a/include/prism/buffer.h b/include/prism/buffer.h index b6e8feea31..0d67633ede 100644 --- a/include/prism/buffer.h +++ b/include/prism/buffer.h @@ -7,6 +7,8 @@ #define PRISM_BUFFER_H #include "prism/compiler/exported.h" +#include "prism/compiler/nodiscard.h" +#include "prism/compiler/nonnull.h" #include @@ -24,7 +26,7 @@ typedef struct pm_buffer_t pm_buffer_t; * * \public \memberof pm_buffer_t */ -PRISM_EXPORTED_FUNCTION pm_buffer_t * pm_buffer_new(void); +PRISM_EXPORTED_FUNCTION pm_buffer_t * pm_buffer_new(void) PRISM_NODISCARD; /** * Free both the memory held by the buffer and the buffer itself. @@ -33,7 +35,7 @@ PRISM_EXPORTED_FUNCTION pm_buffer_t * pm_buffer_new(void); * * \public \memberof pm_buffer_t */ -PRISM_EXPORTED_FUNCTION void pm_buffer_free(pm_buffer_t *buffer); +PRISM_EXPORTED_FUNCTION void pm_buffer_free(pm_buffer_t *buffer) PRISM_NONNULL(1); /** * Return the value of the buffer. @@ -43,7 +45,7 @@ PRISM_EXPORTED_FUNCTION void pm_buffer_free(pm_buffer_t *buffer); * * \public \memberof pm_buffer_t */ -PRISM_EXPORTED_FUNCTION char * pm_buffer_value(const pm_buffer_t *buffer); +PRISM_EXPORTED_FUNCTION char * pm_buffer_value(const pm_buffer_t *buffer) PRISM_NONNULL(1); /** * Return the length of the buffer. 
@@ -53,6 +55,6 @@ PRISM_EXPORTED_FUNCTION char * pm_buffer_value(const pm_buffer_t *buffer); * * \public \memberof pm_buffer_t */ -PRISM_EXPORTED_FUNCTION size_t pm_buffer_length(const pm_buffer_t *buffer); +PRISM_EXPORTED_FUNCTION size_t pm_buffer_length(const pm_buffer_t *buffer) PRISM_NONNULL(1); #endif diff --git a/include/prism/comments.h b/include/prism/comments.h index 91792897d9..3e2dfcddfd 100644 --- a/include/prism/comments.h +++ b/include/prism/comments.h @@ -5,6 +5,8 @@ #define PRISM_COMMENTS_H #include "prism/compiler/exported.h" +#include "prism/compiler/nodiscard.h" +#include "prism/compiler/nonnull.h" #include "prism/ast.h" @@ -25,7 +27,7 @@ typedef struct pm_comment_t pm_comment_t; * @param comment the comment whose location we want to get * @return the location associated with the given comment */ -PRISM_EXPORTED_FUNCTION pm_location_t pm_comment_location(const pm_comment_t *comment); +PRISM_EXPORTED_FUNCTION pm_location_t pm_comment_location(const pm_comment_t *comment) PRISM_NONNULL(1); /** * Returns the type associated with the given comment. @@ -34,6 +36,6 @@ PRISM_EXPORTED_FUNCTION pm_location_t pm_comment_location(const pm_comment_t *co * @return the type associated with the given comment. This can either be * PM_COMMENT_INLINE or PM_COMMENT_EMBDOC. */ -PRISM_EXPORTED_FUNCTION pm_comment_type_t pm_comment_type(const pm_comment_t *comment); +PRISM_EXPORTED_FUNCTION pm_comment_type_t pm_comment_type(const pm_comment_t *comment) PRISM_NONNULL(1); #endif diff --git a/include/prism/compiler/nodiscard.h b/include/prism/compiler/nodiscard.h new file mode 100644 index 0000000000..ccd6c00719 --- /dev/null +++ b/include/prism/compiler/nodiscard.h @@ -0,0 +1,22 @@ +/** + * @file compiler/nodiscard.h + */ +#ifndef PRISM_COMPILER_NODISCARD_H +#define PRISM_COMPILER_NODISCARD_H + +/** + * Mark the return value of a function as important so that the compiler warns + * if a caller ignores it. 
This is useful for functions that return error codes + * or allocated resources that must be freed. + */ +#if defined(__GNUC__) || defined(__clang__) /* first: this form is valid after the declarator, where PRISM_NODISCARD is used */ +# define PRISM_NODISCARD __attribute__((__warn_unused_result__)) +#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L +# define PRISM_NODISCARD [[nodiscard]] +#elif defined(_MSC_VER) +# define PRISM_NODISCARD _Check_return_ +#else +# define PRISM_NODISCARD +#endif + +#endif diff --git a/include/prism/compiler/nonnull.h b/include/prism/compiler/nonnull.h new file mode 100644 index 0000000000..9d19355665 --- /dev/null +++ b/include/prism/compiler/nonnull.h @@ -0,0 +1,18 @@ +/** + * @file compiler/nonnull.h + */ +#ifndef PRISM_COMPILER_NONNULL_H +#define PRISM_COMPILER_NONNULL_H + +/** + * Mark the parameters of a function as non-null. This allows the compiler to + * warn if a caller passes NULL for a parameter that should never be NULL. The + * arguments are the 1-based indices of the parameters. + */ +#if defined(__GNUC__) || defined(__clang__) +# define PRISM_NONNULL(...) __attribute__((__nonnull__(__VA_ARGS__))) +#else +# define PRISM_NONNULL(...) +#endif + +#endif diff --git a/include/prism/constant_pool.h b/include/prism/constant_pool.h index 08a06d1612..7fea4fad94 100644 --- a/include/prism/constant_pool.h +++ b/include/prism/constant_pool.h @@ -11,6 +11,8 @@ #define PRISM_CONSTANT_POOL_H #include "prism/compiler/exported.h" +#include "prism/compiler/nodiscard.h" +#include "prism/compiler/nonnull.h" #include #include @@ -46,7 +48,7 @@ typedef struct pm_constant_pool_t pm_constant_pool_t; * @param constant The constant to get the start of. * @return A raw pointer to the start of the constant. */ -PRISM_EXPORTED_FUNCTION const uint8_t * pm_constant_start(const pm_constant_t *constant); +PRISM_EXPORTED_FUNCTION const uint8_t * pm_constant_start(const pm_constant_t *constant) PRISM_NONNULL(1); /** * Return the length of a constant.
@@ -54,6 +56,6 @@ PRISM_EXPORTED_FUNCTION const uint8_t * pm_constant_start(const pm_constant_t *c * @param constant The constant to get the length of. * @return The length of the constant. */ -PRISM_EXPORTED_FUNCTION size_t pm_constant_length(const pm_constant_t *constant); +PRISM_EXPORTED_FUNCTION size_t pm_constant_length(const pm_constant_t *constant) PRISM_NONNULL(1); #endif diff --git a/include/prism/diagnostic.h b/include/prism/diagnostic.h index 6c098c55ef..f2b541024f 100644 --- a/include/prism/diagnostic.h +++ b/include/prism/diagnostic.h @@ -15,6 +15,8 @@ #define PRISM_DIAGNOSTIC_H #include "prism/compiler/exported.h" +#include "prism/compiler/nodiscard.h" +#include "prism/compiler/nonnull.h" #include "prism/ast.h" @@ -58,7 +60,7 @@ typedef enum { * not change in the future. This is meant to be used for debugging and * error reporting purposes, and not for programmatic checks. */ -PRISM_EXPORTED_FUNCTION const char * pm_diagnostic_type(const pm_diagnostic_t *diagnostic); +PRISM_EXPORTED_FUNCTION const char * pm_diagnostic_type(const pm_diagnostic_t *diagnostic) PRISM_NONNULL(1); /** * Get the location of the given diagnostic. @@ -66,7 +68,7 @@ PRISM_EXPORTED_FUNCTION const char * pm_diagnostic_type(const pm_diagnostic_t *d * @param diagnostic The diagnostic to get the location of. * @returns The location of the given diagnostic. */ -PRISM_EXPORTED_FUNCTION pm_location_t pm_diagnostic_location(const pm_diagnostic_t *diagnostic); +PRISM_EXPORTED_FUNCTION pm_location_t pm_diagnostic_location(const pm_diagnostic_t *diagnostic) PRISM_NONNULL(1); /** * Get the message of the given diagnostic. @@ -74,7 +76,7 @@ PRISM_EXPORTED_FUNCTION pm_location_t pm_diagnostic_location(const pm_diagnostic * @param diagnostic The diagnostic to get the message of. * @returns The message of the given diagnostic. 
*/ -PRISM_EXPORTED_FUNCTION const char * pm_diagnostic_message(const pm_diagnostic_t *diagnostic); +PRISM_EXPORTED_FUNCTION const char * pm_diagnostic_message(const pm_diagnostic_t *diagnostic) PRISM_NONNULL(1); /** * Get the error level associated with the given diagnostic. @@ -84,7 +86,7 @@ PRISM_EXPORTED_FUNCTION const char * pm_diagnostic_message(const pm_diagnostic_t * warning, or is in any way not an error, then the return value is * undefined and should not be relied upon. */ -PRISM_EXPORTED_FUNCTION pm_error_level_t pm_diagnostic_error_level(const pm_diagnostic_t *diagnostic); +PRISM_EXPORTED_FUNCTION pm_error_level_t pm_diagnostic_error_level(const pm_diagnostic_t *diagnostic) PRISM_NONNULL(1); /** * Get the warning level associated with the given diagnostic. @@ -94,6 +96,6 @@ PRISM_EXPORTED_FUNCTION pm_error_level_t pm_diagnostic_error_level(const pm_diag * error, or is in any way not a warning, then the return value is * undefined and should not be relied upon. */ -PRISM_EXPORTED_FUNCTION pm_warning_level_t pm_diagnostic_warning_level(const pm_diagnostic_t *diagnostic); +PRISM_EXPORTED_FUNCTION pm_warning_level_t pm_diagnostic_warning_level(const pm_diagnostic_t *diagnostic) PRISM_NONNULL(1); #endif diff --git a/include/prism/json.h b/include/prism/json.h index cc2f6f3bb1..11039e7796 100644 --- a/include/prism/json.h +++ b/include/prism/json.h @@ -12,6 +12,7 @@ #ifndef PRISM_EXCLUDE_JSON #include "prism/compiler/exported.h" +#include "prism/compiler/nonnull.h" #include "prism/ast.h" #include "prism/buffer.h" @@ -24,7 +25,7 @@ * @param parser The parser that parsed the node. * @param node The node to serialize. 
*/ -PRISM_EXPORTED_FUNCTION void pm_dump_json(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *node); +PRISM_EXPORTED_FUNCTION void pm_dump_json(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *node) PRISM_NONNULL(1, 2, 3); #endif diff --git a/include/prism/line_offset_list.h b/include/prism/line_offset_list.h index 9d0ddf3889..e839862fea 100644 --- a/include/prism/line_offset_list.h +++ b/include/prism/line_offset_list.h @@ -15,6 +15,7 @@ #define PRISM_LINE_OFFSET_LIST_H #include "prism/compiler/exported.h" +#include "prism/compiler/nonnull.h" #include #include @@ -55,6 +56,6 @@ typedef struct { * @param start_line The line to start counting from. * @return The line and column of the given offset. */ -PRISM_EXPORTED_FUNCTION pm_line_column_t pm_line_offset_list_line_column(const pm_line_offset_list_t *list, uint32_t cursor, int32_t start_line); +PRISM_EXPORTED_FUNCTION pm_line_column_t pm_line_offset_list_line_column(const pm_line_offset_list_t *list, uint32_t cursor, int32_t start_line) PRISM_NONNULL(1); #endif diff --git a/include/prism/magic_comments.h b/include/prism/magic_comments.h index a19ecedb74..4941e94885 100644 --- a/include/prism/magic_comments.h +++ b/include/prism/magic_comments.h @@ -5,6 +5,7 @@ #define PRISM_MAGIC_COMMENTS_H #include "prism/compiler/exported.h" +#include "prism/compiler/nonnull.h" #include "prism/ast.h" @@ -19,7 +20,7 @@ typedef struct pm_magic_comment_t pm_magic_comment_t; * @param comment the magic comment whose key location we want to get * @return the location of the key associated with the given magic comment */ -PRISM_EXPORTED_FUNCTION pm_location_t pm_magic_comment_key(const pm_magic_comment_t *comment); +PRISM_EXPORTED_FUNCTION pm_location_t pm_magic_comment_key(const pm_magic_comment_t *comment) PRISM_NONNULL(1); /** * Returns the location of the value associated with the given magic comment. 
@@ -27,6 +28,6 @@ PRISM_EXPORTED_FUNCTION pm_location_t pm_magic_comment_key(const pm_magic_commen * @param comment the magic comment whose value location we want to get * @return the location of the value associated with the given magic comment */ -PRISM_EXPORTED_FUNCTION pm_location_t pm_magic_comment_value(const pm_magic_comment_t *comment); +PRISM_EXPORTED_FUNCTION pm_location_t pm_magic_comment_value(const pm_magic_comment_t *comment) PRISM_NONNULL(1); #endif diff --git a/include/prism/node.h b/include/prism/node.h index 5056bfbb26..7db4dcd891 100644 --- a/include/prism/node.h +++ b/include/prism/node.h @@ -7,6 +7,7 @@ #define PRISM_NODE_H #include "prism/compiler/exported.h" +#include "prism/compiler/nonnull.h" #include "prism/ast.h" @@ -23,7 +24,7 @@ * @param node_type The node type to convert to a string. * @return A string representation of the given node type. */ -PRISM_EXPORTED_FUNCTION const char * pm_node_type_to_str(pm_node_type_t node_type); +PRISM_EXPORTED_FUNCTION const char * pm_node_type(pm_node_type_t node_type); /** * Visit each of the nodes in this subtree using the given visitor callback. The @@ -41,7 +42,7 @@ PRISM_EXPORTED_FUNCTION const char * pm_node_type_to_str(pm_node_type_t node_typ * bool visit(const pm_node_t *node, void *data) { * size_t *indent = (size_t *) data; * for (size_t i = 0; i < *indent * 2; i++) putc(' ', stdout); - * printf("%s\n", pm_node_type_to_str(node->type)); + * printf("%s\n", pm_node_type(node->type)); * * size_t next_indent = *indent + 1; * size_t *next_data = &next_indent; @@ -77,7 +78,7 @@ PRISM_EXPORTED_FUNCTION const char * pm_node_type_to_str(pm_node_type_t node_typ * @param visitor The callback to call for each node in the subtree. * @param data An opaque pointer that is passed to the visitor callback. 
*/ -PRISM_EXPORTED_FUNCTION void pm_visit_node(const pm_node_t *node, bool (*visitor)(const pm_node_t *node, void *data), void *data); +PRISM_EXPORTED_FUNCTION void pm_visit_node(const pm_node_t *node, bool (*visitor)(const pm_node_t *node, void *data), void *data) PRISM_NONNULL(1); /** * Visit the children of the given node with the given callback. This is the @@ -88,6 +89,6 @@ PRISM_EXPORTED_FUNCTION void pm_visit_node(const pm_node_t *node, bool (*visitor * @param visitor The callback to call for each child node. * @param data An opaque pointer that is passed to the visitor callback. */ -PRISM_EXPORTED_FUNCTION void pm_visit_child_nodes(const pm_node_t *node, bool (*visitor)(const pm_node_t *node, void *data), void *data); +PRISM_EXPORTED_FUNCTION void pm_visit_child_nodes(const pm_node_t *node, bool (*visitor)(const pm_node_t *node, void *data), void *data) PRISM_NONNULL(1); #endif diff --git a/include/prism/options.h b/include/prism/options.h index c1df928e06..10834f28e7 100644 --- a/include/prism/options.h +++ b/include/prism/options.h @@ -6,6 +6,9 @@ #ifndef PRISM_OPTIONS_H #define PRISM_OPTIONS_H +#include "prism/compiler/nodiscard.h" +#include "prism/compiler/nonnull.h" + #include "prism/strings.h" #include @@ -112,7 +115,7 @@ static const uint8_t PM_OPTIONS_COMMAND_LINE_X = 0x20; * * \public \memberof pm_options */ -PRISM_EXPORTED_FUNCTION pm_options_t * pm_options_new(void); +PRISM_EXPORTED_FUNCTION pm_options_t * pm_options_new(void) PRISM_NODISCARD; /** * Free both the held memory of the given options struct and the struct itself. @@ -121,7 +124,7 @@ PRISM_EXPORTED_FUNCTION pm_options_t * pm_options_new(void); * * \public \memberof pm_options */ -PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options); +PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options) PRISM_NONNULL(1); /** * Set the shebang callback option on the given options struct. 
@@ -133,7 +136,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options); * * \public \memberof pm_options */ -PRISM_EXPORTED_FUNCTION void pm_options_shebang_callback_set(pm_options_t *options, pm_options_shebang_callback_t shebang_callback, void *shebang_callback_data); +PRISM_EXPORTED_FUNCTION void pm_options_shebang_callback_set(pm_options_t *options, pm_options_shebang_callback_t shebang_callback, void *shebang_callback_data) PRISM_NONNULL(1); /** * Get the filepath option on the given options struct. @@ -143,7 +146,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_shebang_callback_set(pm_options_t *optio * * \public \memberof pm_options */ -PRISM_EXPORTED_FUNCTION const pm_string_t * pm_options_filepath(const pm_options_t *options); +PRISM_EXPORTED_FUNCTION const pm_string_t * pm_options_filepath(const pm_options_t *options) PRISM_NONNULL(1); /** * Set the filepath option on the given options struct. @@ -153,7 +156,7 @@ PRISM_EXPORTED_FUNCTION const pm_string_t * pm_options_filepath(const pm_options * * \public \memberof pm_options */ -PRISM_EXPORTED_FUNCTION void pm_options_filepath_set(pm_options_t *options, const char *filepath); +PRISM_EXPORTED_FUNCTION void pm_options_filepath_set(pm_options_t *options, const char *filepath) PRISM_NONNULL(1); /** * Set the line option on the given options struct. @@ -163,7 +166,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_filepath_set(pm_options_t *options, cons * * \public \memberof pm_options */ -PRISM_EXPORTED_FUNCTION void pm_options_line_set(pm_options_t *options, int32_t line); +PRISM_EXPORTED_FUNCTION void pm_options_line_set(pm_options_t *options, int32_t line) PRISM_NONNULL(1); /** * Set the encoding option on the given options struct. 
@@ -173,7 +176,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_line_set(pm_options_t *options, int32_t * * \public \memberof pm_options */ -PRISM_EXPORTED_FUNCTION void pm_options_encoding_set(pm_options_t *options, const char *encoding); +PRISM_EXPORTED_FUNCTION void pm_options_encoding_set(pm_options_t *options, const char *encoding) PRISM_NONNULL(1); /** * Set the encoding_locked option on the given options struct. @@ -183,7 +186,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_encoding_set(pm_options_t *options, cons * * \public \memberof pm_options */ -PRISM_EXPORTED_FUNCTION void pm_options_encoding_locked_set(pm_options_t *options, bool encoding_locked); +PRISM_EXPORTED_FUNCTION void pm_options_encoding_locked_set(pm_options_t *options, bool encoding_locked) PRISM_NONNULL(1); /** * Set the frozen string literal option on the given options struct. @@ -193,7 +196,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_encoding_locked_set(pm_options_t *option * * \public \memberof pm_options */ -PRISM_EXPORTED_FUNCTION void pm_options_frozen_string_literal_set(pm_options_t *options, bool frozen_string_literal); +PRISM_EXPORTED_FUNCTION void pm_options_frozen_string_literal_set(pm_options_t *options, bool frozen_string_literal) PRISM_NONNULL(1); /** * Sets the command line option on the given options struct. 
@@ -203,7 +206,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_frozen_string_literal_set(pm_options_t * * * \public \memberof pm_options */ -PRISM_EXPORTED_FUNCTION void pm_options_command_line_set(pm_options_t *options, uint8_t command_line); +PRISM_EXPORTED_FUNCTION void pm_options_command_line_set(pm_options_t *options, uint8_t command_line) PRISM_NONNULL(1); /** * Set the version option on the given options struct by parsing the given @@ -217,7 +220,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_command_line_set(pm_options_t *options, * * \public \memberof pm_options */ -PRISM_EXPORTED_FUNCTION bool pm_options_version_set(pm_options_t *options, const char *version, size_t length); +PRISM_EXPORTED_FUNCTION bool pm_options_version_set(pm_options_t *options, const char *version, size_t length) PRISM_NONNULL(1); /** * Set the version option on the given options struct to the lowest version of @@ -227,7 +230,7 @@ PRISM_EXPORTED_FUNCTION bool pm_options_version_set(pm_options_t *options, const * * \public \memberof pm_options */ -PRISM_EXPORTED_FUNCTION void pm_options_version_set_lowest(pm_options_t *options); +PRISM_EXPORTED_FUNCTION void pm_options_version_set_lowest(pm_options_t *options) PRISM_NONNULL(1); /** * Set the version option on the given options struct to the highest version of @@ -237,7 +240,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_version_set_lowest(pm_options_t *options * * \public \memberof pm_options */ -PRISM_EXPORTED_FUNCTION void pm_options_version_set_highest(pm_options_t *options); +PRISM_EXPORTED_FUNCTION void pm_options_version_set_highest(pm_options_t *options) PRISM_NONNULL(1); /** * Set the main script option on the given options struct. 
@@ -247,7 +250,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_version_set_highest(pm_options_t *option * * \public \memberof pm_options */ -PRISM_EXPORTED_FUNCTION void pm_options_main_script_set(pm_options_t *options, bool main_script); +PRISM_EXPORTED_FUNCTION void pm_options_main_script_set(pm_options_t *options, bool main_script) PRISM_NONNULL(1); /** * Set the partial script option on the given options struct. @@ -257,7 +260,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_main_script_set(pm_options_t *options, b * * \public \memberof pm_options */ -PRISM_EXPORTED_FUNCTION void pm_options_partial_script_set(pm_options_t *options, bool partial_script); +PRISM_EXPORTED_FUNCTION void pm_options_partial_script_set(pm_options_t *options, bool partial_script) PRISM_NONNULL(1); /** * Get the freeze option on the given options struct. @@ -266,7 +269,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_partial_script_set(pm_options_t *options * * \public \memberof pm_options */ -PRISM_EXPORTED_FUNCTION bool pm_options_freeze(const pm_options_t *options); +PRISM_EXPORTED_FUNCTION bool pm_options_freeze(const pm_options_t *options) PRISM_NONNULL(1); /** * Set the freeze option on the given options struct. @@ -276,7 +279,7 @@ PRISM_EXPORTED_FUNCTION bool pm_options_freeze(const pm_options_t *options); * * \public \memberof pm_options */ -PRISM_EXPORTED_FUNCTION void pm_options_freeze_set(pm_options_t *options, bool freeze); +PRISM_EXPORTED_FUNCTION void pm_options_freeze_set(pm_options_t *options, bool freeze) PRISM_NONNULL(1); /** * Allocate and zero out the scopes array on the given options struct. 
@@ -287,7 +290,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_freeze_set(pm_options_t *options, bool f * * \public \memberof pm_options */ -PRISM_EXPORTED_FUNCTION bool pm_options_scopes_init(pm_options_t *options, size_t scopes_count); +PRISM_EXPORTED_FUNCTION bool pm_options_scopes_init(pm_options_t *options, size_t scopes_count) PRISM_NONNULL(1); /** * Return a constant pointer to the scope at the given index within the given @@ -299,7 +302,7 @@ PRISM_EXPORTED_FUNCTION bool pm_options_scopes_init(pm_options_t *options, size_ * * \public \memberof pm_options */ -PRISM_EXPORTED_FUNCTION const pm_options_scope_t * pm_options_scope(const pm_options_t *options, size_t index); +PRISM_EXPORTED_FUNCTION const pm_options_scope_t * pm_options_scope(const pm_options_t *options, size_t index) PRISM_NONNULL(1); /** * Return a mutable pointer to the scope at the given index within the given @@ -311,7 +314,7 @@ PRISM_EXPORTED_FUNCTION const pm_options_scope_t * pm_options_scope(const pm_opt * * \public \memberof pm_options */ -PRISM_EXPORTED_FUNCTION pm_options_scope_t * pm_options_scope_mut(pm_options_t *options, size_t index); +PRISM_EXPORTED_FUNCTION pm_options_scope_t * pm_options_scope_mut(pm_options_t *options, size_t index) PRISM_NONNULL(1); /** * Create a new options scope struct. 
This will hold a set of locals that are in @@ -323,7 +326,7 @@ PRISM_EXPORTED_FUNCTION pm_options_scope_t * pm_options_scope_mut(pm_options_t * * * \public \memberof pm_options */ -PRISM_EXPORTED_FUNCTION void pm_options_scope_init(pm_options_scope_t *scope, size_t locals_count); +PRISM_EXPORTED_FUNCTION void pm_options_scope_init(pm_options_scope_t *scope, size_t locals_count) PRISM_NONNULL(1); /** * Return a constant pointer to the local at the given index within the given @@ -335,7 +338,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_scope_init(pm_options_scope_t *scope, si * * \public \memberof pm_options */ -PRISM_EXPORTED_FUNCTION const pm_string_t * pm_options_scope_local(const pm_options_scope_t *scope, size_t index); +PRISM_EXPORTED_FUNCTION const pm_string_t * pm_options_scope_local(const pm_options_scope_t *scope, size_t index) PRISM_NONNULL(1); /** * Return a mutable pointer to the local at the given index within the given @@ -347,7 +350,7 @@ PRISM_EXPORTED_FUNCTION const pm_string_t * pm_options_scope_local(const pm_opti * * \public \memberof pm_options */ -PRISM_EXPORTED_FUNCTION pm_string_t * pm_options_scope_local_mut(pm_options_scope_t *scope, size_t index); +PRISM_EXPORTED_FUNCTION pm_string_t * pm_options_scope_local_mut(pm_options_scope_t *scope, size_t index) PRISM_NONNULL(1); /** * Set the forwarding option on the given scope struct. 
@@ -357,6 +360,6 @@ PRISM_EXPORTED_FUNCTION pm_string_t * pm_options_scope_local_mut(pm_options_scop * * \public \memberof pm_options */ -PRISM_EXPORTED_FUNCTION void pm_options_scope_forwarding_set(pm_options_scope_t *scope, uint8_t forwarding); +PRISM_EXPORTED_FUNCTION void pm_options_scope_forwarding_set(pm_options_scope_t *scope, uint8_t forwarding) PRISM_NONNULL(1); #endif diff --git a/include/prism/parser.h b/include/prism/parser.h index f3927d663f..6d9efa6485 100644 --- a/include/prism/parser.h +++ b/include/prism/parser.h @@ -6,6 +6,9 @@ #ifndef PRISM_PARSER_H #define PRISM_PARSER_H +#include "prism/compiler/nodiscard.h" +#include "prism/compiler/nonnull.h" + #include "prism/ast.h" #include "prism/comments.h" #include "prism/diagnostic.h" @@ -32,7 +35,7 @@ typedef struct pm_parser_t pm_parser_t; * * \public \memberof pm_parser */ -PRISM_EXPORTED_FUNCTION pm_parser_t * pm_parser_new(pm_arena_t *arena, const uint8_t *source, size_t size, const pm_options_t *options); +PRISM_EXPORTED_FUNCTION pm_parser_t * pm_parser_new(pm_arena_t *arena, const uint8_t *source, size_t size, const pm_options_t *options) PRISM_NODISCARD PRISM_NONNULL(1); /** * Free both the memory held by the given parser and the parser itself. 
@@ -41,7 +44,7 @@ PRISM_EXPORTED_FUNCTION pm_parser_t * pm_parser_new(pm_arena_t *arena, const uin * * \public \memberof pm_parser */ -PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser); +PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser) PRISM_NONNULL(1); /** * When the encoding that is being used to parse the source is changed by prism, @@ -64,7 +67,7 @@ typedef void (*pm_lex_callback_t)(pm_parser_t *parser, pm_token_t *token, void * * * \public \memberof pm_parser */ -PRISM_EXPORTED_FUNCTION void pm_parser_encoding_changed_callback_set(pm_parser_t *parser, pm_encoding_changed_callback_t callback); +PRISM_EXPORTED_FUNCTION void pm_parser_encoding_changed_callback_set(pm_parser_t *parser, pm_encoding_changed_callback_t callback) PRISM_NONNULL(1); /** * Register a callback that will be called whenever a token is lexed. @@ -75,7 +78,7 @@ PRISM_EXPORTED_FUNCTION void pm_parser_encoding_changed_callback_set(pm_parser_t * * \public \memberof pm_parser */ -PRISM_EXPORTED_FUNCTION void pm_parser_lex_callback_set(pm_parser_t *parser, pm_lex_callback_t callback, void *data); +PRISM_EXPORTED_FUNCTION void pm_parser_lex_callback_set(pm_parser_t *parser, pm_lex_callback_t callback, void *data) PRISM_NONNULL(1); /** * Returns the opaque data that is passed to the lex callback when it is called. @@ -83,7 +86,7 @@ PRISM_EXPORTED_FUNCTION void pm_parser_lex_callback_set(pm_parser_t *parser, pm_ * @param parser The parser whose lex callback data we want to get. * @return The opaque data that is passed to the lex callback when it is called. */ -PRISM_EXPORTED_FUNCTION void * pm_parser_lex_callback_data(const pm_parser_t *parser); +PRISM_EXPORTED_FUNCTION void * pm_parser_lex_callback_data(const pm_parser_t *parser) PRISM_NONNULL(1); /** * Returns the raw pointer to the start of the source that is being parsed. 
@@ -91,7 +94,7 @@ PRISM_EXPORTED_FUNCTION void * pm_parser_lex_callback_data(const pm_parser_t *pa * @param parser the parser whose start pointer we want to get * @return the raw pointer to the start of the source that is being parsed */ -PRISM_EXPORTED_FUNCTION const uint8_t * pm_parser_start(const pm_parser_t *parser); +PRISM_EXPORTED_FUNCTION const uint8_t * pm_parser_start(const pm_parser_t *parser) PRISM_NONNULL(1); /** * Returns the raw pointer to the end of the source that is being parsed. @@ -99,7 +102,7 @@ PRISM_EXPORTED_FUNCTION const uint8_t * pm_parser_start(const pm_parser_t *parse * @param parser the parser whose end pointer we want to get * @return the raw pointer to the end of the source that is being parsed */ -PRISM_EXPORTED_FUNCTION const uint8_t * pm_parser_end(const pm_parser_t *parser); +PRISM_EXPORTED_FUNCTION const uint8_t * pm_parser_end(const pm_parser_t *parser) PRISM_NONNULL(1); /** * Returns the line that the parser was considered to have started on. @@ -107,7 +110,7 @@ PRISM_EXPORTED_FUNCTION const uint8_t * pm_parser_end(const pm_parser_t *parser) * @param parser the parser whose start line we want to get * @return the line that the parser was considered to have started on */ -PRISM_EXPORTED_FUNCTION int32_t pm_parser_start_line(const pm_parser_t *parser); +PRISM_EXPORTED_FUNCTION int32_t pm_parser_start_line(const pm_parser_t *parser) PRISM_NONNULL(1); /** * Returns the name of the encoding that is being used to parse the source. @@ -115,7 +118,7 @@ PRISM_EXPORTED_FUNCTION int32_t pm_parser_start_line(const pm_parser_t *parser); * @param parser the parser whose encoding name we want to get * @return the name of the encoding that is being used to parse the source */ -PRISM_EXPORTED_FUNCTION const char * pm_parser_encoding_name(const pm_parser_t *parser); +PRISM_EXPORTED_FUNCTION const char * pm_parser_encoding_name(const pm_parser_t *parser) PRISM_NONNULL(1); /** * Returns the line offsets that are associated with the given parser. 
@@ -123,7 +126,7 @@ PRISM_EXPORTED_FUNCTION const char * pm_parser_encoding_name(const pm_parser_t * * @param parser the parser whose line offsets we want to get * @return the line offsets that are associated with the given parser */ -PRISM_EXPORTED_FUNCTION const pm_line_offset_list_t * pm_parser_line_offsets(const pm_parser_t *parser); +PRISM_EXPORTED_FUNCTION const pm_line_offset_list_t * pm_parser_line_offsets(const pm_parser_t *parser) PRISM_NONNULL(1); /** * Returns the location of the __DATA__ section that is associated with the @@ -133,7 +136,7 @@ PRISM_EXPORTED_FUNCTION const pm_line_offset_list_t * pm_parser_line_offsets(con * @return the location of the __DATA__ section that is associated with the * given parser. If it is unset, then the length will be set to 0. */ -PRISM_EXPORTED_FUNCTION const pm_location_t * pm_parser_data_loc(const pm_parser_t *parser); +PRISM_EXPORTED_FUNCTION const pm_location_t * pm_parser_data_loc(const pm_parser_t *parser) PRISM_NONNULL(1); /** * Returns whether the given parser is continuable, meaning that it could become @@ -142,7 +145,7 @@ PRISM_EXPORTED_FUNCTION const pm_location_t * pm_parser_data_loc(const pm_parser * @param parser the parser whose continuable status we want to get * @return whether the given parser is continuable */ -PRISM_EXPORTED_FUNCTION bool pm_parser_continuable(const pm_parser_t *parser); +PRISM_EXPORTED_FUNCTION bool pm_parser_continuable(const pm_parser_t *parser) PRISM_NONNULL(1); /** * Returns the lex state of the parser. Note that this is an internal detail, @@ -153,7 +156,7 @@ PRISM_EXPORTED_FUNCTION bool pm_parser_continuable(const pm_parser_t *parser); * @param parser the parser whose lex state we want to get * @return the lex state of the parser */ -PRISM_EXPORTED_FUNCTION int pm_parser_lex_state(const pm_parser_t *parser); +PRISM_EXPORTED_FUNCTION int pm_parser_lex_state(const pm_parser_t *parser) PRISM_NONNULL(1); /** * Returns the number of comments associated with the given parser. 
@@ -161,7 +164,7 @@ PRISM_EXPORTED_FUNCTION int pm_parser_lex_state(const pm_parser_t *parser); * @param parser the parser whose comments we want to get the size of * @return the number of comments associated with the given parser */ -PRISM_EXPORTED_FUNCTION size_t pm_parser_comments_size(const pm_parser_t *parser); +PRISM_EXPORTED_FUNCTION size_t pm_parser_comments_size(const pm_parser_t *parser) PRISM_NONNULL(1); /** * A callback function that can be used to process comments found while parsing. @@ -179,7 +182,7 @@ typedef void (*pm_comment_callback_t)(const pm_comment_t *comment, void *data); * @param data the data to pass to the callback function for each comment. This * can be NULL if no data needs to be passed to the callback function. */ -PRISM_EXPORTED_FUNCTION void pm_parser_comments_each(const pm_parser_t *parser, pm_comment_callback_t callback, void *data); +PRISM_EXPORTED_FUNCTION void pm_parser_comments_each(const pm_parser_t *parser, pm_comment_callback_t callback, void *data) PRISM_NONNULL(1); /** * Returns the number of magic comments associated with the given parser. @@ -187,7 +190,7 @@ PRISM_EXPORTED_FUNCTION void pm_parser_comments_each(const pm_parser_t *parser, * @param parser the parser whose magic comments we want to get the size of * @return the number of magic comments associated with the given parser */ -PRISM_EXPORTED_FUNCTION size_t pm_parser_magic_comments_size(const pm_parser_t *parser); +PRISM_EXPORTED_FUNCTION size_t pm_parser_magic_comments_size(const pm_parser_t *parser) PRISM_NONNULL(1); /** * A callback function that can be used to process magic comments found while parsing. @@ -205,7 +208,7 @@ typedef void (*pm_magic_comment_callback_t)(const pm_magic_comment_t *magic_comm * @param data the data to pass to the callback function for each magic comment. * This can be NULL if no data needs to be passed to the callback function. 
*/ -PRISM_EXPORTED_FUNCTION void pm_parser_magic_comments_each(const pm_parser_t *parser, pm_magic_comment_callback_t callback, void *data); +PRISM_EXPORTED_FUNCTION void pm_parser_magic_comments_each(const pm_parser_t *parser, pm_magic_comment_callback_t callback, void *data) PRISM_NONNULL(1); /** * Returns the number of errors associated with the given parser. @@ -213,7 +216,7 @@ PRISM_EXPORTED_FUNCTION void pm_parser_magic_comments_each(const pm_parser_t *pa * @param parser the parser whose errors we want to get the size of * @return the number of errors associated with the given parser */ -PRISM_EXPORTED_FUNCTION size_t pm_parser_errors_size(const pm_parser_t *parser); +PRISM_EXPORTED_FUNCTION size_t pm_parser_errors_size(const pm_parser_t *parser) PRISM_NONNULL(1); /** * Returns the number of warnings associated with the given parser. @@ -221,7 +224,7 @@ PRISM_EXPORTED_FUNCTION size_t pm_parser_errors_size(const pm_parser_t *parser); * @param parser the parser whose warnings we want to get the size of * @return the number of warnings associated with the given parser */ -PRISM_EXPORTED_FUNCTION size_t pm_parser_warnings_size(const pm_parser_t *parser); +PRISM_EXPORTED_FUNCTION size_t pm_parser_warnings_size(const pm_parser_t *parser) PRISM_NONNULL(1); /** * A callback function that can be used to process diagnostics found while @@ -240,7 +243,7 @@ typedef void (*pm_diagnostic_callback_t)(const pm_diagnostic_t *diagnostic, void * @param data the data to pass to the callback function for each error. This * can be NULL if no data needs to be passed to the callback function. 
*/ -PRISM_EXPORTED_FUNCTION void pm_parser_errors_each(const pm_parser_t *parser, pm_diagnostic_callback_t callback, void *data); +PRISM_EXPORTED_FUNCTION void pm_parser_errors_each(const pm_parser_t *parser, pm_diagnostic_callback_t callback, void *data) PRISM_NONNULL(1); /** * Iterates over the warnings associated with the given parser and calls the @@ -253,7 +256,7 @@ PRISM_EXPORTED_FUNCTION void pm_parser_errors_each(const pm_parser_t *parser, pm * @param data the data to pass to the callback function for each warning. This * can be NULL if no data needs to be passed to the callback function. */ -PRISM_EXPORTED_FUNCTION void pm_parser_warnings_each(const pm_parser_t *parser, pm_diagnostic_callback_t callback, void *data); +PRISM_EXPORTED_FUNCTION void pm_parser_warnings_each(const pm_parser_t *parser, pm_diagnostic_callback_t callback, void *data) PRISM_NONNULL(1); /** * Returns the number of constants in the constant pool associated with the @@ -264,7 +267,7 @@ PRISM_EXPORTED_FUNCTION void pm_parser_warnings_each(const pm_parser_t *parser, * @return the number of constants in the constant pool associated with the * given parser */ -PRISM_EXPORTED_FUNCTION size_t pm_parser_constants_size(const pm_parser_t *parser); +PRISM_EXPORTED_FUNCTION size_t pm_parser_constants_size(const pm_parser_t *parser) PRISM_NONNULL(1); /** * A callback function that can be used to process constants found while @@ -283,7 +286,7 @@ typedef void (*pm_constant_callback_t)(const pm_constant_t *constant, void *data * @param data the data to pass to the callback function for each constant. This * can be NULL if no data needs to be passed to the callback function. */ -PRISM_EXPORTED_FUNCTION void pm_parser_constants_each(const pm_parser_t *parser, pm_constant_callback_t callback, void *data); +PRISM_EXPORTED_FUNCTION void pm_parser_constants_each(const pm_parser_t *parser, pm_constant_callback_t callback, void *data) PRISM_NONNULL(1); /** * Initiate the parser with the given parser. 
@@ -293,6 +296,6 @@ PRISM_EXPORTED_FUNCTION void pm_parser_constants_each(const pm_parser_t *parser, * * \public \memberof pm_parser */ -PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser); +PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser) PRISM_NONNULL(1); #endif diff --git a/include/prism/prettyprint.h b/include/prism/prettyprint.h index 0c81618e7f..0d8e416341 100644 --- a/include/prism/prettyprint.h +++ b/include/prism/prettyprint.h @@ -11,6 +11,7 @@ #ifndef PRISM_EXCLUDE_PRETTYPRINT #include "prism/compiler/exported.h" +#include "prism/compiler/nonnull.h" #include "prism/ast.h" #include "prism/buffer.h" @@ -23,7 +24,7 @@ * @param parser The parser that parsed the AST. * @param node The root node of the AST to pretty-print. */ -PRISM_EXPORTED_FUNCTION void pm_prettyprint(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm_node_t *node); +PRISM_EXPORTED_FUNCTION void pm_prettyprint(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm_node_t *node) PRISM_NONNULL(1, 2, 3); #endif diff --git a/include/prism/serialize.h b/include/prism/serialize.h index 775fa6fbfb..ee94801f6e 100644 --- a/include/prism/serialize.h +++ b/include/prism/serialize.h @@ -14,6 +14,7 @@ #ifndef PRISM_EXCLUDE_SERIALIZATION #include "prism/compiler/exported.h" +#include "prism/compiler/nonnull.h" #include "prism/buffer.h" #include "prism/parser.h" @@ -26,7 +27,7 @@ * @param node The node to serialize. * @param buffer The buffer to serialize to. */ -PRISM_EXPORTED_FUNCTION void pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer); +PRISM_EXPORTED_FUNCTION void pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) PRISM_NONNULL(1, 2, 3); /** * Parse the given source to the AST and dump the AST to the given buffer. @@ -36,7 +37,7 @@ PRISM_EXPORTED_FUNCTION void pm_serialize(pm_parser_t *parser, pm_node_t *node, * @param size The size of the source. * @param data The optional data to pass to the parser. 
*/ -PRISM_EXPORTED_FUNCTION void pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data); +PRISM_EXPORTED_FUNCTION void pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) PRISM_NONNULL(1, 2); /** * Parse and serialize the AST represented by the source that is read out of the @@ -48,7 +49,7 @@ PRISM_EXPORTED_FUNCTION void pm_serialize_parse(pm_buffer_t *buffer, const uint8 * @param stream_feof The function to use to tell if the stream has hit eof. * @param data The optional data to pass to the parser. */ -PRISM_EXPORTED_FUNCTION void pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const char *data); +PRISM_EXPORTED_FUNCTION void pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const char *data) PRISM_NONNULL(1, 2); /** * Parse and serialize the comments in the given source to the given buffer. @@ -58,7 +59,7 @@ PRISM_EXPORTED_FUNCTION void pm_serialize_parse_stream(pm_buffer_t *buffer, void * @param size The size of the source. * @param data The optional data to pass to the parser. */ -PRISM_EXPORTED_FUNCTION void pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data); +PRISM_EXPORTED_FUNCTION void pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) PRISM_NONNULL(1, 2); /** * Lex the given source and serialize to the given buffer. @@ -68,7 +69,7 @@ PRISM_EXPORTED_FUNCTION void pm_serialize_parse_comments(pm_buffer_t *buffer, co * @param buffer The buffer to serialize to. * @param data The optional data to pass to the lexer. 
*/ -PRISM_EXPORTED_FUNCTION void pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data); +PRISM_EXPORTED_FUNCTION void pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) PRISM_NONNULL(1, 2); /** * Parse and serialize both the AST and the tokens represented by the given @@ -79,7 +80,7 @@ PRISM_EXPORTED_FUNCTION void pm_serialize_lex(pm_buffer_t *buffer, const uint8_t * @param size The size of the source. * @param data The optional data to pass to the parser. */ -PRISM_EXPORTED_FUNCTION void pm_serialize_parse_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data); +PRISM_EXPORTED_FUNCTION void pm_serialize_parse_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) PRISM_NONNULL(1, 2); /** * Parse the source and return true if it parses without errors or warnings. @@ -89,7 +90,7 @@ PRISM_EXPORTED_FUNCTION void pm_serialize_parse_lex(pm_buffer_t *buffer, const u * @param data The optional data to pass to the parser. * @return True if the source parses without errors or warnings. */ -PRISM_EXPORTED_FUNCTION bool pm_serialize_parse_success_p(const uint8_t *source, size_t size, const char *data); +PRISM_EXPORTED_FUNCTION bool pm_serialize_parse_success_p(const uint8_t *source, size_t size, const char *data) PRISM_NONNULL(1); #endif diff --git a/include/prism/stream.h b/include/prism/stream.h index 9ed94f58e9..47325d667d 100644 --- a/include/prism/stream.h +++ b/include/prism/stream.h @@ -7,6 +7,7 @@ #define PRISM_STREAM_H #include "prism/compiler/exported.h" +#include "prism/compiler/nonnull.h" #include "prism/arena.h" #include "prism/buffer.h" @@ -39,6 +40,6 @@ typedef int (pm_parse_stream_feof_t)(void *stream); * @param options The optional options to use when parsing. * @return The AST representing the source. 
*/ -PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_parser_t **parser, pm_arena_t *arena, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options); +PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_parser_t **parser, pm_arena_t *arena, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options) PRISM_NONNULL(1, 2, 3); #endif diff --git a/include/prism/string_query.h b/include/prism/string_query.h index 55c6c82697..406aa952a9 100644 --- a/include/prism/string_query.h +++ b/include/prism/string_query.h @@ -8,6 +8,7 @@ #define PRISM_STRING_QUERY_H #include "prism/compiler/exported.h" +#include "prism/compiler/nonnull.h" #include #include @@ -35,7 +36,7 @@ typedef enum { * @return PM_STRING_QUERY_TRUE if the query is true, PM_STRING_QUERY_FALSE if * the query is false, and PM_STRING_QUERY_ERROR if the encoding was invalid. */ -PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name); +PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name) PRISM_NONNULL(1, 3); /** * Check that the slice is a valid constant name. @@ -46,7 +47,7 @@ PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_local(const uint8_t *s * @return PM_STRING_QUERY_TRUE if the query is true, PM_STRING_QUERY_FALSE if * the query is false, and PM_STRING_QUERY_ERROR if the encoding was invalid. */ -PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name); +PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name) PRISM_NONNULL(1, 3); /** * Check that the slice is a valid method name. 
@@ -57,6 +58,6 @@ PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_constant(const uint8_t * @return PM_STRING_QUERY_TRUE if the query is true, PM_STRING_QUERY_FALSE if * the query is false, and PM_STRING_QUERY_ERROR if the encoding was invalid. */ -PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name); +PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name) PRISM_NONNULL(1, 3); #endif diff --git a/include/prism/strings.h b/include/prism/strings.h index e4de068abc..48d3f9b0d9 100644 --- a/include/prism/strings.h +++ b/include/prism/strings.h @@ -8,6 +8,7 @@ #include "prism/compiler/exported.h" #include "prism/compiler/filesystem.h" +#include "prism/compiler/nonnull.h" #include #include @@ -55,7 +56,7 @@ PRISM_EXPORTED_FUNCTION size_t pm_string_sizeof(void); * @param source The source of the string. * @param length The length of the string. 
*/ -PRISM_EXPORTED_FUNCTION void pm_string_constant_init(pm_string_t *string, const char *source, size_t length); +PRISM_EXPORTED_FUNCTION void pm_string_constant_init(pm_string_t *string, const char *source, size_t length) PRISM_NONNULL(1); /** * Represents the result of calling pm_string_mapped_init or @@ -96,7 +97,7 @@ typedef enum { * * \public \memberof pm_string_t */ -PRISM_EXPORTED_FUNCTION pm_string_init_result_t pm_string_mapped_init(pm_string_t *string, const char *filepath); +PRISM_EXPORTED_FUNCTION pm_string_init_result_t pm_string_mapped_init(pm_string_t *string, const char *filepath) PRISM_NONNULL(1, 2); /** * Read the file indicated by the filepath parameter into source and load its @@ -109,7 +110,7 @@ PRISM_EXPORTED_FUNCTION pm_string_init_result_t pm_string_mapped_init(pm_string_ * * \public \memberof pm_string_t */ -PRISM_EXPORTED_FUNCTION pm_string_init_result_t pm_string_file_init(pm_string_t *string, const char *filepath); +PRISM_EXPORTED_FUNCTION pm_string_init_result_t pm_string_file_init(pm_string_t *string, const char *filepath) PRISM_NONNULL(1, 2); /** * Returns the length associated with the string. @@ -119,7 +120,7 @@ PRISM_EXPORTED_FUNCTION pm_string_init_result_t pm_string_file_init(pm_string_t * * \public \memberof pm_string_t */ -PRISM_EXPORTED_FUNCTION size_t pm_string_length(const pm_string_t *string); +PRISM_EXPORTED_FUNCTION size_t pm_string_length(const pm_string_t *string) PRISM_NONNULL(1); /** * Returns the start pointer associated with the string. @@ -129,7 +130,7 @@ PRISM_EXPORTED_FUNCTION size_t pm_string_length(const pm_string_t *string); * * \public \memberof pm_string_t */ -PRISM_EXPORTED_FUNCTION const uint8_t * pm_string_source(const pm_string_t *string); +PRISM_EXPORTED_FUNCTION const uint8_t * pm_string_source(const pm_string_t *string) PRISM_NONNULL(1); /** * Free the associated memory of the given string. 
@@ -138,6 +139,6 @@ PRISM_EXPORTED_FUNCTION const uint8_t * pm_string_source(const pm_string_t *stri * * \public \memberof pm_string_t */ -PRISM_EXPORTED_FUNCTION void pm_string_cleanup(pm_string_t *string); +PRISM_EXPORTED_FUNCTION void pm_string_cleanup(pm_string_t *string) PRISM_NONNULL(1); #endif diff --git a/templates/src/node.c.erb b/templates/src/node.c.erb index 7b95200632..f51aff6e53 100644 --- a/templates/src/node.c.erb +++ b/templates/src/node.c.erb @@ -79,7 +79,7 @@ pm_node_list_concat(pm_arena_t *arena, pm_node_list_t *list, pm_node_list_t *oth * Returns a string representation of the given node type. */ const char * -pm_node_type_to_str(pm_node_type_t node_type) +pm_node_type(pm_node_type_t node_type) { switch (node_type) { <%- nodes.each do |node| -%> From 94d16c61cf60535737edd35fecc1f63d20326ff6 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 18 Mar 2026 14:53:09 -0400 Subject: [PATCH 081/100] Clean up documentation --- Doxyfile | 4 +- include/prism/arena.h | 2 +- include/prism/buffer.h | 8 - include/prism/comments.h | 4 +- include/prism/constant_pool.h | 8 +- include/prism/diagnostic.h | 8 - include/prism/internal/allocator.h | 41 +- include/prism/internal/allocator_debug.h | 5 - include/prism/internal/arena.h | 58 +-- include/prism/internal/bit.h | 7 +- include/prism/internal/buffer.h | 148 ++----- include/prism/internal/char.h | 135 +----- include/prism/internal/comments.h | 13 +- include/prism/internal/constant_pool.h | 132 ++---- include/prism/internal/encoding.h | 83 ++-- include/prism/internal/integer.h | 43 +- include/prism/internal/isinf.h | 9 +- include/prism/internal/line_offset_list.h | 50 +-- include/prism/internal/list.h | 31 +- include/prism/internal/magic_comments.h | 13 +- include/prism/internal/memchr.h | 15 +- include/prism/internal/node.h | 33 +- include/prism/internal/options.h | 76 ++-- include/prism/internal/parser.h | 389 +++++++++--------- include/prism/internal/regexp.h | 38 +- include/prism/internal/serialize.h | 19 
+- include/prism/internal/static_literals.h | 58 +-- include/prism/internal/strings.h | 29 +- include/prism/internal/strncasecmp.h | 13 +- include/prism/internal/strpbrk.h | 16 +- include/prism/internal/tokens.h | 8 +- include/prism/line_offset_list.h | 2 +- include/prism/magic_comments.h | 12 +- include/prism/node.h | 2 +- include/prism/options.h | 67 +-- include/prism/parser.h | 42 +- include/prism/serialize.h | 2 +- include/prism/stream.h | 2 +- include/prism/string_query.h | 6 +- include/prism/strings.h | 20 +- templates/include/prism/ast.h.erb | 4 +- .../include/prism/internal/diagnostic.h.erb | 36 +- templates/template.rb | 2 +- 43 files changed, 481 insertions(+), 1212 deletions(-) diff --git a/Doxyfile b/Doxyfile index 00bb3537ab..adfd78c431 100644 --- a/Doxyfile +++ b/Doxyfile @@ -23,8 +23,8 @@ PROJECT_NAME = "Prism Ruby parser" OUTPUT_DIRECTORY = doc JAVADOC_AUTOBRIEF = YES OPTIMIZE_OUTPUT_FOR_C = YES -INPUT = src include include/prism -EXCLUDE = include/prism/internal/allocator_debug.h +INPUT = include/prism.h include/prism +EXCLUDE = include/prism/internal HTML_OUTPUT = c SORT_MEMBER_DOCS = NO GENERATE_LATEX = NO diff --git a/include/prism/arena.h b/include/prism/arena.h index 890c98a7b4..a637e9cef3 100644 --- a/include/prism/arena.h +++ b/include/prism/arena.h @@ -21,7 +21,7 @@ typedef struct pm_arena_t pm_arena_t; * Returns a newly allocated and initialized arena. If the arena cannot be * allocated, this function aborts the process. * - * @return A pointer to the newly allocated arena. It is the responsibility of + * @returns A pointer to the newly allocated arena. It is the responsibility of * the caller to free the arena using pm_arena_free when it is no longer * needed. */ diff --git a/include/prism/buffer.h b/include/prism/buffer.h index 0d67633ede..12844d60ff 100644 --- a/include/prism/buffer.h +++ b/include/prism/buffer.h @@ -23,8 +23,6 @@ typedef struct pm_buffer_t pm_buffer_t; * * @returns A pointer to the initialized buffer. 
The caller is responsible for * freeing the buffer with pm_buffer_free. - * - * \public \memberof pm_buffer_t */ PRISM_EXPORTED_FUNCTION pm_buffer_t * pm_buffer_new(void) PRISM_NODISCARD; @@ -32,8 +30,6 @@ PRISM_EXPORTED_FUNCTION pm_buffer_t * pm_buffer_new(void) PRISM_NODISCARD; * Free both the memory held by the buffer and the buffer itself. * * @param buffer The buffer to free. - * - * \public \memberof pm_buffer_t */ PRISM_EXPORTED_FUNCTION void pm_buffer_free(pm_buffer_t *buffer) PRISM_NONNULL(1); @@ -42,8 +38,6 @@ PRISM_EXPORTED_FUNCTION void pm_buffer_free(pm_buffer_t *buffer) PRISM_NONNULL(1 * * @param buffer The buffer to get the value of. * @returns The value of the buffer. - * - * \public \memberof pm_buffer_t */ PRISM_EXPORTED_FUNCTION char * pm_buffer_value(const pm_buffer_t *buffer) PRISM_NONNULL(1); @@ -52,8 +46,6 @@ PRISM_EXPORTED_FUNCTION char * pm_buffer_value(const pm_buffer_t *buffer) PRISM_ * * @param buffer The buffer to get the length of. * @returns The length of the buffer. - * - * \public \memberof pm_buffer_t */ PRISM_EXPORTED_FUNCTION size_t pm_buffer_length(const pm_buffer_t *buffer) PRISM_NONNULL(1); diff --git a/include/prism/comments.h b/include/prism/comments.h index 3e2dfcddfd..5938f388cd 100644 --- a/include/prism/comments.h +++ b/include/prism/comments.h @@ -25,7 +25,7 @@ typedef struct pm_comment_t pm_comment_t; * Returns the location associated with the given comment. * * @param comment the comment whose location we want to get - * @return the location associated with the given comment + * @returns the location associated with the given comment */ PRISM_EXPORTED_FUNCTION pm_location_t pm_comment_location(const pm_comment_t *comment) PRISM_NONNULL(1); @@ -33,7 +33,7 @@ PRISM_EXPORTED_FUNCTION pm_location_t pm_comment_location(const pm_comment_t *co * Returns the type associated with the given comment. * * @param comment the comment whose type we want to get - * @return the type associated with the given comment. 
This can either be + * @returns the type associated with the given comment. This can either be * PM_COMMENT_INLINE or PM_COMMENT_EMBDOC. */ PRISM_EXPORTED_FUNCTION pm_comment_type_t pm_comment_type(const pm_comment_t *comment) PRISM_NONNULL(1); diff --git a/include/prism/constant_pool.h b/include/prism/constant_pool.h index 7fea4fad94..7868c584a7 100644 --- a/include/prism/constant_pool.h +++ b/include/prism/constant_pool.h @@ -39,14 +39,16 @@ typedef struct { /** A constant in the pool which effectively stores a string. */ typedef struct pm_constant_t pm_constant_t; -/** The overall constant pool, which stores constants found while parsing. */ +/** + * The overall constant pool, which stores constants found while parsing. + */ typedef struct pm_constant_pool_t pm_constant_pool_t; /** * Return a raw pointer to the start of a constant. * * @param constant The constant to get the start of. - * @return A raw pointer to the start of the constant. + * @returns A raw pointer to the start of the constant. */ PRISM_EXPORTED_FUNCTION const uint8_t * pm_constant_start(const pm_constant_t *constant) PRISM_NONNULL(1); @@ -54,7 +56,7 @@ PRISM_EXPORTED_FUNCTION const uint8_t * pm_constant_start(const pm_constant_t *c * Return the length of a constant. * * @param constant The constant to get the length of. - * @return The length of the constant. + * @returns The length of the constant. */ PRISM_EXPORTED_FUNCTION size_t pm_constant_length(const pm_constant_t *constant) PRISM_NONNULL(1); diff --git a/include/prism/diagnostic.h b/include/prism/diagnostic.h index f2b541024f..370061ec56 100644 --- a/include/prism/diagnostic.h +++ b/include/prism/diagnostic.h @@ -1,11 +1,3 @@ -/*----------------------------------------------------------------------------*/ -/* This file is generated by the templates/template.rb script and should not */ -/* be modified manually. 
See */ -/* templates/include/prism/diagnostic.h.erb */ -/* if you are looking to modify the */ -/* template */ -/*----------------------------------------------------------------------------*/ - /** * @file diagnostic.h * diff --git a/include/prism/internal/allocator.h b/include/prism/internal/allocator.h index bd46257e44..6c54010dbf 100644 --- a/include/prism/internal/allocator.h +++ b/include/prism/internal/allocator.h @@ -1,13 +1,7 @@ -/** - * @file internal/allocator.h - * - * Macro definitions for defining the main and a custom allocator for Prism. - */ #ifndef PRISM_INTERNAL_ALLOCATOR_H #define PRISM_INTERNAL_ALLOCATOR_H -/** - * If you build Prism with a custom allocator, configure it with +/* If you build Prism with a custom allocator, configure it with * "-D PRISM_XALLOCATOR" to use your own allocator that defines xmalloc, * xrealloc, xcalloc, and xfree. * @@ -29,52 +23,41 @@ #include "prism_xallocator.h" #else #ifndef xmalloc - /** - * The malloc function that should be used. This can be overridden with - * the PRISM_XALLOCATOR define. - */ + /* The malloc function that should be used. This can be overridden with + * the PRISM_XALLOCATOR define. */ #define xmalloc malloc #endif #ifndef xrealloc - /** - * The realloc function that should be used. This can be overridden with - * the PRISM_XALLOCATOR define. - */ + /* The realloc function that should be used. This can be overridden with + * the PRISM_XALLOCATOR define. */ #define xrealloc realloc #endif #ifndef xcalloc - /** - * The calloc function that should be used. This can be overridden with - * the PRISM_XALLOCATOR define. - */ + /* The calloc function that should be used. This can be overridden with + * the PRISM_XALLOCATOR define. */ #define xcalloc calloc #endif #ifndef xfree - /** - * The free function that should be used. This can be overridden with - * the PRISM_XALLOCATOR define. - */ + /* The free function that should be used. This can be overridden with + * the PRISM_XALLOCATOR define. 
*/ #define xfree free #endif #endif #ifndef xfree_sized - /** - * The free_sized function that should be used. This can be overridden with + /* The free_sized function that should be used. This can be overridden with * the PRISM_XALLOCATOR define. If not defined, defaults to calling xfree. */ #define xfree_sized(p, s) xfree(((void)(s), (p))) #endif #ifndef xrealloc_sized - /** - * The xrealloc_sized function that should be used. This can be overridden + /* The xrealloc_sized function that should be used. This can be overridden * with the PRISM_XALLOCATOR define. If not defined, defaults to calling - * xrealloc. - */ + * xrealloc. */ #define xrealloc_sized(p, ns, os) xrealloc((p), ((void)(os), (ns))) #endif diff --git a/include/prism/internal/allocator_debug.h b/include/prism/internal/allocator_debug.h index 40f2a7b4cf..846e96ba2d 100644 --- a/include/prism/internal/allocator_debug.h +++ b/include/prism/internal/allocator_debug.h @@ -1,8 +1,3 @@ -/** - * @file internal/allocator_debug.h - * - * Decorate allocation function to ensure sizes are correct. - */ #ifndef PRISM_INTERNAL_ALLOCATOR_DEBUG_H #define PRISM_INTERNAL_ALLOCATOR_DEBUG_H diff --git a/include/prism/internal/arena.h b/include/prism/internal/arena.h index 54bbead6bd..2e413b42bf 100644 --- a/include/prism/internal/arena.h +++ b/include/prism/internal/arena.h @@ -1,8 +1,3 @@ -/** - * @file internal/arena.h - * - * A bump allocator for the prism parser. - */ #ifndef PRISM_INTERNAL_ARENA_H #define PRISM_INTERNAL_ARENA_H @@ -16,76 +11,62 @@ #include #include -/** +/* * A single block of memory in the arena. Blocks are linked via prev pointers so * they can be freed by walking the chain. */ typedef struct pm_arena_block { - /** The previous block in the chain (for freeing). */ + /* The previous block in the chain (for freeing). */ struct pm_arena_block *prev; - /** The total usable bytes in data[]. */ + /* The total usable bytes in data[]. */ size_t capacity; - /** The number of bytes consumed so far. 
*/ + /* The number of bytes consumed so far. */ size_t used; - /** The block's data. */ + /* The block's data. */ char data[PM_FLEX_ARRAY_LENGTH]; } pm_arena_block_t; -/** +/* * A bump allocator. Allocations are made by bumping a pointer within the * current block. When a block is full, a new block is allocated and linked to * the previous one. All blocks are freed at once by walking the chain. */ struct pm_arena_t { - /** The active block (allocate from here). */ + /* The active block (allocate from here). */ pm_arena_block_t *current; - /** The number of blocks allocated. */ + /* The number of blocks allocated. */ size_t block_count; }; -/** +/* * Free all blocks in the arena. After this call, all pointers returned by * pm_arena_alloc and pm_arena_zalloc are invalid. - * - * @param arena The arena whose held memory should be freed. */ void pm_arena_cleanup(pm_arena_t *arena); -/** +/* * Ensure the arena has at least `capacity` bytes available in its current * block, allocating a new block if necessary. This allows callers to * pre-size the arena to avoid repeated small block allocations. - * - * @param arena The arena to pre-size. - * @param capacity The minimum number of bytes to ensure are available. */ void pm_arena_reserve(pm_arena_t *arena, size_t capacity); -/** +/* * Slow path for pm_arena_alloc: allocate a new block and return a pointer to * the first `size` bytes. Do not call directly — use pm_arena_alloc instead. - * - * @param arena The arena to allocate from. - * @param size The number of bytes to allocate. - * @returns A pointer to the allocated memory. */ void * pm_arena_alloc_slow(pm_arena_t *arena, size_t size); -/** +/* * Allocate memory from the arena. The returned memory is NOT zeroed. This * function is infallible — it aborts on allocation failure. * * The fast path (bump pointer within the current block) is inlined at each * call site. The slow path (new block allocation) is out-of-line. - * - * @param arena The arena to allocate from. 
- * @param size The number of bytes to allocate. - * @param alignment The required alignment (must be a power of 2). - * @returns A pointer to the allocated memory. */ static PRISM_FORCE_INLINE void * pm_arena_alloc(pm_arena_t *arena, size_t size, size_t alignment) { @@ -102,14 +83,9 @@ pm_arena_alloc(pm_arena_t *arena, size_t size, size_t alignment) { return pm_arena_alloc_slow(arena, size); } -/** +/* * Allocate zero-initialized memory from the arena. This function is infallible * — it aborts on allocation failure. - * - * @param arena The arena to allocate from. - * @param size The number of bytes to allocate. - * @param alignment The required alignment (must be a power of 2). - * @returns A pointer to the allocated, zero-initialized memory. */ static PRISM_INLINE void * pm_arena_zalloc(pm_arena_t *arena, size_t size, size_t alignment) { @@ -118,15 +94,9 @@ pm_arena_zalloc(pm_arena_t *arena, size_t size, size_t alignment) { return ptr; } -/** +/* * Allocate memory from the arena and copy the given data into it. This is a * convenience wrapper around pm_arena_alloc + memcpy. - * - * @param arena The arena to allocate from. - * @param src The source data to copy. - * @param size The number of bytes to allocate and copy. - * @param alignment The required alignment (must be a power of 2). - * @returns A pointer to the allocated copy. */ static PRISM_INLINE void * pm_arena_memdup(pm_arena_t *arena, const void *src, size_t size, size_t alignment) { diff --git a/include/prism/internal/bit.h b/include/prism/internal/bit.h index b4249825a3..4eec494887 100644 --- a/include/prism/internal/bit.h +++ b/include/prism/internal/bit.h @@ -1,14 +1,9 @@ -/** - * @file internal/bit.h - * - * Bit manipulation utilities used throughout the prism library. - */ #ifndef PRISM_INTERNAL_BIT_H #define PRISM_INTERNAL_BIT_H #include "prism/compiler/inline.h" -/** +/* * Count trailing zero bits in a 64-bit value. 
Used by SWAR identifier scanning * to find the first non-matching byte in a word. * diff --git a/include/prism/internal/buffer.h b/include/prism/internal/buffer.h index 8eb0c7e243..a849bbf8e6 100644 --- a/include/prism/internal/buffer.h +++ b/include/prism/internal/buffer.h @@ -1,8 +1,3 @@ -/** - * @file internal/buffer.h - * - * A wrapper around a contiguous block of allocated memory. - */ #ifndef PRISM_INTERNAL_BUFFER_H #define PRISM_INTERNAL_BUFFER_H @@ -13,113 +8,54 @@ #include #include -/** +/* * A simple memory buffer that stores data in a contiguous block of memory. */ struct pm_buffer_t { - /** The length of the buffer in bytes. */ + /* The length of the buffer in bytes. */ size_t length; - /** The capacity of the buffer in bytes that has been allocated. */ + /* The capacity of the buffer in bytes that has been allocated. */ size_t capacity; - /** A pointer to the start of the buffer. */ + /* A pointer to the start of the buffer. */ char *value; }; -/** - * Initialize a pm_buffer_t with the given capacity. - * - * @param buffer The buffer to initialize. - * @param capacity The capacity of the buffer. - */ +/* Initialize a pm_buffer_t with the given capacity. */ void pm_buffer_init(pm_buffer_t *buffer, size_t capacity); -/** - * Free the memory held by the buffer. - * - * @param buffer The buffer whose held memory should be freed. - */ +/* Free the memory held by the buffer. */ void pm_buffer_cleanup(pm_buffer_t *buffer); -/** - * Append the given amount of space as zeroes to the buffer. - * - * @param buffer The buffer to append to. - * @param length The amount of space to append and zero. - */ +/* Append the given amount of space as zeroes to the buffer. */ void pm_buffer_append_zeroes(pm_buffer_t *buffer, size_t length); -/** - * Append a formatted string to the buffer. - * - * @param buffer The buffer to append to. - * @param format The format string to append. - * @param ... The arguments to the format string. 
- */ +/* Append a formatted string to the buffer. */ void pm_buffer_append_format(pm_buffer_t *buffer, const char *format, ...) PRISM_ATTRIBUTE_FORMAT(2, 3); -/** - * Append a string to the buffer. - * - * @param buffer The buffer to append to. - * @param value The string to append. - * @param length The length of the string to append. - */ +/* Append a string to the buffer. */ void pm_buffer_append_string(pm_buffer_t *buffer, const char *value, size_t length); -/** - * Append a list of bytes to the buffer. - * - * @param buffer The buffer to append to. - * @param value The bytes to append. - * @param length The length of the bytes to append. - */ +/* Append a list of bytes to the buffer. */ void pm_buffer_append_bytes(pm_buffer_t *buffer, const uint8_t *value, size_t length); -/** - * Append a single byte to the buffer. - * - * @param buffer The buffer to append to. - * @param value The byte to append. - */ +/* Append a single byte to the buffer. */ void pm_buffer_append_byte(pm_buffer_t *buffer, uint8_t value); -/** - * Append a 32-bit unsigned integer to the buffer as a variable-length integer. - * - * @param buffer The buffer to append to. - * @param value The integer to append. - */ +/* Append a 32-bit unsigned integer to the buffer as a variable-length integer. */ void pm_buffer_append_varuint(pm_buffer_t *buffer, uint32_t value); -/** - * Append a 32-bit signed integer to the buffer as a variable-length integer. - * - * @param buffer The buffer to append to. - * @param value The integer to append. - */ +/* Append a 32-bit signed integer to the buffer as a variable-length integer. */ void pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value); -/** - * Append a double to the buffer. - * - * @param buffer The buffer to append to. - * @param value The double to append. - */ +/* Append a double to the buffer. */ void pm_buffer_append_double(pm_buffer_t *buffer, double value); -/** - * Append a unicode codepoint to the buffer. 
- * - * @param buffer The buffer to append to. - * @param value The character to append. - * @returns True if the codepoint was valid and appended successfully, false - * otherwise. - */ +/* Append a unicode codepoint to the buffer. */ bool pm_buffer_append_unicode_codepoint(pm_buffer_t *buffer, uint32_t value); -/** +/* * The different types of escaping that can be performed by the buffer when * appending a slice of Ruby source code. */ @@ -128,66 +64,28 @@ typedef enum { PM_BUFFER_ESCAPING_JSON } pm_buffer_escaping_t; -/** - * Append a slice of source code to the buffer. - * - * @param buffer The buffer to append to. - * @param source The source code to append. - * @param length The length of the source code to append. - * @param escaping The type of escaping to perform. - */ +/* Append a slice of source code to the buffer. */ void pm_buffer_append_source(pm_buffer_t *buffer, const uint8_t *source, size_t length, pm_buffer_escaping_t escaping); -/** - * Prepend the given string to the buffer. - * - * @param buffer The buffer to prepend to. - * @param value The string to prepend. - * @param length The length of the string to prepend. - */ +/* Prepend the given string to the buffer. */ void pm_buffer_prepend_string(pm_buffer_t *buffer, const char *value, size_t length); -/** - * Concatenate one buffer onto another. - * - * @param destination The buffer to concatenate onto. - * @param source The buffer to concatenate. - */ +/* Concatenate one buffer onto another. */ void pm_buffer_concat(pm_buffer_t *destination, const pm_buffer_t *source); -/** +/* * Clear the buffer by reducing its size to 0. This does not free the allocated * memory, but it does allow the buffer to be reused. - * - * @param buffer The buffer to clear. */ void pm_buffer_clear(pm_buffer_t *buffer); -/** - * Strip the whitespace from the end of the buffer. - * - * @param buffer The buffer to strip. - */ +/* Strip the whitespace from the end of the buffer. 
*/ void pm_buffer_rstrip(pm_buffer_t *buffer); -/** - * Checks if the buffer includes the given value. - * - * @param buffer The buffer to check. - * @param value The value to check for. - * @returns The index of the first occurrence of the value in the buffer, or - * SIZE_MAX if the value is not found. - */ +/* Checks if the buffer includes the given value. */ size_t pm_buffer_index(const pm_buffer_t *buffer, char value); -/** - * Insert the given string into the buffer at the given index. - * - * @param buffer The buffer to insert into. - * @param index The index to insert at. - * @param value The string to insert. - * @param length The length of the string to insert. - */ +/* Insert the given string into the buffer at the given index. */ void pm_buffer_insert(pm_buffer_t *buffer, size_t index, const char *value, size_t length); #endif diff --git a/include/prism/internal/char.h b/include/prism/internal/char.h index 8216a1828c..9a58fba8c5 100644 --- a/include/prism/internal/char.h +++ b/include/prism/internal/char.h @@ -1,8 +1,3 @@ -/** - * @file internal/char.h - * - * Functions for working with characters and strings. - */ #ifndef PRISM_INTERNAL_CHAR_H #define PRISM_INTERNAL_CHAR_H @@ -15,48 +10,33 @@ #include #include -/** Bit flag for whitespace characters in pm_byte_table. */ +/* Bit flag for whitespace characters in pm_byte_table. */ #define PRISM_CHAR_BIT_WHITESPACE (1 << 0) -/** Bit flag for inline whitespace characters in pm_byte_table. */ +/* Bit flag for inline whitespace characters in pm_byte_table. */ #define PRISM_CHAR_BIT_INLINE_WHITESPACE (1 << 1) -/** +/* * A lookup table for classifying bytes. Each entry is a bitfield of * PRISM_CHAR_BIT_* flags. Defined in char.c. */ extern const uint8_t pm_byte_table[256]; -/** - * Returns true if the given character is a whitespace character. - * - * @param b The character to check. - * @return True if the given character is a whitespace character. 
- */ +/* Returns true if the given character is a whitespace character. */ static PRISM_FORCE_INLINE bool pm_char_is_whitespace(const uint8_t b) { return (pm_byte_table[b] & PRISM_CHAR_BIT_WHITESPACE) != 0; } -/** - * Returns true if the given character is an inline whitespace character. - * - * @param b The character to check. - * @return True if the given character is an inline whitespace character. - */ +/* Returns true if the given character is an inline whitespace character. */ static PRISM_FORCE_INLINE bool pm_char_is_inline_whitespace(const uint8_t b) { return (pm_byte_table[b] & PRISM_CHAR_BIT_INLINE_WHITESPACE) != 0; } -/** +/* * Returns the number of characters at the start of the string that are inline * whitespace (space/tab). Scans the byte table directly for use in hot paths. - * - * @param string The string to search. - * @param length The maximum number of characters to search. - * @return The number of characters at the start of the string that are inline - * whitespace. */ static PRISM_FORCE_INLINE size_t pm_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length) { @@ -67,57 +47,33 @@ pm_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length) { return size; } -/** +/* * Returns the number of characters at the start of the string that are * whitespace. Disallows searching past the given maximum number of characters. - * - * @param string The string to search. - * @param length The maximum number of characters to search. - * @return The number of characters at the start of the string that are - * whitespace. */ size_t pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length); -/** +/* * Returns the number of characters at the start of the string that are * whitespace while also tracking the location of each newline. Disallows * searching past the given maximum number of characters. - * - * @param string The string to search. - * @param length The maximum number of characters to search. 
- * @param arena The arena to allocate from when appending to line_offsets. - * @param line_offsets The list of newlines to populate. - * @param start_offset The offset at which the string occurs in the source, for - * the purpose of tracking newlines. - * @return The number of characters at the start of the string that are - * whitespace. */ size_t pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_arena_t *arena, pm_line_offset_list_t *line_offsets, uint32_t start_offset); -/** +/* * Returns the number of characters at the start of the string that are decimal * digits. Disallows searching past the given maximum number of characters. - * - * @param string The string to search. - * @param length The maximum number of characters to search. - * @return The number of characters at the start of the string that are decimal - * digits. */ size_t pm_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length); -/** +/* * Returns the number of characters at the start of the string that are * hexadecimal digits. Disallows searching past the given maximum number of * characters. - * - * @param string The string to search. - * @param length The maximum number of characters to search. - * @return The number of characters at the start of the string that are - * hexadecimal digits. */ size_t pm_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length); -/** +/* * Returns the number of characters at the start of the string that are octal * digits or underscores. Disallows searching past the given maximum number of * characters. @@ -125,17 +81,10 @@ size_t pm_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length); * If multiple underscores are found in a row or if an underscore is * found at the end of the number, then the invalid pointer is set to the index * of the first invalid underscore. - * - * @param string The string to search. - * @param length The maximum number of characters to search. 
- * @param invalid The pointer to set to the index of the first invalid - * underscore. - * @return The number of characters at the start of the string that are octal - * digits or underscores. */ size_t pm_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid); -/** +/* * Returns the number of characters at the start of the string that are decimal * digits or underscores. Disallows searching past the given maximum number of * characters. @@ -143,17 +92,10 @@ size_t pm_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uin * If multiple underscores are found in a row or if an underscore is * found at the end of the number, then the invalid pointer is set to the index * of the first invalid underscore. - * - * @param string The string to search. - * @param length The maximum number of characters to search. - * @param invalid The pointer to set to the index of the first invalid - * underscore. - * @return The number of characters at the start of the string that are decimal - * digits or underscores. */ size_t pm_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid); -/** +/* * Returns the number of characters at the start of the string that are * hexadecimal digits or underscores. Disallows searching past the given maximum * number of characters. @@ -161,28 +103,16 @@ size_t pm_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const u * If multiple underscores are found in a row or if an underscore is * found at the end of the number, then the invalid pointer is set to the index * of the first invalid underscore. - * - * @param string The string to search. - * @param length The maximum number of characters to search. - * @param invalid The pointer to set to the index of the first invalid - * underscore. - * @return The number of characters at the start of the string that are - * hexadecimal digits or underscores. 
*/ size_t pm_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid); -/** +/* * Returns the number of characters at the start of the string that are regexp * options. Disallows searching past the given maximum number of characters. - * - * @param string The string to search. - * @param length The maximum number of characters to search. - * @return The number of characters at the start of the string that are regexp - * options. */ size_t pm_strspn_regexp_option(const uint8_t *string, ptrdiff_t length); -/** +/* * Returns the number of characters at the start of the string that are binary * digits or underscores. Disallows searching past the given maximum number of * characters. @@ -190,47 +120,20 @@ size_t pm_strspn_regexp_option(const uint8_t *string, ptrdiff_t length); * If multiple underscores are found in a row or if an underscore is * found at the end of the number, then the invalid pointer is set to the index * of the first invalid underscore. - * - * @param string The string to search. - * @param length The maximum number of characters to search. - * @param invalid The pointer to set to the index of the first invalid - * underscore. - * @return The number of characters at the start of the string that are binary - * digits or underscores. */ size_t pm_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid); -/** - * Returns true if the given character is a binary digit. - * - * @param b The character to check. - * @return True if the given character is a binary digit. - */ +/* Returns true if the given character is a binary digit. */ bool pm_char_is_binary_digit(const uint8_t b); -/** - * Returns true if the given character is an octal digit. - * - * @param b The character to check. - * @return True if the given character is an octal digit. - */ +/* Returns true if the given character is an octal digit. 
*/ bool pm_char_is_octal_digit(const uint8_t b); -/** - * Returns true if the given character is a decimal digit. - * - * @param b The character to check. - * @return True if the given character is a decimal digit. - */ +/* Returns true if the given character is a decimal digit. */ bool pm_char_is_decimal_digit(const uint8_t b); -/** - * Returns true if the given character is a hexadecimal digit. - * - * @param b The character to check. - * @return True if the given character is a hexadecimal digit. - */ +/* Returns true if the given character is a hexadecimal digit. */ bool pm_char_is_hexadecimal_digit(const uint8_t b); #endif diff --git a/include/prism/internal/comments.h b/include/prism/internal/comments.h index e8fbb0e6aa..bb3039a658 100644 --- a/include/prism/internal/comments.h +++ b/include/prism/internal/comments.h @@ -1,6 +1,3 @@ -/** - * @file internal/comments.h - */ #ifndef PRISM_INTERNAL_COMMENTS_H #define PRISM_INTERNAL_COMMENTS_H @@ -8,17 +5,15 @@ #include "prism/internal/list.h" -/** - * A comment found while parsing. - */ +/* A comment found while parsing. */ struct pm_comment_t { - /** The embedded base node. */ + /* The embedded base node. */ pm_list_node_t node; - /** The location of the comment in the source. */ + /* The location of the comment in the source. */ pm_location_t location; - /** The type of the comment. */ + /* The type of the comment. */ pm_comment_type_t type; }; diff --git a/include/prism/internal/constant_pool.h b/include/prism/internal/constant_pool.h index 68d7d63203..7ca265d594 100644 --- a/include/prism/internal/constant_pool.h +++ b/include/prism/internal/constant_pool.h @@ -1,12 +1,3 @@ -/** - * @file internal/constant_pool.h - * - * A data structure that stores a set of strings. - * - * Each string is assigned a unique id, which can be used to compare strings for - * equality. This comparison ends up being much faster than strcmp, since it - * only requires a single integer comparison. 
- */ #ifndef PRISM_INTERNAL_CONSTANT_POOL_H #define PRISM_INTERNAL_CONSTANT_POOL_H @@ -16,179 +7,116 @@ #include -/** A constant in the pool which effectively stores a string. */ +/* A constant in the pool which effectively stores a string. */ struct pm_constant_t { - /** A pointer to the start of the string. */ + /* A pointer to the start of the string. */ const uint8_t *start; - /** The length of the string. */ + /* The length of the string. */ size_t length; }; -/** +/* * The type of bucket in the constant pool hash map. This determines how the * bucket should be freed. */ typedef unsigned int pm_constant_pool_bucket_type_t; -/** By default, each constant is a slice of the source. */ +/* By default, each constant is a slice of the source. */ static const pm_constant_pool_bucket_type_t PM_CONSTANT_POOL_BUCKET_DEFAULT = 0; -/** An owned constant is one for which memory has been allocated. */ +/* An owned constant is one for which memory has been allocated. */ static const pm_constant_pool_bucket_type_t PM_CONSTANT_POOL_BUCKET_OWNED = 1; -/** A constant constant is known at compile time. */ +/* A constant constant is known at compile time. */ static const pm_constant_pool_bucket_type_t PM_CONSTANT_POOL_BUCKET_CONSTANT = 2; -/** A bucket in the hash map. */ +/* A bucket in the hash map. */ typedef struct { - /** The incremental ID used for indexing back into the pool. */ + /* The incremental ID used for indexing back into the pool. */ unsigned int id: 30; - /** The type of the bucket, which determines how to free it. */ + /* The type of the bucket, which determines how to free it. */ pm_constant_pool_bucket_type_t type: 2; - /** The hash of the bucket. */ + /* The hash of the bucket. */ uint32_t hash; - /** + /* * A pointer to the start of the string, stored directly in the bucket to * avoid a pointer chase to the constants array during probing. */ const uint8_t *start; - /** The length of the string. */ + /* The length of the string. 
*/ size_t length; } pm_constant_pool_bucket_t; -/** The overall constant pool, which stores constants found while parsing. */ +/* The overall constant pool, which stores constants found while parsing. */ struct pm_constant_pool_t { - /** The buckets in the hash map. */ + /* The buckets in the hash map. */ pm_constant_pool_bucket_t *buckets; - /** The constants that are stored in the buckets. */ + /* The constants that are stored in the buckets. */ pm_constant_t *constants; - /** The number of buckets in the hash map. */ + /* The number of buckets in the hash map. */ uint32_t size; - /** The number of buckets that have been allocated in the hash map. */ + /* The number of buckets that have been allocated in the hash map. */ uint32_t capacity; }; -/** +/* * When we allocate constants into the pool, we reserve 0 to mean that the slot * is not yet filled. This constant is reused in other places to indicate the * lack of a constant id. */ #define PM_CONSTANT_ID_UNSET 0 -/** - * Initialize a list of constant ids. - * - * @param list The list to initialize. - */ +/* Initialize a list of constant ids. */ void pm_constant_id_list_init(pm_constant_id_list_t *list); -/** - * Initialize a list of constant ids with a given capacity. - * - * @param arena The arena to allocate from. - * @param list The list to initialize. - * @param capacity The initial capacity of the list. - */ +/* Initialize a list of constant ids with a given capacity. */ void pm_constant_id_list_init_capacity(pm_arena_t *arena, pm_constant_id_list_t *list, size_t capacity); -/** - * Append a constant id to a list of constant ids. - * - * @param arena The arena to allocate from. - * @param list The list to append to. - * @param id The id to append. - */ +/* Append a constant id to a list of constant ids. */ void pm_constant_id_list_append(pm_arena_t *arena, pm_constant_id_list_t *list, pm_constant_id_t id); -/** - * Insert a constant id into a list of constant ids at the specified index. 
- * - * @param list The list to insert into. - * @param index The index at which to insert. - * @param id The id to insert. - */ +/* Insert a constant id into a list of constant ids at the specified index. */ void pm_constant_id_list_insert(pm_constant_id_list_t *list, size_t index, pm_constant_id_t id); -/** - * Checks if the current constant id list includes the given constant id. - * - * @param list The list to check. - * @param id The id to check for. - * @return Whether the list includes the given id. - */ +/* Checks if the current constant id list includes the given constant id. */ bool pm_constant_id_list_includes(pm_constant_id_list_t *list, pm_constant_id_t id); -/** - * Initialize a new constant pool with a given capacity. - * - * @param arena The arena to allocate from. - * @param pool The pool to initialize. - * @param capacity The initial capacity of the pool. - */ +/* Initialize a new constant pool with a given capacity. */ void pm_constant_pool_init(pm_arena_t *arena, pm_constant_pool_t *pool, uint32_t capacity); -/** - * Return a pointer to the constant indicated by the given constant id. - * - * @param pool The pool to get the constant from. - * @param constant_id The id of the constant to get. - * @return A pointer to the constant. - */ +/* Return a pointer to the constant indicated by the given constant id. */ pm_constant_t * pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t constant_id); -/** +/* * Find a constant in a constant pool. Returns the id of the constant, or 0 if * the constant is not found. - * - * @param pool The pool to find the constant in. - * @param start A pointer to the start of the constant. - * @param length The length of the constant. - * @return The id of the constant. */ pm_constant_id_t pm_constant_pool_find(const pm_constant_pool_t *pool, const uint8_t *start, size_t length); -/** +/* * Insert a constant into a constant pool that is a slice of a source string. 
* Returns the id of the constant, or 0 if any potential calls to resize fail. - * - * @param arena The arena to allocate from. - * @param pool The pool to insert the constant into. - * @param start A pointer to the start of the constant. - * @param length The length of the constant. - * @return The id of the constant. */ pm_constant_id_t pm_constant_pool_insert_shared(pm_arena_t *arena, pm_constant_pool_t *pool, const uint8_t *start, size_t length); -/** +/* * Insert a constant into a constant pool from memory that is now owned by the * constant pool. Returns the id of the constant, or 0 if any potential calls to * resize fail. - * - * @param arena The arena to allocate from. - * @param pool The pool to insert the constant into. - * @param start A pointer to the start of the constant. - * @param length The length of the constant. - * @return The id of the constant. */ pm_constant_id_t pm_constant_pool_insert_owned(pm_arena_t *arena, pm_constant_pool_t *pool, uint8_t *start, size_t length); -/** +/* * Insert a constant into a constant pool from memory that is constant. Returns * the id of the constant, or 0 if any potential calls to resize fail. - * - * @param arena The arena to allocate from. - * @param pool The pool to insert the constant into. - * @param start A pointer to the start of the constant. - * @param length The length of the constant. - * @return The id of the constant. */ pm_constant_id_t pm_constant_pool_insert_constant(pm_arena_t *arena, pm_constant_pool_t *pool, const uint8_t *start, size_t length); diff --git a/include/prism/internal/encoding.h b/include/prism/internal/encoding.h index 409345fd7f..62392ef970 100644 --- a/include/prism/internal/encoding.h +++ b/include/prism/internal/encoding.h @@ -1,8 +1,3 @@ -/** - * @file internal/encoding.h - * - * The encoding interface and implementations used by the parser. 
- */ #ifndef PRISM_INTERNAL_ENCODING_H #define PRISM_INTERNAL_ENCODING_H @@ -10,115 +5,91 @@ #include #include -/** +/* * This struct defines the functions necessary to implement the encoding * interface so we can determine how many bytes the subsequent character takes. * Each callback should return the number of bytes, or 0 if the next bytes are * invalid for the encoding and type. */ typedef struct { - /** + /* * Return the number of bytes that the next character takes if it is valid * in the encoding. Does not read more than n bytes. It is assumed that n is * at least 1. */ size_t (*char_width)(const uint8_t *b, ptrdiff_t n); - /** + /* * Return the number of bytes that the next character takes if it is valid * in the encoding and is alphabetical. Does not read more than n bytes. It * is assumed that n is at least 1. */ size_t (*alpha_char)(const uint8_t *b, ptrdiff_t n); - /** + /* * Return the number of bytes that the next character takes if it is valid * in the encoding and is alphanumeric. Does not read more than n bytes. It * is assumed that n is at least 1. */ size_t (*alnum_char)(const uint8_t *b, ptrdiff_t n); - /** + /* * Return true if the next character is valid in the encoding and is an * uppercase character. Does not read more than n bytes. It is assumed that * n is at least 1. */ bool (*isupper_char)(const uint8_t *b, ptrdiff_t n); - /** + /* * The name of the encoding. This should correspond to a value that can be * passed to Encoding.find in Ruby. */ const char *name; - /** - * Return true if the encoding is a multibyte encoding. - */ + /* Return true if the encoding is a multibyte encoding. */ bool multibyte; } pm_encoding_t; -/** +/* * All of the lookup tables use the first bit of each embedded byte to indicate * whether the codepoint is alphabetical. */ #define PRISM_ENCODING_ALPHABETIC_BIT 1 << 0 -/** +/* * All of the lookup tables use the second bit of each embedded byte to indicate * whether the codepoint is alphanumeric. 
*/ #define PRISM_ENCODING_ALPHANUMERIC_BIT 1 << 1 -/** +/* * All of the lookup tables use the third bit of each embedded byte to indicate * whether the codepoint is uppercase. */ #define PRISM_ENCODING_UPPERCASE_BIT 1 << 2 -/** - * Return the size of the next character in the UTF-8 encoding. - * - * @param b The bytes to read. - * @param n The number of bytes that can be read. - * @returns The number of bytes that the next character takes if it is valid in - * the encoding, or 0 if it is not. - */ +/* Return the size of the next character in the UTF-8 encoding. */ size_t pm_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n); -/** +/* * Return the size of the next character in the UTF-8 encoding if it is an * alphabetical character. - * - * @param b The bytes to read. - * @param n The number of bytes that can be read. - * @returns The number of bytes that the next character takes if it is valid in - * the encoding, or 0 if it is not. */ size_t pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n); -/** +/* * Return the size of the next character in the UTF-8 encoding if it is an * alphanumeric character. - * - * @param b The bytes to read. - * @param n The number of bytes that can be read. - * @returns The number of bytes that the next character takes if it is valid in - * the encoding, or 0 if it is not. */ size_t pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n); -/** +/* * Return true if the next character in the UTF-8 encoding if it is an uppercase * character. - * - * @param b The bytes to read. - * @param n The number of bytes that can be read. - * @returns True if the next character is valid in the encoding and is an - * uppercase character, or false if it is not. */ bool pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n); -/** +/* * This lookup table is referenced in both the UTF-8 encoding file and the * parser directly in order to speed up the default encoding processing. 
It is * used to indicate whether a character is alphabetical, alphanumeric, or @@ -126,9 +97,7 @@ bool pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n); */ extern const uint8_t pm_encoding_unicode_table[256]; -/** - * These are all of the encodings that prism supports. - */ +/* These are all of the encodings that prism supports. */ typedef enum { PM_ENCODING_UTF_8 = 0, PM_ENCODING_US_ASCII, @@ -229,50 +198,44 @@ typedef enum { PM_ENCODING_MAXIMUM } pm_encoding_type_t; -/** - * This is the table of all of the encodings that prism supports. - */ +/* This is the table of all of the encodings that prism supports. */ extern const pm_encoding_t pm_encodings[PM_ENCODING_MAXIMUM]; -/** +/* * This is the default UTF-8 encoding. We need a reference to it to quickly * create parsers. */ #define PM_ENCODING_UTF_8_ENTRY (&pm_encodings[PM_ENCODING_UTF_8]) -/** +/* * This is the US-ASCII encoding. We need a reference to it to be able to * compare against it when a string is being created because it could possibly * need to fall back to ASCII-8BIT. */ #define PM_ENCODING_US_ASCII_ENTRY (&pm_encodings[PM_ENCODING_US_ASCII]) -/** +/* * This is the ASCII-8BIT encoding. We need a reference to it so that pm_strpbrk * can compare against it because invalid multibyte characters are not a thing * in this encoding. It is also needed for handling Regexp encoding flags. */ #define PM_ENCODING_ASCII_8BIT_ENTRY (&pm_encodings[PM_ENCODING_ASCII_8BIT]) -/** +/* * This is the EUC-JP encoding. We need a reference to it to quickly process * regular expression modifiers. */ #define PM_ENCODING_EUC_JP_ENTRY (&pm_encodings[PM_ENCODING_EUC_JP]) -/** +/* * This is the Windows-31J encoding. We need a reference to it to quickly * process regular expression modifiers. 
*/ #define PM_ENCODING_WINDOWS_31J_ENTRY (&pm_encodings[PM_ENCODING_WINDOWS_31J]) -/** +/* * Parse the given name of an encoding and return a pointer to the corresponding * encoding struct if one can be found, otherwise return NULL. - * - * @param start A pointer to the first byte of the name. - * @param end A pointer to the last byte of the name. - * @returns A pointer to the encoding struct if one is found, otherwise NULL. */ const pm_encoding_t * pm_encoding_find(const uint8_t *start, const uint8_t *end); diff --git a/include/prism/internal/integer.h b/include/prism/internal/integer.h index 8bf21ae69d..7c9767e323 100644 --- a/include/prism/internal/integer.h +++ b/include/prism/internal/integer.h @@ -1,6 +1,4 @@ -/** - * @file internal/integer.h - * +/* * This module provides functions for working with arbitrary-sized integers. */ #ifndef PRISM_INTERNAL_INTEGER_H @@ -11,28 +9,28 @@ #include -/** +/* * An enum controlling the base of an integer. It is expected that the base is * already known before parsing the integer, even though it could be derived * from the string itself. */ typedef enum { - /** The default decimal base, with no prefix. Leading 0s will be ignored. */ + /* The default decimal base, with no prefix. Leading 0s will be ignored. */ PM_INTEGER_BASE_DEFAULT, - /** The binary base, indicated by a 0b or 0B prefix. */ + /* The binary base, indicated by a 0b or 0B prefix. */ PM_INTEGER_BASE_BINARY, - /** The octal base, indicated by a 0, 0o, or 0O prefix. */ + /* The octal base, indicated by a 0, 0o, or 0O prefix. */ PM_INTEGER_BASE_OCTAL, - /** The decimal base, indicated by a 0d, 0D, or empty prefix. */ + /* The decimal base, indicated by a 0d, 0D, or empty prefix. */ PM_INTEGER_BASE_DECIMAL, - /** The hexadecimal base, indicated by a 0x or 0X prefix. */ + /* The hexadecimal base, indicated by a 0x or 0X prefix. 
*/ PM_INTEGER_BASE_HEXADECIMAL, - /** + /* * An unknown base, in which case pm_integer_parse will derive it based on * the content of the string. This is less efficient and does more * comparisons, so if callers know the base ahead of time, they should use @@ -41,47 +39,30 @@ typedef enum { PM_INTEGER_BASE_UNKNOWN } pm_integer_base_t; -/** +/* * Parse an integer from a string. This assumes that the format of the integer * has already been validated, as internal validation checks are not performed * here. - * - * @param integer The integer to parse into. - * @param base The base of the integer. - * @param start The start of the string. - * @param end The end of the string. */ void pm_integer_parse(pm_integer_t *integer, pm_integer_base_t base, const uint8_t *start, const uint8_t *end); -/** +/* * Compare two integers. This function returns -1 if the left integer is less * than the right integer, 0 if they are equal, and 1 if the left integer is * greater than the right integer. - * - * @param left The left integer to compare. - * @param right The right integer to compare. - * @return The result of the comparison. */ int pm_integer_compare(const pm_integer_t *left, const pm_integer_t *right); -/** +/* * Reduce a ratio of integers to its simplest form. * * If either the numerator or denominator do not fit into a 32-bit integer, then * this function is a no-op. In the future, we may consider reducing even the * larger numbers, but for now we're going to keep it simple. - * - * @param numerator The numerator of the ratio. - * @param denominator The denominator of the ratio. */ void pm_integers_reduce(pm_integer_t *numerator, pm_integer_t *denominator); -/** - * Convert an integer to a decimal string. - * - * @param buffer The buffer to append the string to. - * @param integer The integer to convert to a string. - */ +/* Convert an integer to a decimal string. 
*/ void pm_integer_string(pm_buffer_t *buffer, const pm_integer_t *integer); #endif diff --git a/include/prism/internal/isinf.h b/include/prism/internal/isinf.h index 569f4726e7..41c160f56d 100644 --- a/include/prism/internal/isinf.h +++ b/include/prism/internal/isinf.h @@ -1,10 +1,7 @@ -/** - * @file isinf.h - */ -#ifndef PRISM_ISINF_H -#define PRISM_ISINF_H +#ifndef PRISM_INTERNAL_ISINF_H +#define PRISM_INTERNAL_ISINF_H -/** +/* * isinf on POSIX systems accepts a float, a double, or a long double. But mingw * didn't provide an isinf macro, only an isinf function that only accepts * floats, so we need to use _finite instead. diff --git a/include/prism/internal/line_offset_list.h b/include/prism/internal/line_offset_list.h index e18f7276e6..dac9f7052e 100644 --- a/include/prism/internal/line_offset_list.h +++ b/include/prism/internal/line_offset_list.h @@ -1,16 +1,3 @@ -/** - * @file internal/line_offset_list.h - * - * A list of byte offsets of newlines in a string. - * - * When compiling the syntax tree, it's necessary to know the line and column - * of many nodes. This is necessary to support things like error messages, - * tracepoints, etc. - * - * It's possible that we could store the start line, start column, end line, and - * end column on every node in addition to the offsets that we already store, - * but that would be quite a lot of memory overhead. - */ #ifndef PRISM_INTERNAL_LINE_OFFSET_LIST_H #define PRISM_INTERNAL_LINE_OFFSET_LIST_H @@ -19,38 +6,16 @@ #include "prism/arena.h" #include "prism/line_offset_list.h" -/** - * Initialize a new line offset list with the given capacity. - * - * @param arena The arena to allocate from. - * @param list The list to initialize. - * @param capacity The initial capacity of the list. - */ +/* Initialize a new line offset list with the given capacity. */ void pm_line_offset_list_init(pm_arena_t *arena, pm_line_offset_list_t *list, size_t capacity); -/** - * Clear out the offsets that have been appended to the list. 
- * - * @param list The list to clear. - */ +/* Clear out the offsets that have been appended to the list. */ void pm_line_offset_list_clear(pm_line_offset_list_t *list); -/** - * Append a new offset to the list (slow path with resize). - * - * @param arena The arena to allocate from. - * @param list The list to append to. - * @param cursor The offset to append. - */ +/* Append a new offset to the list (slow path with resize). */ void pm_line_offset_list_append_slow(pm_arena_t *arena, pm_line_offset_list_t *list, uint32_t cursor); -/** - * Append a new offset to the list. - * - * @param arena The arena to allocate from. - * @param list The list to append to. - * @param cursor The offset to append. - */ +/* Append a new offset to the list. */ static PRISM_FORCE_INLINE void pm_line_offset_list_append(pm_arena_t *arena, pm_line_offset_list_t *list, uint32_t cursor) { if (list->size < list->capacity) { @@ -60,14 +25,9 @@ pm_line_offset_list_append(pm_arena_t *arena, pm_line_offset_list_t *list, uint3 } } -/** +/* * Returns the line of the given offset. If the offset is not in the list, the * line of the closest offset less than the given offset is returned. - * - * @param list The list to search. - * @param cursor The offset to search for. - * @param start_line The line to start counting from. - * @return The line of the given offset. */ int32_t pm_line_offset_list_line(const pm_line_offset_list_t *list, uint32_t cursor, int32_t start_line); diff --git a/include/prism/internal/list.h b/include/prism/internal/list.h index 9a73e6bd3f..0ab59ef32a 100644 --- a/include/prism/internal/list.h +++ b/include/prism/internal/list.h @@ -1,14 +1,9 @@ -/** - * @file internal/list.h - * - * An abstract linked list. - */ #ifndef PRISM_INTERNAL_LIST_H #define PRISM_INTERNAL_LIST_H #include -/** +/* * This struct represents an abstract linked list that provides common * functionality. It is meant to be used any time a linked list is necessary to * store data. 
@@ -39,39 +34,29 @@ * iteration and appending of new nodes. */ typedef struct pm_list_node { - /** A pointer to the next node in the list. */ + /* A pointer to the next node in the list. */ struct pm_list_node *next; } pm_list_node_t; -/** +/* * This represents the overall linked list. It keeps a pointer to the head and * tail so that iteration is easy and pushing new nodes is easy. */ typedef struct { - /** The size of the list. */ + /* The size of the list. */ size_t size; - /** A pointer to the head of the list. */ + /* A pointer to the head of the list. */ pm_list_node_t *head; - /** A pointer to the tail of the list. */ + /* A pointer to the tail of the list. */ pm_list_node_t *tail; } pm_list_t; -/** - * Returns the size of the list. - * - * @param list The list to check. - * @return The size of the list. - */ +/* Returns the size of the list. */ size_t pm_list_size(pm_list_t *list); -/** - * Append a node to the given list. - * - * @param list The list to append to. - * @param node The node to append. - */ +/* Append a node to the given list. */ void pm_list_append(pm_list_t *list, pm_list_node_t *node); #endif diff --git a/include/prism/internal/magic_comments.h b/include/prism/internal/magic_comments.h index 57c964bf4e..72a581c5d7 100644 --- a/include/prism/internal/magic_comments.h +++ b/include/prism/internal/magic_comments.h @@ -1,6 +1,3 @@ -/** - * @file internal/magic_comments.h - */ #ifndef PRISM_INTERNAL_MAGIC_COMMENTS_H #define PRISM_INTERNAL_MAGIC_COMMENTS_H @@ -8,20 +5,18 @@ #include "prism/internal/list.h" -/** +/* * This is a node in the linked list of magic comments that we've found while * parsing. - * - * @extends pm_list_node_t */ struct pm_magic_comment_t { - /** The embedded base node. */ + /* The embedded base node. */ pm_list_node_t node; - /** The key of the magic comment. */ + /* The key of the magic comment. */ pm_location_t key; - /** The value of the magic comment. */ + /* The value of the magic comment. 
*/ pm_location_t value; }; diff --git a/include/prism/internal/memchr.h b/include/prism/internal/memchr.h index 905e3f33a1..63c738387d 100644 --- a/include/prism/internal/memchr.h +++ b/include/prism/internal/memchr.h @@ -1,8 +1,3 @@ -/** - * @file internal/memchr.h - * - * A custom memchr implementation. - */ #ifndef PRISM_INTERNAL_MEMCHR_H #define PRISM_INTERNAL_MEMCHR_H @@ -10,18 +5,10 @@ #include -/** +/* * We need to roll our own memchr to handle cases where the encoding changes and * we need to search for a character in a buffer that could be the trailing byte * of a multibyte character. - * - * @param source The source string. - * @param character The character to search for. - * @param number The maximum number of bytes to search. - * @param encoding_changed Whether the encoding changed. - * @param encoding A pointer to the encoding. - * @return A pointer to the first occurrence of the character in the source - * string, or NULL if no such character exists. */ void * pm_memchr(const void *source, int character, size_t number, bool encoding_changed, const pm_encoding_t *encoding); diff --git a/include/prism/internal/node.h b/include/prism/internal/node.h index 075dc33e0a..ca6d5616d7 100644 --- a/include/prism/internal/node.h +++ b/include/prism/internal/node.h @@ -1,6 +1,3 @@ -/** - * @file internal/node.h - */ #ifndef PRISM_INTERNAL_NODE_H #define PRISM_INTERNAL_NODE_H @@ -10,23 +7,13 @@ #include "prism/arena.h" -/** +/* * Slow path for pm_node_list_append: grow the list and append the node. * Do not call directly — use pm_node_list_append instead. - * - * @param arena The arena to allocate from. - * @param list The list to append to. - * @param node The node to append. */ void pm_node_list_append_slow(pm_arena_t *arena, pm_node_list_t *list, pm_node_t *node); -/** - * Append a new node onto the end of the node list. - * - * @param arena The arena to allocate from. - * @param list The list to append to. - * @param node The node to append. 
- */ +/* Append a new node onto the end of the node list. */ static PRISM_FORCE_INLINE void pm_node_list_append(pm_arena_t *arena, pm_node_list_t *list, pm_node_t *node) { if (list->size < list->capacity) { @@ -36,22 +23,10 @@ pm_node_list_append(pm_arena_t *arena, pm_node_list_t *list, pm_node_t *node) { } } -/** - * Prepend a new node onto the beginning of the node list. - * - * @param arena The arena to allocate from. - * @param list The list to prepend to. - * @param node The node to prepend. - */ +/* Prepend a new node onto the beginning of the node list. */ void pm_node_list_prepend(pm_arena_t *arena, pm_node_list_t *list, pm_node_t *node); -/** - * Concatenate the given node list onto the end of the other node list. - * - * @param arena The arena to allocate from. - * @param list The list to concatenate onto. - * @param other The list to concatenate. - */ +/* Concatenate the given node list onto the end of the other node list. */ void pm_node_list_concat(pm_arena_t *arena, pm_node_list_t *list, pm_node_list_t *other); #endif diff --git a/include/prism/internal/options.h b/include/prism/internal/options.h index 59606b09e6..7e37742a8b 100644 --- a/include/prism/internal/options.h +++ b/include/prism/internal/options.h @@ -1,95 +1,84 @@ -/** - * @file internal/options.h - * - * The options that can be passed to parsing. - */ #ifndef PRISM_INTERNAL_OPTIONS_H #define PRISM_INTERNAL_OPTIONS_H #include "prism/options.h" -/** - * A scope of locals surrounding the code that is being parsed. - */ +/* A scope of locals surrounding the code that is being parsed. */ struct pm_options_scope_t { - /** The number of locals in the scope. */ + /* The number of locals in the scope. */ size_t locals_count; - /** The names of the locals in the scope. */ + /* The names of the locals in the scope. */ pm_string_t *locals; - /** Flags for the set of forwarding parameters in this scope. */ + /* Flags for the set of forwarding parameters in this scope. 
*/ uint8_t forwarding; }; -/** +/* * The version of Ruby syntax that we should be parsing with. This is used to * allow consumers to specify which behavior they want in case they need to * parse in the same way as a specific version of CRuby would have. */ typedef enum { - /** + /* * If an explicit version is not provided, the current version of prism will * be used. */ PM_OPTIONS_VERSION_UNSET = 0, - /** The vendored version of prism in CRuby 3.3.x. */ + /* The vendored version of prism in CRuby 3.3.x. */ PM_OPTIONS_VERSION_CRUBY_3_3 = 1, - /** The vendored version of prism in CRuby 3.4.x. */ + /* The vendored version of prism in CRuby 3.4.x. */ PM_OPTIONS_VERSION_CRUBY_3_4 = 2, - /** The vendored version of prism in CRuby 4.0.x. */ + /* The vendored version of prism in CRuby 4.0.x. */ PM_OPTIONS_VERSION_CRUBY_3_5 = 3, - /** The vendored version of prism in CRuby 4.0.x. */ + /* The vendored version of prism in CRuby 4.0.x. */ PM_OPTIONS_VERSION_CRUBY_4_0 = 3, - /** The vendored version of prism in CRuby 4.1.x. */ + /* The vendored version of prism in CRuby 4.1.x. */ PM_OPTIONS_VERSION_CRUBY_4_1 = 4, - /** The current version of prism. */ + /* The current version of prism. */ PM_OPTIONS_VERSION_LATEST = PM_OPTIONS_VERSION_CRUBY_4_1 } pm_options_version_t; -/** - * The options that can be passed to the parser. - */ +/* The options that can be passed to the parser. */ struct pm_options_t { - /** + /* * The callback to call when additional switches are found in a shebang * comment. */ pm_options_shebang_callback_t shebang_callback; - /** + /* * Any additional data that should be passed along to the shebang callback * if one was set. */ void *shebang_callback_data; - /** The name of the file that is currently being parsed. */ + /* The name of the file that is currently being parsed. */ pm_string_t filepath; - /** + /* * The line within the file that the parse starts on. This value is * 1-indexed. 
*/ int32_t line; - /** + /* * The name of the encoding that the source file is in. Note that this must * correspond to a name that can be found with Encoding.find in Ruby. */ pm_string_t encoding; - /** - * The number of scopes surrounding the code that is being parsed. - */ + /* The number of scopes surrounding the code that is being parsed. */ size_t scopes_count; - /** + /* * The scopes surrounding the code that is being parsed. For most parses * this will be NULL, but for evals it will be the locals that are in scope * surrounding the eval. Scopes are ordered from the outermost scope to the @@ -97,17 +86,17 @@ struct pm_options_t { */ pm_options_scope_t *scopes; - /** + /* * The version of prism that we should be parsing with. This is used to * allow consumers to specify which behavior they want in case they need to * parse exactly as a specific version of CRuby. */ pm_options_version_t version; - /** A bitset of the various options that were set on the command line. */ + /* A bitset of the various options that were set on the command line. */ uint8_t command_line; - /** + /* * Whether or not the frozen string literal option has been set. * May be: * - PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED @@ -116,21 +105,21 @@ struct pm_options_t { */ int8_t frozen_string_literal; - /** + /* * Whether or not the encoding magic comments should be respected. This is a * niche use-case where you want to parse a file with a specific encoding * but ignore any encoding magic comments at the top of the file. */ bool encoding_locked; - /** + /* * When the file being parsed is the main script, the shebang will be * considered for command-line flags (or for implicit -x). The caller needs * to pass this information to the parser so that it can behave correctly. */ bool main_script; - /** + /* * When the file being parsed is considered a "partial" script, jumps will * not be marked as errors if they are not contained within loops/blocks. 
* This is used in the case that you're parsing a script that you know will @@ -140,7 +129,7 @@ struct pm_options_t { */ bool partial_script; - /** + /* * Whether or not the parser should freeze the nodes that it creates. This * makes it possible to have a deeply frozen AST that is safe to share * between concurrency primitives. @@ -148,14 +137,10 @@ struct pm_options_t { bool freeze; }; -/** - * Free the internal memory associated with the options. - * - * @param options The options struct whose internal memory should be freed. - */ +/* Free the internal memory associated with the options. */ void pm_options_cleanup(pm_options_t *options); -/** +/* * Deserialize an options struct from the given binary string. This is used to * pass options to the parser from an FFI call so that consumers of the library * from an FFI perspective don't have to worry about the structure of our @@ -221,9 +206,6 @@ void pm_options_cleanup(pm_options_t *options); * * The frozen string literal, encoding locked, main script, and partial script * fields are booleans, so their values should be either 0 or 1. * * The number of scopes can be 0. - * - * @param options The options struct to deserialize into. - * @param data The binary string to deserialize from. */ void pm_options_read(pm_options_t *options, const char *data); diff --git a/include/prism/internal/parser.h b/include/prism/internal/parser.h index c731e629db..dbed71e737 100644 --- a/include/prism/internal/parser.h +++ b/include/prism/internal/parser.h @@ -1,8 +1,3 @@ -/** - * @file internal/parser.h - * - * The parser used to parse Ruby source. - */ #ifndef PRISM_INTERNAL_PARSER_H #define PRISM_INTERNAL_PARSER_H @@ -23,7 +18,7 @@ #include #include -/** +/* * This enum provides various bits that represent different kinds of states that * the lexer can track. This is used to determine which kind of token to return * based on the context of the parser. 
@@ -44,7 +39,7 @@ typedef enum { PM_LEX_STATE_BIT_FITEM } pm_lex_state_bit_t; -/** +/* * This enum combines the various bits from the above enum into individual * values that represent the various states of the lexer. */ @@ -68,7 +63,7 @@ typedef enum { PM_LEX_STATE_END_ANY = PM_LEX_STATE_END | PM_LEX_STATE_ENDARG | PM_LEX_STATE_ENDFN } pm_lex_state_t; -/** +/* * The type of quote that a heredoc uses. */ typedef enum { @@ -78,7 +73,7 @@ typedef enum { PM_HEREDOC_QUOTE_BACKTICK = '`', } pm_heredoc_quote_t; -/** +/* * The type of indentation that a heredoc uses. */ typedef enum { @@ -87,24 +82,24 @@ typedef enum { PM_HEREDOC_INDENT_TILDE, } pm_heredoc_indent_t; -/** +/* * All of the information necessary to store to lexing a heredoc. */ typedef struct { - /** A pointer to the start of the heredoc identifier. */ + /* A pointer to the start of the heredoc identifier. */ const uint8_t *ident_start; - /** The length of the heredoc identifier. */ + /* The length of the heredoc identifier. */ size_t ident_length; - /** The type of quote that the heredoc uses. */ + /* The type of quote that the heredoc uses. */ pm_heredoc_quote_t quote; - /** The type of indentation that the heredoc uses. */ + /* The type of indentation that the heredoc uses. */ pm_heredoc_indent_t indent; } pm_heredoc_lex_mode_t; -/** +/* * When lexing Ruby source, the lexer has a small amount of state to tell which * kind of token it is currently lexing. For example, when we find the start of * a string, the first token that we return is a TOKEN_STRING_BEGIN token. After @@ -112,64 +107,64 @@ typedef struct { * are found as part of a string. */ typedef struct pm_lex_mode { - /** The type of this lex mode. */ + /* The type of this lex mode. */ enum { - /** This state is used when any given token is being lexed. */ + /* This state is used when any given token is being lexed. */ PM_LEX_DEFAULT, - /** + /* * This state is used when we're lexing as normal but inside an embedded * expression of a string. 
*/ PM_LEX_EMBEXPR, - /** + /* * This state is used when we're lexing a variable that is embedded * directly inside of a string with the # shorthand. */ PM_LEX_EMBVAR, - /** This state is used when you are inside the content of a heredoc. */ + /* This state is used when you are inside the content of a heredoc. */ PM_LEX_HEREDOC, - /** + /* * This state is used when we are lexing a list of tokens, as in a %w * word list literal or a %i symbol list literal. */ PM_LEX_LIST, - /** + /* * This state is used when a regular expression has been begun and we * are looking for the terminator. */ PM_LEX_REGEXP, - /** + /* * This state is used when we are lexing a string or a string-like * token, as in string content with either quote or an xstring. */ PM_LEX_STRING } mode; - /** The data associated with this type of lex mode. */ + /* The data associated with this type of lex mode. */ union { struct { - /** This keeps track of the nesting level of the list. */ + /* This keeps track of the nesting level of the list. */ size_t nesting; - /** Whether or not interpolation is allowed in this list. */ + /* Whether or not interpolation is allowed in this list. */ bool interpolation; - /** + /* * When lexing a list, it takes into account balancing the * terminator if the terminator is one of (), [], {}, or <>. */ uint8_t incrementor; - /** This is the terminator of the list literal. */ + /* This is the terminator of the list literal. */ uint8_t terminator; - /** + /* * This is the character set that should be used to delimit the * tokens within the list. */ @@ -177,21 +172,21 @@ typedef struct pm_lex_mode { } list; struct { - /** + /* * This keeps track of the nesting level of the regular expression. */ size_t nesting; - /** + /* * When lexing a regular expression, it takes into account balancing * the terminator if the terminator is one of (), [], {}, or <>. */ uint8_t incrementor; - /** This is the terminator of the regular expression. 
*/ + /* This is the terminator of the regular expression. */ uint8_t terminator; - /** + /* * This is the character set that should be used to delimit the * tokens within the regular expression. */ @@ -199,32 +194,32 @@ typedef struct pm_lex_mode { } regexp; struct { - /** This keeps track of the nesting level of the string. */ + /* This keeps track of the nesting level of the string. */ size_t nesting; - /** Whether or not interpolation is allowed in this string. */ + /* Whether or not interpolation is allowed in this string. */ bool interpolation; - /** + /* * Whether or not at the end of the string we should allow a :, * which would indicate this was a dynamic symbol instead of a * string. */ bool label_allowed; - /** + /* * When lexing a string, it takes into account balancing the * terminator if the terminator is one of (), [], {}, or <>. */ uint8_t incrementor; - /** + /* * This is the terminator of the string. It is typically either a * single or double quote. */ uint8_t terminator; - /** + /* * This is the character set that should be used to delimit the * tokens within the string. */ @@ -232,273 +227,273 @@ typedef struct pm_lex_mode { } string; struct { - /** + /* * All of the data necessary to lex a heredoc. */ pm_heredoc_lex_mode_t base; - /** + /* * This is the pointer to the character where lexing should resume * once the heredoc has been completely processed. */ const uint8_t *next_start; - /** + /* * This is used to track the amount of common whitespace on each * line so that we know how much to dedent each line in the case of * a tilde heredoc. */ size_t *common_whitespace; - /** True if the previous token ended with a line continuation. */ + /* True if the previous token ended with a line continuation. */ bool line_continuation; } heredoc; } as; - /** The previous lex state so that it knows how to pop. */ + /* The previous lex state so that it knows how to pop. 
*/ struct pm_lex_mode *prev; } pm_lex_mode_t; -/** +/* * We pre-allocate a certain number of lex states in order to avoid having to * call malloc too many times while parsing. You really shouldn't need more than * this because you only really nest deeply when doing string interpolation. */ #define PM_LEX_STACK_SIZE 4 -/** +/* * While parsing, we keep track of a stack of contexts. This is helpful for * error recovery so that we can pop back to a previous context when we hit a * token that is understood by a parent context but not by the current context. */ typedef enum { - /** a null context, used for returning a value from a function */ + /* a null context, used for returning a value from a function */ PM_CONTEXT_NONE = 0, - /** a begin statement */ + /* a begin statement */ PM_CONTEXT_BEGIN, - /** an ensure statement with an explicit begin */ + /* an ensure statement with an explicit begin */ PM_CONTEXT_BEGIN_ENSURE, - /** a rescue else statement with an explicit begin */ + /* a rescue else statement with an explicit begin */ PM_CONTEXT_BEGIN_ELSE, - /** a rescue statement with an explicit begin */ + /* a rescue statement with an explicit begin */ PM_CONTEXT_BEGIN_RESCUE, - /** expressions in block arguments using braces */ + /* expressions in block arguments using braces */ PM_CONTEXT_BLOCK_BRACES, - /** expressions in block arguments using do..end */ + /* expressions in block arguments using do..end */ PM_CONTEXT_BLOCK_KEYWORDS, - /** an ensure statement within a do..end block */ + /* an ensure statement within a do..end block */ PM_CONTEXT_BLOCK_ENSURE, - /** a rescue else statement within a do..end block */ + /* a rescue else statement within a do..end block */ PM_CONTEXT_BLOCK_ELSE, - /** expressions in block parameters `foo do |...| end ` */ + /* expressions in block parameters `foo do |...| end ` */ PM_CONTEXT_BLOCK_PARAMETERS, - /** a rescue statement within a do..end block */ + /* a rescue statement within a do..end block */ PM_CONTEXT_BLOCK_RESCUE, - /** 
a case when statements */ + /* a case when statements */ PM_CONTEXT_CASE_WHEN, - /** a case in statements */ + /* a case in statements */ PM_CONTEXT_CASE_IN, - /** a class declaration */ + /* a class declaration */ PM_CONTEXT_CLASS, - /** an ensure statement within a class statement */ + /* an ensure statement within a class statement */ PM_CONTEXT_CLASS_ENSURE, - /** a rescue else statement within a class statement */ + /* a rescue else statement within a class statement */ PM_CONTEXT_CLASS_ELSE, - /** a rescue statement within a class statement */ + /* a rescue statement within a class statement */ PM_CONTEXT_CLASS_RESCUE, - /** a method definition */ + /* a method definition */ PM_CONTEXT_DEF, - /** an ensure statement within a method definition */ + /* an ensure statement within a method definition */ PM_CONTEXT_DEF_ENSURE, - /** a rescue else statement within a method definition */ + /* a rescue else statement within a method definition */ PM_CONTEXT_DEF_ELSE, - /** a rescue statement within a method definition */ + /* a rescue statement within a method definition */ PM_CONTEXT_DEF_RESCUE, - /** a method definition's parameters */ + /* a method definition's parameters */ PM_CONTEXT_DEF_PARAMS, - /** a defined? expression */ + /* a defined? 
expression */ PM_CONTEXT_DEFINED, - /** a method definition's default parameter */ + /* a method definition's default parameter */ PM_CONTEXT_DEFAULT_PARAMS, - /** an else clause */ + /* an else clause */ PM_CONTEXT_ELSE, - /** an elsif clause */ + /* an elsif clause */ PM_CONTEXT_ELSIF, - /** an interpolated expression */ + /* an interpolated expression */ PM_CONTEXT_EMBEXPR, - /** a for loop */ + /* a for loop */ PM_CONTEXT_FOR, - /** a for loop's index */ + /* a for loop's index */ PM_CONTEXT_FOR_INDEX, - /** an if statement */ + /* an if statement */ PM_CONTEXT_IF, - /** a lambda expression with braces */ + /* a lambda expression with braces */ PM_CONTEXT_LAMBDA_BRACES, - /** a lambda expression with do..end */ + /* a lambda expression with do..end */ PM_CONTEXT_LAMBDA_DO_END, - /** an ensure statement within a lambda expression */ + /* an ensure statement within a lambda expression */ PM_CONTEXT_LAMBDA_ENSURE, - /** a rescue else statement within a lambda expression */ + /* a rescue else statement within a lambda expression */ PM_CONTEXT_LAMBDA_ELSE, - /** a rescue statement within a lambda expression */ + /* a rescue statement within a lambda expression */ PM_CONTEXT_LAMBDA_RESCUE, - /** the predicate clause of a loop statement */ + /* the predicate clause of a loop statement */ PM_CONTEXT_LOOP_PREDICATE, - /** the top level context */ + /* the top level context */ PM_CONTEXT_MAIN, - /** a module declaration */ + /* a module declaration */ PM_CONTEXT_MODULE, - /** an ensure statement within a module statement */ + /* an ensure statement within a module statement */ PM_CONTEXT_MODULE_ENSURE, - /** a rescue else statement within a module statement */ + /* a rescue else statement within a module statement */ PM_CONTEXT_MODULE_ELSE, - /** a rescue statement within a module statement */ + /* a rescue statement within a module statement */ PM_CONTEXT_MODULE_RESCUE, - /** a multiple target expression */ + /* a multiple target expression */ PM_CONTEXT_MULTI_TARGET, - 
/** a parenthesized expression */ + /* a parenthesized expression */ PM_CONTEXT_PARENS, - /** an END block */ + /* an END block */ PM_CONTEXT_POSTEXE, - /** a predicate inside an if/elsif/unless statement */ + /* a predicate inside an if/elsif/unless statement */ PM_CONTEXT_PREDICATE, - /** a BEGIN block */ + /* a BEGIN block */ PM_CONTEXT_PREEXE, - /** a modifier rescue clause */ + /* a modifier rescue clause */ PM_CONTEXT_RESCUE_MODIFIER, - /** a singleton class definition */ + /* a singleton class definition */ PM_CONTEXT_SCLASS, - /** an ensure statement with a singleton class */ + /* an ensure statement with a singleton class */ PM_CONTEXT_SCLASS_ENSURE, - /** a rescue else statement with a singleton class */ + /* a rescue else statement with a singleton class */ PM_CONTEXT_SCLASS_ELSE, - /** a rescue statement with a singleton class */ + /* a rescue statement with a singleton class */ PM_CONTEXT_SCLASS_RESCUE, - /** a ternary expression */ + /* a ternary expression */ PM_CONTEXT_TERNARY, - /** an unless statement */ + /* an unless statement */ PM_CONTEXT_UNLESS, - /** an until statement */ + /* an until statement */ PM_CONTEXT_UNTIL, - /** a while statement */ + /* a while statement */ PM_CONTEXT_WHILE, } pm_context_t; -/** This is a node in a linked list of contexts. */ +/* This is a node in a linked list of contexts. */ typedef struct pm_context_node { - /** The context that this node represents. */ + /* The context that this node represents. */ pm_context_t context; - /** A pointer to the previous context in the linked list. */ + /* A pointer to the previous context in the linked list. */ struct pm_context_node *prev; } pm_context_node_t; -/** The type of shareable constant value that can be set. */ +/* The type of shareable constant value that can be set. 
*/ typedef uint8_t pm_shareable_constant_value_t; static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_NONE = 0x0; static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_LITERAL = PM_SHAREABLE_CONSTANT_NODE_FLAGS_LITERAL; static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING = PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_EVERYTHING; static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY = PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_COPY; -/** +/* * This tracks an individual local variable in a certain lexical context, as * well as the number of times is it read. */ typedef struct { - /** The name of the local variable. */ + /* The name of the local variable. */ pm_constant_id_t name; - /** The location of the local variable in the source. */ + /* The location of the local variable in the source. */ pm_location_t location; - /** The index of the local variable in the local table. */ + /* The index of the local variable in the local table. */ uint32_t index; - /** The number of times the local variable is read. */ + /* The number of times the local variable is read. */ uint32_t reads; - /** The hash of the local variable. */ + /* The hash of the local variable. */ uint32_t hash; } pm_local_t; -/** +/* * This is a set of local variables in a certain lexical context (method, class, * module, etc.). We need to track how many times these variables are read in * order to warn if they only get written. */ typedef struct pm_locals { - /** The number of local variables in the set. */ + /* The number of local variables in the set. */ uint32_t size; - /** The capacity of the local variables set. */ + /* The capacity of the local variables set. */ uint32_t capacity; - /** + /* * A bloom filter over constant IDs stored in this set. Used to quickly * reject lookups for names that are definitely not present, avoiding the * cost of a linear scan or hash probe. 
*/ uint32_t bloom; - /** The nullable allocated memory for the local variables in the set. */ + /* The nullable allocated memory for the local variables in the set. */ pm_local_t *locals; } pm_locals_t; -/** The flags about scope parameters that can be set. */ +/* The flags about scope parameters that can be set. */ typedef uint8_t pm_scope_parameters_t; static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NONE = 0x0; static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS = 0x1; @@ -509,18 +504,18 @@ static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED = 0x1 static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NUMBERED_INNER = 0x20; static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NUMBERED_FOUND = 0x40; -/** +/* * This struct represents a node in a linked list of scopes. Some scopes can see * into their parent scopes, while others cannot. */ typedef struct pm_scope { - /** A pointer to the previous scope in the linked list. */ + /* A pointer to the previous scope in the linked list. */ struct pm_scope *previous; - /** The IDs of the locals in the given scope. */ + /* The IDs of the locals in the given scope. */ pm_locals_t locals; - /** + /* * This is a list of the implicit parameters contained within the block. * These will be processed after the block is parsed to determine the kind * of parameters node that should be used and to check if any errors need to @@ -528,7 +523,7 @@ typedef struct pm_scope { */ pm_node_list_t implicit_parameters; - /** + /* * This is a bitfield that indicates the parameters that are being used in * this scope. It is a combination of the PM_SCOPE_PARAMETERS_* constants. * There are three different kinds of parameters that can be used in a @@ -549,106 +544,106 @@ typedef struct pm_scope { */ pm_scope_parameters_t parameters; - /** + /* * The current state of constant shareability for this scope. This is * changed by magic shareable_constant_value comments. 
*/ pm_shareable_constant_value_t shareable_constant; - /** + /* * A boolean indicating whether or not this scope can see into its parent. * If closed is true, then the scope cannot see into its parent. */ bool closed; } pm_scope_t; -/** +/* * A struct that represents a stack of boolean values. */ typedef uint32_t pm_state_stack_t; -/** +/* * This struct represents the overall parser. It contains a reference to the * source file, as well as pointers that indicate where in the source it's * currently parsing. It also contains the most recent and current token that * it's considering. */ struct pm_parser_t { - /** The arena used for all AST-lifetime allocations. Caller-owned. */ + /* The arena used for all AST-lifetime allocations. Caller-owned. */ pm_arena_t *arena; - /** The arena used for parser metadata (comments, diagnostics, etc.). */ + /* The arena used for parser metadata (comments, diagnostics, etc.). */ pm_arena_t metadata_arena; - /** + /* * The next node identifier that will be assigned. This is a unique * identifier used to track nodes such that the syntax tree can be dropped * but the node can be found through another parse. */ uint32_t node_id; - /** The current state of the lexer. */ + /* The current state of the lexer. */ pm_lex_state_t lex_state; - /** Tracks the current nesting of (), [], and {}. */ + /* Tracks the current nesting of (), [], and {}. */ int enclosure_nesting; - /** + /* * Used to temporarily track the nesting of enclosures to determine if a { * is the beginning of a lambda following the parameters of a lambda. */ int lambda_enclosure_nesting; - /** + /* * Used to track the nesting of braces to ensure we get the correct value * when we are interpolating blocks with braces. */ int brace_nesting; - /** + /* * The stack used to determine if a do keyword belongs to the predicate of a * while, until, or for loop. 
*/ pm_state_stack_t do_loop_stack; - /** + /* * The stack used to determine if a do keyword belongs to the beginning of a * block. */ pm_state_stack_t accepts_block_stack; - /** A stack of lex modes. */ + /* A stack of lex modes. */ struct { - /** The current mode of the lexer. */ + /* The current mode of the lexer. */ pm_lex_mode_t *current; - /** The stack of lexer modes. */ + /* The stack of lexer modes. */ pm_lex_mode_t stack[PM_LEX_STACK_SIZE]; - /** The current index into the lexer mode stack. */ + /* The current index into the lexer mode stack. */ size_t index; } lex_modes; - /** The pointer to the start of the source. */ + /* The pointer to the start of the source. */ const uint8_t *start; - /** The pointer to the end of the source. */ + /* The pointer to the end of the source. */ const uint8_t *end; - /** The previous token we were considering. */ + /* The previous token we were considering. */ pm_token_t previous; - /** The current token we're considering. */ + /* The current token we're considering. */ pm_token_t current; - /** + /* * This is a special field set on the parser when we need the parser to jump * to a specific location when lexing the next token, as opposed to just * using the end of the previous token. Normally this is NULL. */ const uint8_t *next_start; - /** + /* * This field indicates the end of a heredoc whose identifier was found on * the current line. If another heredoc is found on the same line, then this * will be moved forward to the end of that heredoc. If no heredocs are @@ -656,32 +651,32 @@ struct pm_parser_t { */ const uint8_t *heredoc_end; - /** The list of comments that have been found while parsing. */ + /* The list of comments that have been found while parsing. */ pm_list_t comment_list; - /** The list of magic comments that have been found while parsing. */ + /* The list of magic comments that have been found while parsing. 
*/ pm_list_t magic_comment_list; - /** + /* * An optional location that represents the location of the __END__ marker * and the rest of the content of the file. This content is loaded into the * DATA constant when the file being parsed is the main file being executed. */ pm_location_t data_loc; - /** The list of warnings that have been found while parsing. */ + /* The list of warnings that have been found while parsing. */ pm_list_t warning_list; - /** The list of errors that have been found while parsing. */ + /* The list of errors that have been found while parsing. */ pm_list_t error_list; - /** The current local scope. */ + /* The current local scope. */ pm_scope_t *current_scope; - /** The current parsing context. */ + /* The current parsing context. */ pm_context_node_t *current_context; - /** + /* * The hash keys for the hash that is currently being parsed. This is not * usually necessary because it can pass it down the various call chains, * but in the event that you're parsing a hash that is being directly @@ -690,26 +685,26 @@ struct pm_parser_t { */ pm_static_literals_t *current_hash_keys; - /** + /* * The encoding functions for the current file is attached to the parser as * it's parsing so that it can change with a magic comment. */ const pm_encoding_t *encoding; - /** + /* * When the encoding that is being used to parse the source is changed by * prism, we provide the ability here to call out to a user-defined * function. */ pm_encoding_changed_callback_t encoding_changed_callback; - /** + /* * This pointer indicates where a comment must start if it is to be * considered an encoding comment. */ const uint8_t *encoding_comment_start; - /** + /* * When you are lexing through a file, the lexer needs all of the information * that the parser additionally provides (for example, the local table). So if * you want to properly lex Ruby, you need to actually lex it in the context of @@ -718,14 +713,14 @@ struct pm_parser_t { * callback when each token is lexed. 
*/ struct { - /** + /* * This is the callback that is called when a token is lexed. It is * passed the opaque data pointer, the parser, and the token that was * lexed. */ pm_lex_callback_t callback; - /** + /* * This opaque pointer is used to provide whatever information the user * deemed necessary to the callback. In our case we use it to pass the * array that the tokens get appended into. @@ -733,32 +728,32 @@ struct pm_parser_t { void *data; } lex_callback; - /** + /* * This is the path of the file being parsed. We use the filepath when * constructing SourceFileNodes. */ pm_string_t filepath; - /** + /* * This constant pool keeps all of the constants defined throughout the file * so that we can reference them later. */ pm_constant_pool_t constant_pool; - /** This is the list of line offsets in the source file. */ + /* This is the list of line offsets in the source file. */ pm_line_offset_list_t line_offsets; - /** + /* * State communicated from the lexer to the parser for integer tokens. */ struct { - /** + /* * A flag indicating the base of the integer (binary, octal, decimal, * hexadecimal). Set during lexing and read during node creation. */ pm_node_flags_t base; - /** + /* * When lexing a decimal integer that fits in a uint32_t, we compute * the value during lexing to avoid re-scanning the digits during * parsing. If lexed is true, this holds the result and @@ -766,23 +761,23 @@ struct pm_parser_t { */ uint32_t value; - /** Whether value holds a valid pre-computed integer. */ + /* Whether value holds a valid pre-computed integer. */ bool lexed; } integer; - /** + /* * This string is used to pass information from the lexer to the parser. It * is particularly necessary because of escape sequences. */ pm_string_t current_string; - /** + /* * The line number at the start of the parse. This will be used to offset * the line numbers of all of the locations. 
*/ int32_t start_line; - /** + /* * When a string-like expression is being lexed, any byte or escape sequence * that resolves to a value whose top bit is set (i.e., >= 0x80) will * explicitly set the encoding to the same encoding as the source. @@ -813,7 +808,7 @@ struct pm_parser_t { */ const pm_encoding_t *explicit_encoding; - /** + /* * When parsing block exits (e.g., break, next, redo), we need to validate * that they are in correct contexts. For the most part we can do this by * looking at our parent contexts. However, modifier while and until @@ -826,13 +821,13 @@ struct pm_parser_t { */ pm_node_list_t *current_block_exits; - /** The version of prism that we should use to parse. */ + /* The version of prism that we should use to parse. */ pm_options_version_t version; - /** The command line flags given from the options. */ + /* The command line flags given from the options. */ uint8_t command_line; - /** + /* * Whether or not we have found a frozen_string_literal magic comment with * a true or false value. * May be: @@ -842,33 +837,33 @@ struct pm_parser_t { */ int8_t frozen_string_literal; - /** + /* * Whether or not we are parsing an eval string. This impacts whether or not * we should evaluate if block exits/yields are valid. */ bool parsing_eval; - /** + /* * Whether or not we are parsing a "partial" script, which is a script that * will be evaluated in the context of another script, so we should not * check jumps (next/break/etc.) for validity. */ bool partial_script; - /** Whether or not we're at the beginning of a command. */ + /* Whether or not we're at the beginning of a command. */ bool command_start; - /** + /* * Whether or not we're currently parsing the body of an endless method * definition. In this context, PM_TOKEN_KEYWORD_DO_BLOCK should not be * consumed by commands (it should bubble up to the outer context). */ bool in_endless_def_body; - /** Whether or not we're currently recovering from a syntax error. 
*/ + /* Whether or not we're currently recovering from a syntax error. */ bool recovering; - /** + /* * Whether or not the source being parsed could become valid if more input * were appended. This is set to false when the parser encounters a token * that is definitively wrong (e.g., a stray `end` or `]`) as opposed to @@ -876,7 +871,7 @@ struct pm_parser_t { */ bool continuable; - /** + /* * This is very specialized behavior for when you want to parse in a context * that does not respect encoding comments. Its main use case is translating * into the whitequark/parser AST which re-encodes source files in UTF-8 @@ -884,76 +879,66 @@ struct pm_parser_t { */ bool encoding_locked; - /** + /* * Whether or not the encoding has been changed by a magic comment. We use * this to provide a fast path for the lexer instead of going through the * function pointer. */ bool encoding_changed; - /** + /* * This flag indicates that we are currently parsing a pattern matching * expression and impacts that calculation of newlines. */ bool pattern_matching_newlines; - /** This flag indicates that we are currently parsing a keyword argument. */ + /* This flag indicates that we are currently parsing a keyword argument. */ bool in_keyword_arg; - /** + /* * Whether or not the parser has seen a token that has semantic meaning * (i.e., a token that is not a comment or whitespace). */ bool semantic_token_seen; - /** + /* * By default, Ruby always warns about mismatched indentation. This can be * toggled with a magic comment. */ bool warn_mismatched_indentation; #if defined(PRISM_HAS_NEON) || defined(PRISM_HAS_SSSE3) || defined(PRISM_HAS_SWAR) - /** + /* * Cached lookup tables for pm_strpbrk's SIMD fast path. Avoids rebuilding * the nibble-based tables on every call when the charset hasn't changed * (which is the common case during string/regex/list lexing). */ struct { - /** The cached charset (null-terminated, max 11 chars + NUL). 
*/ + /* The cached charset (null-terminated, max 11 chars + NUL). */ uint8_t charset[12]; - /** Nibble-based low lookup table for SIMD matching. */ + /* Nibble-based low lookup table for SIMD matching. */ uint8_t low_lut[16]; - /** Nibble-based high lookup table for SIMD matching. */ + /* Nibble-based high lookup table for SIMD matching. */ uint8_t high_lut[16]; - /** Scalar fallback table (4 x 64-bit bitmasks covering all ASCII). */ + /* Scalar fallback table (4 x 64-bit bitmasks covering all ASCII). */ uint64_t table[4]; } strpbrk_cache; #endif }; -/** +/* * Initialize a parser with the given start and end pointers. - * - * @param arena The arena to use for all AST-lifetime allocations. It is caller- - * owned and must outlive the parser. - * @param parser The parser to initialize. - * @param source The source to parse. - * @param size The size of the source. - * @param options The optional options to use when parsing. These options must - * live for the whole lifetime of this parser. */ void pm_parser_init(pm_arena_t *arena, pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options); -/** +/* * Free the memory held by the given parser. * * This does not free the `pm_options_t` object that was used to initialize the * parser. - * - * @param parser The parser whose held memory should be freed. */ void pm_parser_cleanup(pm_parser_t *parser); diff --git a/include/prism/internal/regexp.h b/include/prism/internal/regexp.h index 7f4731967c..3710c984fc 100644 --- a/include/prism/internal/regexp.h +++ b/include/prism/internal/regexp.h @@ -1,64 +1,40 @@ -/** - * @file internal/regexp.h - * - * A regular expression parser. - */ #ifndef PRISM_INTERNAL_REGEXP_H #define PRISM_INTERNAL_REGEXP_H #include "prism/ast.h" #include "prism/parser.h" -/** +/* * Accumulation state for named capture groups found during regexp parsing. * The caller initializes this with the call node and passes it to * pm_regexp_parse. 
The regexp parser populates match and names as groups * are found. */ typedef struct { - /** The call node wrapping the regular expression node (for =~). */ + /* The call node wrapping the regular expression node (for =~). */ pm_call_node_t *call; - /** The match write node being built, or NULL if no captures found yet. */ + /* The match write node being built, or NULL if no captures found yet. */ pm_match_write_node_t *match; - /** The list of capture names found so far (for deduplication). */ + /* The list of capture names found so far (for deduplication). */ pm_constant_id_list_t names; } pm_regexp_name_data_t; -/** +/* * Callback invoked by pm_regexp_parse() for each named capture group found. - * - * @param parser The main parser. - * @param name The name of the capture group. - * @param shared Whether the source content is shared (impacts constant storage). - * @param data The accumulation state for named captures. */ typedef void (*pm_regexp_name_callback_t)(pm_parser_t *parser, const pm_string_t *name, bool shared, pm_regexp_name_data_t *data); -/** +/* * Parse a regular expression, validate its encoding, and optionally extract * named capture groups. Returns the encoding flags to set on the node. - * - * @param parser The parser that is currently being used. - * @param node The regular expression node to parse and validate. - * @param name_callback The optional callback to call when a named capture group is found. - * @param name_data The optional accumulation state for named captures. - * @return The encoding flags to set on the node (e.g., FORCED_UTF8_ENCODING). */ PRISM_EXPORTED_FUNCTION pm_node_flags_t pm_regexp_parse(pm_parser_t *parser, pm_regular_expression_node_t *node, pm_regexp_name_callback_t name_callback, pm_regexp_name_data_t *name_data); -/** +/* * Parse an interpolated regular expression for named capture groups only. * No encoding validation is performed. - * - * @param parser The parser that is currently being used. 
- * @param source The source content to parse. - * @param size The length of the source content. - * @param shared Whether the source points into the parser's source buffer. - * @param extended_mode Whether or not the regular expression is in extended mode. - * @param name_callback The callback to call when a named capture group is found. - * @param name_data The accumulation state for named captures. */ void pm_regexp_parse_named_captures(pm_parser_t *parser, const uint8_t *source, size_t size, bool shared, bool extended_mode, pm_regexp_name_callback_t name_callback, pm_regexp_name_data_t *name_data); diff --git a/include/prism/internal/serialize.h b/include/prism/internal/serialize.h index c691a1fed9..e611a0374b 100644 --- a/include/prism/internal/serialize.h +++ b/include/prism/internal/serialize.h @@ -1,6 +1,3 @@ -/** - * @file internal/serialize.h - */ #ifndef PRISM_INTERNAL_SERIALIZE_H #define PRISM_INTERNAL_SERIALIZE_H @@ -17,28 +14,18 @@ * PRISM_EXCLUDE_SERIALIZATION define. */ #ifndef PRISM_EXCLUDE_SERIALIZATION -/** +/* * Serialize the given list of comments to the given buffer. - * - * @param list The list of comments to serialize. - * @param buffer The buffer to serialize to. */ void pm_serialize_comment_list(pm_list_t *list, pm_buffer_t *buffer); -/** +/* * Serialize the name of the encoding to the buffer. - * - * @param encoding The encoding to serialize. - * @param buffer The buffer to serialize to. */ void pm_serialize_encoding(const pm_encoding_t *encoding, pm_buffer_t *buffer); -/** +/* * Serialize the encoding, metadata, nodes, and constant pool. - * - * @param parser The parser to serialize. - * @param node The node to serialize. - * @param buffer The buffer to serialize to. 
*/ void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer); diff --git a/include/prism/internal/static_literals.h b/include/prism/internal/static_literals.h index 0512313259..d59002ac0a 100644 --- a/include/prism/internal/static_literals.h +++ b/include/prism/internal/static_literals.h @@ -1,8 +1,3 @@ -/** - * @file internal/static_literals.h - * - * A set of static literal nodes that can be checked for duplicates. - */ #ifndef PRISM_INTERNAL_STATIC_LITERALS_H #define PRISM_INTERNAL_STATIC_LITERALS_H @@ -10,21 +5,21 @@ #include "prism/buffer.h" #include "prism/line_offset_list.h" -/** +/* * An internal hash table for a set of nodes. */ typedef struct { - /** The array of nodes in the hash table. */ + /* The array of nodes in the hash table. */ pm_node_t **nodes; - /** The size of the hash table. */ + /* The size of the hash table. */ uint32_t size; - /** The space that has been allocated in the hash table. */ + /* The space that has been allocated in the hash table. */ uint32_t capacity; } pm_node_hash_t; -/** +/* * Certain sets of nodes (hash keys and when clauses) check for duplicate nodes * to alert the user of potential issues. To do this, we keep a set of the nodes * that have been seen so far, and compare whenever we find a new node. @@ -33,87 +28,70 @@ typedef struct { * that need to be performed. */ typedef struct { - /** + /* * This is the set of IntegerNode and SourceLineNode instances. */ pm_node_hash_t integer_nodes; - /** + /* * This is the set of FloatNode instances. */ pm_node_hash_t float_nodes; - /** + /* * This is the set of RationalNode and ImaginaryNode instances. */ pm_node_hash_t number_nodes; - /** + /* * This is the set of StringNode and SourceFileNode instances. */ pm_node_hash_t string_nodes; - /** + /* * This is the set of RegularExpressionNode instances. */ pm_node_hash_t regexp_nodes; - /** + /* * This is the set of SymbolNode instances. 
*/ pm_node_hash_t symbol_nodes; - /** + /* * A pointer to the last TrueNode instance that was inserted, or NULL. */ pm_node_t *true_node; - /** + /* * A pointer to the last FalseNode instance that was inserted, or NULL. */ pm_node_t *false_node; - /** + /* * A pointer to the last NilNode instance that was inserted, or NULL. */ pm_node_t *nil_node; - /** + /* * A pointer to the last SourceEncodingNode instance that was inserted, or * NULL. */ pm_node_t *source_encoding_node; } pm_static_literals_t; -/** +/* * Add a node to the set of static literals. - * - * @param line_offsets The list of newline offsets to use to calculate lines. - * @param start The start of the source being parsed. - * @param start_line The line number that the parser starts on. - * @param literals The set of static literals to add the node to. - * @param node The node to add to the set. - * @param replace Whether to replace the previous node if one already exists. - * @return A pointer to the node that is being overwritten, if there is one. */ pm_node_t * pm_static_literals_add(const pm_line_offset_list_t *line_offsets, const uint8_t *start, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node, bool replace); -/** +/* * Free the internal memory associated with the given static literals set. - * - * @param literals The set of static literals to free. */ void pm_static_literals_free(pm_static_literals_t *literals); -/** +/* * Create a string-based representation of the given static literal. - * - * @param buffer The buffer to write the string to. - * @param line_offsets The list of newline offsets to use to calculate lines. - * @param start The start of the source being parsed. - * @param start_line The line number that the parser starts on. - * @param encoding_name The name of the encoding of the source being parsed. - * @param node The node to create a string representation of. 
*/ void pm_static_literal_inspect(pm_buffer_t *buffer, const pm_line_offset_list_t *line_offsets, const uint8_t *start, int32_t start_line, const char *encoding_name, const pm_node_t *node); diff --git a/include/prism/internal/strings.h b/include/prism/internal/strings.h index f46aa86a81..0199ed8d67 100644 --- a/include/prism/internal/strings.h +++ b/include/prism/internal/strings.h @@ -1,54 +1,35 @@ -/** - * @file internal/strings.h - * - * A generic string type that can have various ownership semantics. - */ #ifndef PRISM_INTERNAL_STRINGS_H #define PRISM_INTERNAL_STRINGS_H #include "prism/strings.h" -/** +/* * Defines an empty string. This is useful for initializing a string that will * be filled in later. */ #define PM_STRING_EMPTY ((pm_string_t) { .type = PM_STRING_CONSTANT, .source = NULL, .length = 0 }) -/** +/* * Initialize a shared string that is based on initial input. - * - * @param string The string to initialize. - * @param start The start of the string. - * @param end The end of the string. */ void pm_string_shared_init(pm_string_t *string, const uint8_t *start, const uint8_t *end); -/** +/* * Initialize an owned string that is responsible for freeing allocated memory. - * - * @param string The string to initialize. - * @param source The source of the string. - * @param length The length of the string. */ void pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length); -/** +/* * Ensure the string is owned. If it is not, then reinitialize it as owned and * copy over the previous source. - * - * @param string The string to ensure is owned. */ void pm_string_ensure_owned(pm_string_t *string); -/** +/* * Compare the underlying lengths and bytes of two strings. Returns 0 if the * strings are equal, a negative number if the left string is less than the * right string, and a positive number if the left string is greater than the * right string. - * - * @param left The left string to compare. - * @param right The right string to compare. 
- * @return The comparison result. */ int pm_string_compare(const pm_string_t *left, const pm_string_t *right); diff --git a/include/prism/internal/strncasecmp.h b/include/prism/internal/strncasecmp.h index c6cabe9c23..775f6a993e 100644 --- a/include/prism/internal/strncasecmp.h +++ b/include/prism/internal/strncasecmp.h @@ -1,15 +1,10 @@ -/** - * @file internal/strncasecmp.h - * - * A custom strncasecmp implementation. - */ #ifndef PRISM_INTERNAL_STRNCASECMP_H #define PRISM_INTERNAL_STRNCASECMP_H #include #include -/** +/* * Compare two strings, ignoring case, up to the given length. Returns 0 if the * strings are equal, a negative number if string1 is less than string2, or a * positive number if string1 is greater than string2. @@ -17,12 +12,6 @@ * Note that this is effectively our own implementation of strncasecmp, but it's * not available on all of the platforms we want to support so we're rolling it * here. - * - * @param string1 The first string to compare. - * @param string2 The second string to compare - * @param length The maximum number of characters to compare. - * @return 0 if the strings are equal, a negative number if string1 is less than - * string2, or a positive number if string1 is greater than string2. */ int pm_strncasecmp(const uint8_t *string1, const uint8_t *string2, size_t length); diff --git a/include/prism/internal/strpbrk.h b/include/prism/internal/strpbrk.h index ca5692d25c..124c528cb3 100644 --- a/include/prism/internal/strpbrk.h +++ b/include/prism/internal/strpbrk.h @@ -1,8 +1,3 @@ -/** - * @file internal/strpbrk.h - * - * A custom strpbrk implementation. - */ #ifndef PRISM_INTERNAL_STRPBRK_H #define PRISM_INTERNAL_STRPBRK_H @@ -11,7 +6,7 @@ #include #include -/** +/* * Here we have rolled our own version of strpbrk. The standard library strpbrk * has undefined behavior when the source string is not null-terminated. 
We want * to support strings that are not null-terminated because pm_parse does not @@ -29,15 +24,6 @@ * characters that are trailing bytes of multi-byte characters. For example, in * Shift-JIS, the backslash character can be a trailing byte. In that case we * need to take a slower path and iterate one multi-byte character at a time. - * - * @param parser The parser. - * @param source The source to search. - * @param charset The charset to search for. - * @param length The maximum number of bytes to search. - * @param validate Whether to validate that the source string is valid in the - * current encoding of the parser. - * @return A pointer to the first character in the source string that is in the - * charset, or NULL if no such character exists. */ const uint8_t * pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length, bool validate); diff --git a/include/prism/internal/tokens.h b/include/prism/internal/tokens.h index 05651bf5c8..3a983e54ae 100644 --- a/include/prism/internal/tokens.h +++ b/include/prism/internal/tokens.h @@ -1,16 +1,10 @@ -/** - * @file internal/tokens.h - */ #ifndef PRISM_INTERNAL_TOKENS_H #define PRISM_INTERNAL_TOKENS_H #include "prism/ast.h" -/** +/* * Returns the human name of the given token type. - * - * @param token_type The token type to convert to a human name. - * @return The human name of the given token type. */ const char * pm_token_str(pm_token_type_t token_type); diff --git a/include/prism/line_offset_list.h b/include/prism/line_offset_list.h index e839862fea..848bc49139 100644 --- a/include/prism/line_offset_list.h +++ b/include/prism/line_offset_list.h @@ -54,7 +54,7 @@ typedef struct { * @param list The list to search. * @param cursor The offset to search for. * @param start_line The line to start counting from. - * @return The line and column of the given offset. + * @returns The line and column of the given offset. 
*/ PRISM_EXPORTED_FUNCTION pm_line_column_t pm_line_offset_list_line_column(const pm_line_offset_list_t *list, uint32_t cursor, int32_t start_line) PRISM_NONNULL(1); diff --git a/include/prism/magic_comments.h b/include/prism/magic_comments.h index 4941e94885..6d47cda985 100644 --- a/include/prism/magic_comments.h +++ b/include/prism/magic_comments.h @@ -17,17 +17,17 @@ typedef struct pm_magic_comment_t pm_magic_comment_t; /** * Returns the location of the key associated with the given magic comment. * - * @param comment the magic comment whose key location we want to get - * @return the location of the key associated with the given magic comment + * @param magic_comment the magic comment whose key location we want to get + * @returns the location of the key associated with the given magic comment */ -PRISM_EXPORTED_FUNCTION pm_location_t pm_magic_comment_key(const pm_magic_comment_t *comment) PRISM_NONNULL(1); +PRISM_EXPORTED_FUNCTION pm_location_t pm_magic_comment_key(const pm_magic_comment_t *magic_comment) PRISM_NONNULL(1); /** * Returns the location of the value associated with the given magic comment. * - * @param comment the magic comment whose value location we want to get - * @return the location of the value associated with the given magic comment + * @param magic_comment the magic comment whose value location we want to get + * @returns the location of the value associated with the given magic comment */ -PRISM_EXPORTED_FUNCTION pm_location_t pm_magic_comment_value(const pm_magic_comment_t *comment) PRISM_NONNULL(1); +PRISM_EXPORTED_FUNCTION pm_location_t pm_magic_comment_value(const pm_magic_comment_t *magic_comment) PRISM_NONNULL(1); #endif diff --git a/include/prism/node.h b/include/prism/node.h index 7db4dcd891..75bc3c9b2d 100644 --- a/include/prism/node.h +++ b/include/prism/node.h @@ -22,7 +22,7 @@ * Returns a string representation of the given node type. * * @param node_type The node type to convert to a string. 
- * @return A string representation of the given node type. + * @returns A string representation of the given node type. */ PRISM_EXPORTED_FUNCTION const char * pm_node_type(pm_node_type_t node_type); diff --git a/include/prism/options.h b/include/prism/options.h index 10834f28e7..1c7281d599 100644 --- a/include/prism/options.h +++ b/include/prism/options.h @@ -110,10 +110,8 @@ static const uint8_t PM_OPTIONS_COMMAND_LINE_X = 0x20; * Allocate a new options struct. If the options struct cannot be allocated, * this function aborts the process. * - * @return A new options struct with default values. It is the responsibility of - * the caller to free this struct using pm_options_free(). - * - * \public \memberof pm_options + * @returns A new options struct with default values. It is the responsibility + * of the caller to free this struct using pm_options_free(). */ PRISM_EXPORTED_FUNCTION pm_options_t * pm_options_new(void) PRISM_NODISCARD; @@ -121,8 +119,6 @@ PRISM_EXPORTED_FUNCTION pm_options_t * pm_options_new(void) PRISM_NODISCARD; * Free both the held memory of the given options struct and the struct itself. * * @param options The options struct to free. - * - * \public \memberof pm_options */ PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options) PRISM_NONNULL(1); @@ -133,8 +129,6 @@ PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options) PRISM_NONNUL * @param shebang_callback The shebang callback to set. * @param shebang_callback_data Any additional data that should be passed along * to the callback. - * - * \public \memberof pm_options */ PRISM_EXPORTED_FUNCTION void pm_options_shebang_callback_set(pm_options_t *options, pm_options_shebang_callback_t shebang_callback, void *shebang_callback_data) PRISM_NONNULL(1); @@ -142,9 +136,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_shebang_callback_set(pm_options_t *optio * Get the filepath option on the given options struct. * * @param options The options struct to get the filepath from. 
- * @return The filepath. - * - * \public \memberof pm_options + * @returns The filepath. */ PRISM_EXPORTED_FUNCTION const pm_string_t * pm_options_filepath(const pm_options_t *options) PRISM_NONNULL(1); @@ -153,8 +145,6 @@ PRISM_EXPORTED_FUNCTION const pm_string_t * pm_options_filepath(const pm_options * * @param options The options struct to set the filepath on. * @param filepath The filepath to set. - * - * \public \memberof pm_options */ PRISM_EXPORTED_FUNCTION void pm_options_filepath_set(pm_options_t *options, const char *filepath) PRISM_NONNULL(1); @@ -163,8 +153,6 @@ PRISM_EXPORTED_FUNCTION void pm_options_filepath_set(pm_options_t *options, cons * * @param options The options struct to set the line on. * @param line The line to set. - * - * \public \memberof pm_options */ PRISM_EXPORTED_FUNCTION void pm_options_line_set(pm_options_t *options, int32_t line) PRISM_NONNULL(1); @@ -173,8 +161,6 @@ PRISM_EXPORTED_FUNCTION void pm_options_line_set(pm_options_t *options, int32_t * * @param options The options struct to set the encoding on. * @param encoding The encoding to set. - * - * \public \memberof pm_options */ PRISM_EXPORTED_FUNCTION void pm_options_encoding_set(pm_options_t *options, const char *encoding) PRISM_NONNULL(1); @@ -183,8 +169,6 @@ PRISM_EXPORTED_FUNCTION void pm_options_encoding_set(pm_options_t *options, cons * * @param options The options struct to set the encoding_locked value on. * @param encoding_locked The encoding_locked value to set. - * - * \public \memberof pm_options */ PRISM_EXPORTED_FUNCTION void pm_options_encoding_locked_set(pm_options_t *options, bool encoding_locked) PRISM_NONNULL(1); @@ -193,8 +177,6 @@ PRISM_EXPORTED_FUNCTION void pm_options_encoding_locked_set(pm_options_t *option * * @param options The options struct to set the frozen string literal value on. * @param frozen_string_literal The frozen string literal value to set. 
- * - * \public \memberof pm_options */ PRISM_EXPORTED_FUNCTION void pm_options_frozen_string_literal_set(pm_options_t *options, bool frozen_string_literal) PRISM_NONNULL(1); @@ -203,8 +185,6 @@ PRISM_EXPORTED_FUNCTION void pm_options_frozen_string_literal_set(pm_options_t * * * @param options The options struct to set the command line option on. * @param command_line The command_line value to set. - * - * \public \memberof pm_options */ PRISM_EXPORTED_FUNCTION void pm_options_command_line_set(pm_options_t *options, uint8_t command_line) PRISM_NONNULL(1); @@ -216,9 +196,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_command_line_set(pm_options_t *options, * @param options The options struct to set the version on. * @param version The version to set. * @param length The length of the version string. - * @return Whether or not the version was parsed successfully. - * - * \public \memberof pm_options + * @returns Whether or not the version was parsed successfully. */ PRISM_EXPORTED_FUNCTION bool pm_options_version_set(pm_options_t *options, const char *version, size_t length) PRISM_NONNULL(1); @@ -227,8 +205,6 @@ PRISM_EXPORTED_FUNCTION bool pm_options_version_set(pm_options_t *options, const * Ruby that prism supports. * * @param options The options struct to set the version on. - * - * \public \memberof pm_options */ PRISM_EXPORTED_FUNCTION void pm_options_version_set_lowest(pm_options_t *options) PRISM_NONNULL(1); @@ -237,8 +213,6 @@ PRISM_EXPORTED_FUNCTION void pm_options_version_set_lowest(pm_options_t *options * Ruby that prism supports. * * @param options The options struct to set the version on. - * - * \public \memberof pm_options */ PRISM_EXPORTED_FUNCTION void pm_options_version_set_highest(pm_options_t *options) PRISM_NONNULL(1); @@ -247,8 +221,6 @@ PRISM_EXPORTED_FUNCTION void pm_options_version_set_highest(pm_options_t *option * * @param options The options struct to set the main script value on. * @param main_script The main script value to set. 
- * - * \public \memberof pm_options */ PRISM_EXPORTED_FUNCTION void pm_options_main_script_set(pm_options_t *options, bool main_script) PRISM_NONNULL(1); @@ -257,8 +229,6 @@ PRISM_EXPORTED_FUNCTION void pm_options_main_script_set(pm_options_t *options, b * * @param options The options struct to set the partial script value on. * @param partial_script The partial script value to set. - * - * \public \memberof pm_options */ PRISM_EXPORTED_FUNCTION void pm_options_partial_script_set(pm_options_t *options, bool partial_script) PRISM_NONNULL(1); @@ -266,8 +236,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_partial_script_set(pm_options_t *options * Get the freeze option on the given options struct. * * @param options The options struct to get the freeze value from. - * - * \public \memberof pm_options + * @returns The freeze value. */ PRISM_EXPORTED_FUNCTION bool pm_options_freeze(const pm_options_t *options) PRISM_NONNULL(1); @@ -276,8 +245,6 @@ PRISM_EXPORTED_FUNCTION bool pm_options_freeze(const pm_options_t *options) PRIS * * @param options The options struct to set the freeze value on. * @param freeze The freeze value to set. - * - * \public \memberof pm_options */ PRISM_EXPORTED_FUNCTION void pm_options_freeze_set(pm_options_t *options, bool freeze) PRISM_NONNULL(1); @@ -286,9 +253,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_freeze_set(pm_options_t *options, bool f * * @param options The options struct to initialize the scopes array on. * @param scopes_count The number of scopes to allocate. - * @return Whether or not the scopes array was initialized successfully. - * - * \public \memberof pm_options + * @returns Whether or not the scopes array was initialized successfully. */ PRISM_EXPORTED_FUNCTION bool pm_options_scopes_init(pm_options_t *options, size_t scopes_count) PRISM_NONNULL(1); @@ -298,9 +263,7 @@ PRISM_EXPORTED_FUNCTION bool pm_options_scopes_init(pm_options_t *options, size_ * * @param options The options struct to get the scope from. 
* @param index The index of the scope to get. - * @return A constant pointer to the scope at the given index. - * - * \public \memberof pm_options + * @returns A constant pointer to the scope at the given index. */ PRISM_EXPORTED_FUNCTION const pm_options_scope_t * pm_options_scope(const pm_options_t *options, size_t index) PRISM_NONNULL(1); @@ -310,9 +273,7 @@ PRISM_EXPORTED_FUNCTION const pm_options_scope_t * pm_options_scope(const pm_opt * * @param options The options struct to get the scope from. * @param index The index of the scope to get. - * @return A mutable pointer to the scope at the given index. - * - * \public \memberof pm_options + * @returns A mutable pointer to the scope at the given index. */ PRISM_EXPORTED_FUNCTION pm_options_scope_t * pm_options_scope_mut(pm_options_t *options, size_t index) PRISM_NONNULL(1); @@ -323,8 +284,6 @@ PRISM_EXPORTED_FUNCTION pm_options_scope_t * pm_options_scope_mut(pm_options_t * * * @param scope The scope struct to initialize. * @param locals_count The number of locals to allocate. - * - * \public \memberof pm_options */ PRISM_EXPORTED_FUNCTION void pm_options_scope_init(pm_options_scope_t *scope, size_t locals_count) PRISM_NONNULL(1); @@ -334,9 +293,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_scope_init(pm_options_scope_t *scope, si * * @param scope The scope struct to get the local from. * @param index The index of the local to get. - * @return A constant pointer to the local at the given index. - * - * \public \memberof pm_options + * @returns A constant pointer to the local at the given index. */ PRISM_EXPORTED_FUNCTION const pm_string_t * pm_options_scope_local(const pm_options_scope_t *scope, size_t index) PRISM_NONNULL(1); @@ -346,9 +303,7 @@ PRISM_EXPORTED_FUNCTION const pm_string_t * pm_options_scope_local(const pm_opti * * @param scope The scope struct to get the local from. * @param index The index of the local to get. - * @return A mutable pointer to the local at the given index. 
- * - * \public \memberof pm_options + * @returns A mutable pointer to the local at the given index. */ PRISM_EXPORTED_FUNCTION pm_string_t * pm_options_scope_local_mut(pm_options_scope_t *scope, size_t index) PRISM_NONNULL(1); @@ -357,8 +312,6 @@ PRISM_EXPORTED_FUNCTION pm_string_t * pm_options_scope_local_mut(pm_options_scop * * @param scope The scope struct to set the forwarding on. * @param forwarding The forwarding value to set. - * - * \public \memberof pm_options */ PRISM_EXPORTED_FUNCTION void pm_options_scope_forwarding_set(pm_options_scope_t *scope, uint8_t forwarding) PRISM_NONNULL(1); diff --git a/include/prism/parser.h b/include/prism/parser.h index 6d9efa6485..6bf0672a0e 100644 --- a/include/prism/parser.h +++ b/include/prism/parser.h @@ -30,10 +30,8 @@ typedef struct pm_parser_t pm_parser_t; * @param size The size of the source. * @param options The optional options to use when parsing. These options must * live for the whole lifetime of this parser. - * @return The initialized parser. It is the responsibility of the caller to + * @returns The initialized parser. It is the responsibility of the caller to * free the parser with `pm_parser_free()`. - * - * \public \memberof pm_parser */ PRISM_EXPORTED_FUNCTION pm_parser_t * pm_parser_new(pm_arena_t *arena, const uint8_t *source, size_t size, const pm_options_t *options) PRISM_NODISCARD PRISM_NONNULL(1); @@ -41,8 +39,6 @@ PRISM_EXPORTED_FUNCTION pm_parser_t * pm_parser_new(pm_arena_t *arena, const uin * Free both the memory held by the given parser and the parser itself. * * @param parser The parser to free. - * - * \public \memberof pm_parser */ PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser) PRISM_NONNULL(1); @@ -64,8 +60,6 @@ typedef void (*pm_lex_callback_t)(pm_parser_t *parser, pm_token_t *token, void * * * @param parser The parser to register the callback with. * @param callback The callback to register. 
- * - * \public \memberof pm_parser */ PRISM_EXPORTED_FUNCTION void pm_parser_encoding_changed_callback_set(pm_parser_t *parser, pm_encoding_changed_callback_t callback) PRISM_NONNULL(1); @@ -75,8 +69,6 @@ PRISM_EXPORTED_FUNCTION void pm_parser_encoding_changed_callback_set(pm_parser_t * @param parser The parser to register the callback with. * @param data The opaque data to pass to the callback when it is called. * @param callback The callback to register. - * - * \public \memberof pm_parser */ PRISM_EXPORTED_FUNCTION void pm_parser_lex_callback_set(pm_parser_t *parser, pm_lex_callback_t callback, void *data) PRISM_NONNULL(1); @@ -84,7 +76,7 @@ PRISM_EXPORTED_FUNCTION void pm_parser_lex_callback_set(pm_parser_t *parser, pm_ * Returns the opaque data that is passed to the lex callback when it is called. * * @param parser The parser whose lex callback data we want to get. - * @return The opaque data that is passed to the lex callback when it is called. + * @returns The opaque data that is passed to the lex callback when it is called. */ PRISM_EXPORTED_FUNCTION void * pm_parser_lex_callback_data(const pm_parser_t *parser) PRISM_NONNULL(1); @@ -92,7 +84,7 @@ PRISM_EXPORTED_FUNCTION void * pm_parser_lex_callback_data(const pm_parser_t *pa * Returns the raw pointer to the start of the source that is being parsed. * * @param parser the parser whose start pointer we want to get - * @return the raw pointer to the start of the source that is being parsed + * @returns the raw pointer to the start of the source that is being parsed */ PRISM_EXPORTED_FUNCTION const uint8_t * pm_parser_start(const pm_parser_t *parser) PRISM_NONNULL(1); @@ -100,7 +92,7 @@ PRISM_EXPORTED_FUNCTION const uint8_t * pm_parser_start(const pm_parser_t *parse * Returns the raw pointer to the end of the source that is being parsed. 
* * @param parser the parser whose end pointer we want to get - * @return the raw pointer to the end of the source that is being parsed + * @returns the raw pointer to the end of the source that is being parsed */ PRISM_EXPORTED_FUNCTION const uint8_t * pm_parser_end(const pm_parser_t *parser) PRISM_NONNULL(1); @@ -108,7 +100,7 @@ PRISM_EXPORTED_FUNCTION const uint8_t * pm_parser_end(const pm_parser_t *parser) * Returns the line that the parser was considered to have started on. * * @param parser the parser whose start line we want to get - * @return the line that the parser was considered to have started on + * @returns the line that the parser was considered to have started on */ PRISM_EXPORTED_FUNCTION int32_t pm_parser_start_line(const pm_parser_t *parser) PRISM_NONNULL(1); @@ -116,7 +108,7 @@ PRISM_EXPORTED_FUNCTION int32_t pm_parser_start_line(const pm_parser_t *parser) * Returns the name of the encoding that is being used to parse the source. * * @param parser the parser whose encoding name we want to get - * @return the name of the encoding that is being used to parse the source + * @returns the name of the encoding that is being used to parse the source */ PRISM_EXPORTED_FUNCTION const char * pm_parser_encoding_name(const pm_parser_t *parser) PRISM_NONNULL(1); @@ -124,7 +116,7 @@ PRISM_EXPORTED_FUNCTION const char * pm_parser_encoding_name(const pm_parser_t * * Returns the line offsets that are associated with the given parser. * * @param parser the parser whose line offsets we want to get - * @return the line offsets that are associated with the given parser + * @returns the line offsets that are associated with the given parser */ PRISM_EXPORTED_FUNCTION const pm_line_offset_list_t * pm_parser_line_offsets(const pm_parser_t *parser) PRISM_NONNULL(1); @@ -133,7 +125,7 @@ PRISM_EXPORTED_FUNCTION const pm_line_offset_list_t * pm_parser_line_offsets(con * given parser. 
* * @param parser the parser whose data location we want to get - * @return the location of the __DATA__ section that is associated with the + * @returns the location of the __DATA__ section that is associated with the * given parser. If it is unset, then the length will be set to 0. */ PRISM_EXPORTED_FUNCTION const pm_location_t * pm_parser_data_loc(const pm_parser_t *parser) PRISM_NONNULL(1); @@ -143,7 +135,7 @@ PRISM_EXPORTED_FUNCTION const pm_location_t * pm_parser_data_loc(const pm_parser * valid if more input were appended, as opposed to being definitively invalid. * * @param parser the parser whose continuable status we want to get - * @return whether the given parser is continuable + * @returns whether the given parser is continuable */ PRISM_EXPORTED_FUNCTION bool pm_parser_continuable(const pm_parser_t *parser) PRISM_NONNULL(1); @@ -154,7 +146,7 @@ PRISM_EXPORTED_FUNCTION bool pm_parser_continuable(const pm_parser_t *parser) PR * niche use cases. Most consumers should avoid this function. * * @param parser the parser whose lex state we want to get - * @return the lex state of the parser + * @returns the lex state of the parser */ PRISM_EXPORTED_FUNCTION int pm_parser_lex_state(const pm_parser_t *parser) PRISM_NONNULL(1); @@ -162,7 +154,7 @@ PRISM_EXPORTED_FUNCTION int pm_parser_lex_state(const pm_parser_t *parser) PRISM * Returns the number of comments associated with the given parser. * * @param parser the parser whose comments we want to get the size of - * @return the number of comments associated with the given parser + * @returns the number of comments associated with the given parser */ PRISM_EXPORTED_FUNCTION size_t pm_parser_comments_size(const pm_parser_t *parser) PRISM_NONNULL(1); @@ -188,7 +180,7 @@ PRISM_EXPORTED_FUNCTION void pm_parser_comments_each(const pm_parser_t *parser, * Returns the number of magic comments associated with the given parser. 
* * @param parser the parser whose magic comments we want to get the size of - * @return the number of magic comments associated with the given parser + * @returns the number of magic comments associated with the given parser */ PRISM_EXPORTED_FUNCTION size_t pm_parser_magic_comments_size(const pm_parser_t *parser) PRISM_NONNULL(1); @@ -214,7 +206,7 @@ PRISM_EXPORTED_FUNCTION void pm_parser_magic_comments_each(const pm_parser_t *pa * Returns the number of errors associated with the given parser. * * @param parser the parser whose errors we want to get the size of - * @return the number of errors associated with the given parser + * @returns the number of errors associated with the given parser */ PRISM_EXPORTED_FUNCTION size_t pm_parser_errors_size(const pm_parser_t *parser) PRISM_NONNULL(1); @@ -222,7 +214,7 @@ PRISM_EXPORTED_FUNCTION size_t pm_parser_errors_size(const pm_parser_t *parser) * Returns the number of warnings associated with the given parser. * * @param parser the parser whose warnings we want to get the size of - * @return the number of warnings associated with the given parser + * @returns the number of warnings associated with the given parser */ PRISM_EXPORTED_FUNCTION size_t pm_parser_warnings_size(const pm_parser_t *parser) PRISM_NONNULL(1); @@ -264,7 +256,7 @@ PRISM_EXPORTED_FUNCTION void pm_parser_warnings_each(const pm_parser_t *parser, * * @param parser the parser whose constant pool constants we want to get the * size of - * @return the number of constants in the constant pool associated with the + * @returns the number of constants in the constant pool associated with the * given parser */ PRISM_EXPORTED_FUNCTION size_t pm_parser_constants_size(const pm_parser_t *parser) PRISM_NONNULL(1); @@ -292,9 +284,7 @@ PRISM_EXPORTED_FUNCTION void pm_parser_constants_each(const pm_parser_t *parser, * Initiate the parser with the given parser. * * @param parser The parser to use. - * @return The AST representing the source. 
- * - * \public \memberof pm_parser + * @returns The AST representing the source. */ PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser) PRISM_NONNULL(1); diff --git a/include/prism/serialize.h b/include/prism/serialize.h index ee94801f6e..dba54d75f1 100644 --- a/include/prism/serialize.h +++ b/include/prism/serialize.h @@ -88,7 +88,7 @@ PRISM_EXPORTED_FUNCTION void pm_serialize_parse_lex(pm_buffer_t *buffer, const u * @param source The source to parse. * @param size The size of the source. * @param data The optional data to pass to the parser. - * @return True if the source parses without errors or warnings. + * @returns True if the source parses without errors or warnings. */ PRISM_EXPORTED_FUNCTION bool pm_serialize_parse_success_p(const uint8_t *source, size_t size, const char *data) PRISM_NONNULL(1); diff --git a/include/prism/stream.h b/include/prism/stream.h index 47325d667d..7bb4271255 100644 --- a/include/prism/stream.h +++ b/include/prism/stream.h @@ -38,7 +38,7 @@ typedef int (pm_parse_stream_feof_t)(void *stream); * @param stream_fgets The function to use to read from the stream. * @param stream_feof The function to use to determine if the stream has hit eof. * @param options The optional options to use when parsing. - * @return The AST representing the source. + * @returns The AST representing the source. */ PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_parser_t **parser, pm_arena_t *arena, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options) PRISM_NONNULL(1, 2, 3); diff --git a/include/prism/string_query.h b/include/prism/string_query.h index 406aa952a9..6ee1a9d9b6 100644 --- a/include/prism/string_query.h +++ b/include/prism/string_query.h @@ -33,7 +33,7 @@ typedef enum { * @param source The source to check. * @param length The length of the source. * @param encoding_name The name of the encoding of the source. 
- * @return PM_STRING_QUERY_TRUE if the query is true, PM_STRING_QUERY_FALSE if + * @returns PM_STRING_QUERY_TRUE if the query is true, PM_STRING_QUERY_FALSE if * the query is false, and PM_STRING_QUERY_ERROR if the encoding was invalid. */ PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name) PRISM_NONNULL(1, 3); @@ -44,7 +44,7 @@ PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_local(const uint8_t *s * @param source The source to check. * @param length The length of the source. * @param encoding_name The name of the encoding of the source. - * @return PM_STRING_QUERY_TRUE if the query is true, PM_STRING_QUERY_FALSE if + * @returns PM_STRING_QUERY_TRUE if the query is true, PM_STRING_QUERY_FALSE if * the query is false, and PM_STRING_QUERY_ERROR if the encoding was invalid. */ PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name) PRISM_NONNULL(1, 3); @@ -55,7 +55,7 @@ PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_constant(const uint8_t * @param source The source to check. * @param length The length of the source. * @param encoding_name The name of the encoding of the source. - * @return PM_STRING_QUERY_TRUE if the query is true, PM_STRING_QUERY_FALSE if + * @returns PM_STRING_QUERY_TRUE if the query is true, PM_STRING_QUERY_FALSE if * the query is false, and PM_STRING_QUERY_ERROR if the encoding was invalid. */ PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name) PRISM_NONNULL(1, 3); diff --git a/include/prism/strings.h b/include/prism/strings.h index 48d3f9b0d9..55059eb307 100644 --- a/include/prism/strings.h +++ b/include/prism/strings.h @@ -45,7 +45,7 @@ typedef struct { * Returns the size of the pm_string_t struct. This is necessary to allocate the * correct amount of memory in the FFI backend. 
* - * @return The size of the pm_string_t struct. + * @returns The size of the pm_string_t struct. */ PRISM_EXPORTED_FUNCTION size_t pm_string_sizeof(void); @@ -93,9 +93,7 @@ typedef enum { * * @param string The string to initialize. * @param filepath The filepath to read. - * @return The success of the read, indicated by the value of the enum. - * - * \public \memberof pm_string_t + * @returns The success of the read, indicated by the value of the enum. */ PRISM_EXPORTED_FUNCTION pm_string_init_result_t pm_string_mapped_init(pm_string_t *string, const char *filepath) PRISM_NONNULL(1, 2); @@ -106,9 +104,7 @@ PRISM_EXPORTED_FUNCTION pm_string_init_result_t pm_string_mapped_init(pm_string_ * * @param string The string to initialize. * @param filepath The filepath to read. - * @return The success of the read, indicated by the value of the enum. - * - * \public \memberof pm_string_t + * @returns The success of the read, indicated by the value of the enum. */ PRISM_EXPORTED_FUNCTION pm_string_init_result_t pm_string_file_init(pm_string_t *string, const char *filepath) PRISM_NONNULL(1, 2); @@ -116,9 +112,7 @@ PRISM_EXPORTED_FUNCTION pm_string_init_result_t pm_string_file_init(pm_string_t * Returns the length associated with the string. * * @param string The string to get the length of. - * @return The length of the string. - * - * \public \memberof pm_string_t + * @returns The length of the string. */ PRISM_EXPORTED_FUNCTION size_t pm_string_length(const pm_string_t *string) PRISM_NONNULL(1); @@ -126,9 +120,7 @@ PRISM_EXPORTED_FUNCTION size_t pm_string_length(const pm_string_t *string) PRISM * Returns the start pointer associated with the string. * * @param string The string to get the start pointer of. - * @return The start pointer of the string. - * - * \public \memberof pm_string_t + * @returns The start pointer of the string. 
*/ PRISM_EXPORTED_FUNCTION const uint8_t * pm_string_source(const pm_string_t *string) PRISM_NONNULL(1); @@ -136,8 +128,6 @@ PRISM_EXPORTED_FUNCTION const uint8_t * pm_string_source(const pm_string_t *stri * Free the associated memory of the given string. * * @param string The string to free. - * - * \public \memberof pm_string_t */ PRISM_EXPORTED_FUNCTION void pm_string_cleanup(pm_string_t *string) PRISM_NONNULL(1); diff --git a/templates/include/prism/ast.h.erb b/templates/include/prism/ast.h.erb index e1b233918d..1909618fc1 100644 --- a/templates/include/prism/ast.h.erb +++ b/templates/include/prism/ast.h.erb @@ -51,7 +51,7 @@ typedef struct { * Returns a string representation of the given token type. * * @param token_type The type of the token to get the string representation of. - * @return A string representation of the given token type. This is meant for + * @returns A string representation of the given token type. This is meant for * debugging purposes and is not guaranteed to be stable across versions. */ PRISM_EXPORTED_FUNCTION const char * pm_token_type(pm_token_type_t token_type); @@ -262,7 +262,7 @@ typedef enum pm_<%= flag.human %> { <%- node.fields.each do |field| -%> * @param <%= field.name %> <%= field.comment ? Prism::Template::Doxygen.verbatim(field.comment.lines.first.strip) : "The #{field.name} field." %> <%- end -%> - * @return The newly allocated and initialized node. + * @returns The newly allocated and initialized node. */ PRISM_EXPORTED_FUNCTION pm_<%= node.human %>_t * pm_<%= node.human %>_new(pm_arena_t *arena, uint32_t node_id, pm_node_flags_t flags, pm_location_t location<%= params.empty? ? 
"" : ", #{params.join(", ")}" %>); <%- end -%> diff --git a/templates/include/prism/internal/diagnostic.h.erb b/templates/include/prism/internal/diagnostic.h.erb index fcbc2b6a70..ee44ff5382 100644 --- a/templates/include/prism/internal/diagnostic.h.erb +++ b/templates/include/prism/internal/diagnostic.h.erb @@ -1,8 +1,3 @@ -/** - * @file internal/diagnostic.h - * - * A list of diagnostics generated during parsing. - */ #ifndef PRISM_INTERNAL_DIAGNOSTIC_H #define PRISM_INTERNAL_DIAGNOSTIC_H @@ -11,7 +6,7 @@ #include "prism/arena.h" #include "prism/diagnostic.h" -/** +/* * The diagnostic IDs of all of the diagnostics, used to communicate the types * of errors between the parser and the user. */ @@ -27,51 +22,38 @@ typedef enum { <%- end -%> } pm_diagnostic_id_t; -/** +/* * This struct represents a diagnostic generated during parsing. */ struct pm_diagnostic_t { - /** The embedded base node. */ + /* The embedded base node. */ pm_list_node_t node; - /** The location of the diagnostic in the source. */ + /* The location of the diagnostic in the source. */ pm_location_t location; - /** The ID of the diagnostic. */ + /* The ID of the diagnostic. */ pm_diagnostic_id_t diag_id; - /** The message associated with the diagnostic. */ + /* The message associated with the diagnostic. */ const char *message; - /** + /* * The level of the diagnostic, see `pm_error_level_t` and * `pm_warning_level_t` for possible values. */ uint8_t level; }; -/** +/* * Append a diagnostic to the given list of diagnostics that is using shared * memory for its message. - * - * @param arena The arena to allocate from. - * @param list The list to append to. - * @param start The source offset of the start of the diagnostic. - * @param length The length of the diagnostic. - * @param diag_id The diagnostic ID. 
*/ void pm_diagnostic_list_append(pm_arena_t *arena, pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id); -/** +/* * Append a diagnostic to the given list of diagnostics that is using a format * string for its message. - * - * @param arena The arena to allocate from. - * @param list The list to append to. - * @param start The source offset of the start of the diagnostic. - * @param length The length of the diagnostic. - * @param diag_id The diagnostic ID. - * @param ... The arguments to the format string for the message. */ void pm_diagnostic_list_append_format(pm_arena_t *arena, pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id, ...); diff --git a/templates/template.rb b/templates/template.rb index 78c8ac1954..8f7734dd43 100755 --- a/templates/template.rb +++ b/templates/template.rb @@ -53,7 +53,7 @@ def self.escape(value) module Doxygen # Similar to /verbatim ... /endverbatim but doesn't wrap the result in a code block. def self.verbatim(value) - value.gsub(/[\.*%!`#<>_+-]/, '\\\\\0') + value.gsub(/[*%!`#<>_+@-]/, '\\\\\0') end end From b66fbf9f854a31145cbf689754869f51140c0b79 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 18 Mar 2026 14:54:23 -0400 Subject: [PATCH 082/100] Clean up rake build --- prism.gemspec | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/prism.gemspec b/prism.gemspec index 4c9b685427..5db6327813 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -55,6 +55,8 @@ Gem::Specification.new do |spec| "include/prism/compiler/force_inline.h", "include/prism/compiler/format.h", "include/prism/compiler/inline.h", + "include/prism/compiler/nodiscard.h", + "include/prism/compiler/nonnull.h", "include/prism/compiler/unused.h", "include/prism/internal/allocator.h", "include/prism/internal/allocator_debug.h", @@ -81,6 +83,7 @@ Gem::Specification.new do |spec| "include/prism/internal/strncasecmp.h", "include/prism/internal/strings.h", "include/prism/internal/strpbrk.h", + 
"include/prism/internal/tokens.h", "include/prism/arena.h", "include/prism/ast.h", "include/prism/buffer.h", @@ -89,12 +92,15 @@ Gem::Specification.new do |spec| "include/prism/diagnostic.h", "include/prism/excludes.h", "include/prism/integer.h", + "include/prism/json.h", "include/prism/line_offset_list.h", "include/prism/magic_comments.h", "include/prism/node.h", "include/prism/options.h", "include/prism/parser.h", "include/prism/prettyprint.h", + "include/prism/serialize.h", + "include/prism/stream.h", "include/prism/string_query.h", "include/prism/strings.h", "include/prism/version.h", @@ -193,6 +199,7 @@ Gem::Specification.new do |spec| "src/diagnostic.c", "src/encoding.c", "src/integer.c", + "src/json.c", "src/line_offset_list.c", "src/list.c", "src/memchr.c", From 26731ccc92962e13de90c2cc029d78e88e11c89d Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 18 Mar 2026 15:05:18 -0400 Subject: [PATCH 083/100] Make sure we have at least one declaration in TUs --- include/prism/compiler/unused.h | 4 ++-- src/encoding.c | 2 +- src/json.c | 9 ++++++++- src/prism.c | 4 ++-- src/static_literals.c | 6 +++--- src/strpbrk.c | 2 +- templates/src/prettyprint.c.erb | 9 ++++++++- templates/src/serialize.c.erb | 9 ++++++++- 8 files changed, 33 insertions(+), 12 deletions(-) diff --git a/include/prism/compiler/unused.h b/include/prism/compiler/unused.h index fced007f9b..6a9e125dde 100644 --- a/include/prism/compiler/unused.h +++ b/include/prism/compiler/unused.h @@ -10,9 +10,9 @@ * compiler-agnostic way. 
*/ #if defined(__GNUC__) -# define PRISM_ATTRIBUTE_UNUSED __attribute__((unused)) +# define PRISM_UNUSED __attribute__((unused)) #else -# define PRISM_ATTRIBUTE_UNUSED +# define PRISM_UNUSED #endif #endif diff --git a/src/encoding.c b/src/encoding.c index 0425a2c5b8..c9c2e13056 100644 --- a/src/encoding.c +++ b/src/encoding.c @@ -4094,7 +4094,7 @@ pm_encoding_ascii_isupper_char(const uint8_t *b, ptrdiff_t n) { * matter what the codepoint, so this function is shared between them. */ static size_t -pm_encoding_single_char_width(PRISM_ATTRIBUTE_UNUSED const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { +pm_encoding_single_char_width(PRISM_UNUSED const uint8_t *b, PRISM_UNUSED ptrdiff_t n) { return 1; } diff --git a/src/json.c b/src/json.c index 0d72ca8368..57e3fc07df 100644 --- a/src/json.c +++ b/src/json.c @@ -11,7 +11,14 @@ /* We optionally support dumping to JSON. For systems that don not want or need * this functionality, it can be turned off with the PRISM_EXCLUDE_JSON define. */ -#ifndef PRISM_EXCLUDE_JSON +#ifdef PRISM_EXCLUDE_JSON + +#include "prism/compiler/unused.h" + +void +pm_dump_json(PRISM_UNUSED pm_buffer_t *buffer, PRISM_UNUSED const pm_parser_t *parser, PRISM_UNUSED const pm_node_t *node) {} + +#else #include "prism/internal/buffer.h" #include "prism/internal/constant_pool.h" diff --git a/src/prism.c b/src/prism.c index 18249acc71..b6afa8bb15 100644 --- a/src/prism.c +++ b/src/prism.c @@ -454,7 +454,7 @@ lex_state_set(pm_parser_t *parser, pm_lex_state_t state) { #endif #if PM_DEBUG_LOGGING -PRISM_ATTRIBUTE_UNUSED static void +PRISM_UNUSED static void debug_state(pm_parser_t *parser) { fprintf(stderr, "STATE: "); bool first = true; @@ -1073,7 +1073,7 @@ pm_locals_reads(pm_locals_t *locals, pm_constant_id_t name) { * written but not read in certain contexts. 
*/ static void -pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals, pm_constant_id_list_t *list, bool toplevel) { +pm_locals_order(pm_parser_t *parser, pm_locals_t *locals, pm_constant_id_list_t *list, bool toplevel) { pm_constant_id_list_init_capacity(parser->arena, list, locals->size); // If we're still below the threshold for switching to a hash, then we only diff --git a/src/static_literals.c b/src/static_literals.c index c66fa7724a..0a8ef62b2c 100644 --- a/src/static_literals.c +++ b/src/static_literals.c @@ -285,7 +285,7 @@ pm_compare_integer_nodes(const pm_static_literals_metadata_t *metadata, const pm * A comparison function for comparing two FloatNode instances. */ static int -pm_compare_float_nodes(PRISM_ATTRIBUTE_UNUSED const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) { +pm_compare_float_nodes(PRISM_UNUSED const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) { const double left_value = ((const pm_float_node_t *) left)->value; const double right_value = ((const pm_float_node_t *) right)->value; return PM_NUMERIC_COMPARISON(left_value, right_value); @@ -344,7 +344,7 @@ pm_string_value(const pm_node_t *node) { * A comparison function for comparing two nodes that have attached strings. */ static int -pm_compare_string_nodes(PRISM_ATTRIBUTE_UNUSED const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) { +pm_compare_string_nodes(PRISM_UNUSED const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) { const pm_string_t *left_string = pm_string_value(left); const pm_string_t *right_string = pm_string_value(right); return pm_string_compare(left_string, right_string); @@ -354,7 +354,7 @@ pm_compare_string_nodes(PRISM_ATTRIBUTE_UNUSED const pm_static_literals_metadata * A comparison function for comparing two RegularExpressionNode instances. 
*/ static int -pm_compare_regular_expression_nodes(PRISM_ATTRIBUTE_UNUSED const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) { +pm_compare_regular_expression_nodes(PRISM_UNUSED const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) { const pm_regular_expression_node_t *left_regexp = (const pm_regular_expression_node_t *) left; const pm_regular_expression_node_t *right_regexp = (const pm_regular_expression_node_t *) right; diff --git a/src/strpbrk.c b/src/strpbrk.c index 41ab8eec3e..383707eb72 100644 --- a/src/strpbrk.c +++ b/src/strpbrk.c @@ -257,7 +257,7 @@ scan_strpbrk_ascii(pm_parser_t *parser, const uint8_t *source, size_t maximum, c #else static PRISM_INLINE bool -scan_strpbrk_ascii(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, PRISM_ATTRIBUTE_UNUSED const uint8_t *source, PRISM_ATTRIBUTE_UNUSED size_t maximum, PRISM_ATTRIBUTE_UNUSED const uint8_t *charset, size_t *index) { +scan_strpbrk_ascii(PRISM_UNUSED pm_parser_t *parser, PRISM_UNUSED const uint8_t *source, PRISM_UNUSED size_t maximum, PRISM_UNUSED const uint8_t *charset, size_t *index) { *index = 0; return false; } diff --git a/templates/src/prettyprint.c.erb b/templates/src/prettyprint.c.erb index cede4b9d02..9992f51096 100644 --- a/templates/src/prettyprint.c.erb +++ b/templates/src/prettyprint.c.erb @@ -4,7 +4,14 @@ /* We optionally support pretty printing nodes. For systems that don't want or * need this functionality, it can be turned off with the * PRISM_EXCLUDE_PRETTYPRINT define. 
*/ -#ifndef PRISM_EXCLUDE_PRETTYPRINT +#ifdef PRISM_EXCLUDE_PRETTYPRINT + +#include "prism/compiler/unused.h" + +void +pm_prettyprint(PRISM_UNUSED pm_buffer_t *buffer, PRISM_UNUSED const pm_parser_t *parser, PRISM_UNUSED const pm_node_t *node) {} + +#else #include "prism/compiler/inline.h" #include "prism/internal/buffer.h" diff --git a/templates/src/serialize.c.erb b/templates/src/serialize.c.erb index 0263ef56cc..8c28cb89fc 100644 --- a/templates/src/serialize.c.erb +++ b/templates/src/serialize.c.erb @@ -3,7 +3,14 @@ /* We optionally support serializing to a binary string. For systems that do not * want or need this functionality, it can be turned off with the * PRISM_EXCLUDE_SERIALIZATION define. */ -#ifndef PRISM_EXCLUDE_SERIALIZATION +#ifdef PRISM_EXCLUDE_SERIALIZATION + +#include "prism/compiler/unused.h" + +void +pm_serialize_lex(PRISM_UNUSED pm_buffer_t *buffer, PRISM_UNUSED const uint8_t *source, PRISM_UNUSED size_t size, PRISM_UNUSED const char *data) {} + +#else #include "prism/compiler/inline.h" From cab3fd8c03d1334069fe76f170095c93a47189ff Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 18 Mar 2026 15:24:08 -0400 Subject: [PATCH 084/100] Fix up rust side of the build --- include/prism/parser.h | 19 ++++ rust/ruby-prism-sys/build/main.rs | 48 ++++++++-- rust/ruby-prism-sys/tests/node_tests.rs | 24 ++--- rust/ruby-prism-sys/tests/parser_tests.rs | 95 +++++++++---------- rust/ruby-prism-sys/tests/utils_tests.rs | 4 +- rust/ruby-prism/build.rs | 7 +- rust/ruby-prism/src/lib.rs | 42 ++++---- rust/ruby-prism/src/node.rs | 19 ++-- rust/ruby-prism/src/parse_result/comments.rs | 86 ++++++++--------- .../src/parse_result/diagnostics.rs | 48 +++++----- rust/ruby-prism/src/parse_result/mod.rs | 81 ++++++++++------ src/parser.c | 17 ++++ 12 files changed, 292 insertions(+), 198 deletions(-) diff --git a/include/prism/parser.h b/include/prism/parser.h index 6bf0672a0e..78e6c189ac 100644 --- a/include/prism/parser.h +++ b/include/prism/parser.h @@ -112,6 
+112,15 @@ PRISM_EXPORTED_FUNCTION int32_t pm_parser_start_line(const pm_parser_t *parser) */ PRISM_EXPORTED_FUNCTION const char * pm_parser_encoding_name(const pm_parser_t *parser) PRISM_NONNULL(1); +/** + * Returns the frozen string literal value of the parser, as determined by the + * frozen_string_literal magic comment or the option set on the parser. + * + * @param parser the parser whose frozen string literal value we want to get + * @returns -1 if disabled, 0 if unset, 1 if enabled + */ +PRISM_EXPORTED_FUNCTION int8_t pm_parser_frozen_string_literal(const pm_parser_t *parser) PRISM_NONNULL(1); + /** * Returns the line offsets that are associated with the given parser. * @@ -280,6 +289,16 @@ typedef void (*pm_constant_callback_t)(const pm_constant_t *constant, void *data */ PRISM_EXPORTED_FUNCTION void pm_parser_constants_each(const pm_parser_t *parser, pm_constant_callback_t callback, void *data) PRISM_NONNULL(1); +/** + * Returns a pointer to the constant at the given id in the constant pool + * associated with the given parser. + * + * @param parser the parser whose constant pool we want to look up from + * @param constant_id the id of the constant to look up (1-based) + * @returns a pointer to the constant at the given id + */ +PRISM_EXPORTED_FUNCTION const pm_constant_t * pm_parser_constant(const pm_parser_t *parser, pm_constant_id_t constant_id) PRISM_NONNULL(1); + /** * Initiate the parser with the given parser. 
* diff --git a/rust/ruby-prism-sys/build/main.rs b/rust/ruby-prism-sys/build/main.rs index bf9c8e2db7..722da06970 100644 --- a/rust/ruby-prism-sys/build/main.rs +++ b/rust/ruby-prism-sys/build/main.rs @@ -128,24 +128,42 @@ fn generate_bindings(ruby_include_path: &Path) -> bindgen::Bindings { .sort_semantically(true) // Structs .allowlist_type("pm_comment_t") + .allowlist_type("pm_constant_t") .allowlist_type("pm_diagnostic_t") - .allowlist_type("pm_list_t") + .allowlist_type("pm_error_level_t") + .allowlist_type("pm_line_column_t") + .allowlist_type("pm_line_offset_list_t") + .allowlist_type("pm_location_t") .allowlist_type("pm_magic_comment_t") .allowlist_type("pm_node_t") .allowlist_type("pm_node_type") .allowlist_type("pm_options_t") .allowlist_type("pm_options_scope_t") - .allowlist_type("pm_parser_t") .allowlist_type("pm_string_t") + .allowlist_type("pm_warning_level_t") .allowlist_type(r"^pm_\w+_node_t") .allowlist_type(r"^pm_\w+_flags") // Enums .rustified_non_exhaustive_enum("pm_comment_type_t") + .rustified_non_exhaustive_enum("pm_error_level_t") .rustified_non_exhaustive_enum(r"pm_\w+_flags") .rustified_non_exhaustive_enum("pm_node_type") + .rustified_non_exhaustive_enum("pm_warning_level_t") // Functions - .allowlist_function("pm_arena_cleanup") + .allowlist_function("pm_arena_free") + .allowlist_function("pm_arena_new") + .allowlist_function("pm_comment_location") + .allowlist_function("pm_comment_type") + .allowlist_function("pm_constant_length") + .allowlist_function("pm_constant_start") + .allowlist_function("pm_diagnostic_error_level") + .allowlist_function("pm_diagnostic_location") + .allowlist_function("pm_diagnostic_message") + .allowlist_function("pm_diagnostic_type") + .allowlist_function("pm_diagnostic_warning_level") .allowlist_function("pm_line_offset_list_line_column") + .allowlist_function("pm_magic_comment_key") + .allowlist_function("pm_magic_comment_value") .allowlist_function("pm_options_command_line_set") 
.allowlist_function("pm_options_encoding_locked_set") .allowlist_function("pm_options_encoding_set") @@ -157,17 +175,33 @@ fn generate_bindings(ruby_include_path: &Path) -> bindgen::Bindings { .allowlist_function("pm_options_new") .allowlist_function("pm_options_partial_script_set") .allowlist_function("pm_options_scope_forwarding_set") - .allowlist_function("pm_options_scope_mut") .allowlist_function("pm_options_scope_init") .allowlist_function("pm_options_scope_local_mut") + .allowlist_function("pm_options_scope_mut") .allowlist_function("pm_options_scopes_init") .allowlist_function("pm_options_version_set") .allowlist_function("pm_parse") - .allowlist_function("pm_parser_cleanup") - .allowlist_function("pm_parser_init") + .allowlist_function("pm_parser_comments_each") + .allowlist_function("pm_parser_comments_size") + .allowlist_function("pm_parser_constant") + .allowlist_function("pm_parser_constants_each") + .allowlist_function("pm_parser_constants_size") + .allowlist_function("pm_parser_data_loc") + .allowlist_function("pm_parser_errors_each") + .allowlist_function("pm_parser_errors_size") + .allowlist_function("pm_parser_free") + .allowlist_function("pm_parser_frozen_string_literal") + .allowlist_function("pm_parser_line_offsets") + .allowlist_function("pm_parser_magic_comments_each") + .allowlist_function("pm_parser_magic_comments_size") + .allowlist_function("pm_parser_new") + .allowlist_function("pm_parser_start") + .allowlist_function("pm_parser_start_line") + .allowlist_function("pm_parser_warnings_each") + .allowlist_function("pm_parser_warnings_size") .allowlist_function("pm_size_to_native") - .allowlist_function("pm_string_constant_init") .allowlist_function("pm_string_cleanup") + .allowlist_function("pm_string_constant_init") .allowlist_function("pm_string_length") .allowlist_function("pm_string_source") .allowlist_function("pm_version") diff --git a/rust/ruby-prism-sys/tests/node_tests.rs b/rust/ruby-prism-sys/tests/node_tests.rs index 
73c8c04d30..31040668b7 100644 --- a/rust/ruby-prism-sys/tests/node_tests.rs +++ b/rust/ruby-prism-sys/tests/node_tests.rs @@ -1,29 +1,25 @@ -use std::{ffi::CString, mem::MaybeUninit}; +use std::ffi::CString; -use ruby_prism_sys::{pm_arena_cleanup, pm_arena_t, pm_node_type}; -use ruby_prism_sys::{pm_parse, pm_parser_cleanup, pm_parser_init, pm_parser_t}; +use ruby_prism_sys::{pm_arena_free, pm_arena_new, pm_node_type}; +use ruby_prism_sys::{pm_parse, pm_parser_free, pm_parser_new}; #[test] fn node_test() { - let mut arena = MaybeUninit::::zeroed(); - let mut parser = MaybeUninit::::uninit(); let code = CString::new("class Foo; end").unwrap(); unsafe { - pm_parser_init( - arena.as_mut_ptr(), - parser.as_mut_ptr(), + let arena = pm_arena_new(); + let parser = pm_parser_new( + arena, code.as_ptr().cast::(), code.as_bytes().len(), std::ptr::null(), ); + let node = pm_parse(parser); - let parser = parser.assume_init_mut(); - let parsed_node = pm_parse(parser); + assert_eq!((*node).type_, pm_node_type::PM_PROGRAM_NODE as u16); - assert_eq!((*parsed_node).type_, pm_node_type::PM_PROGRAM_NODE as u16); - - pm_parser_cleanup(parser); - pm_arena_cleanup(arena.as_mut_ptr()); + pm_parser_free(parser); + pm_arena_free(arena); } } diff --git a/rust/ruby-prism-sys/tests/parser_tests.rs b/rust/ruby-prism-sys/tests/parser_tests.rs index 7efb374ae8..0cfc234de2 100644 --- a/rust/ruby-prism-sys/tests/parser_tests.rs +++ b/rust/ruby-prism-sys/tests/parser_tests.rs @@ -1,14 +1,24 @@ -use std::{ - ffi::{CStr, CString}, - mem::MaybeUninit, - path::Path, -}; +use std::ffi::{CStr, CString}; +use std::path::Path; use ruby_prism_sys::{ - pm_arena_cleanup, pm_arena_t, pm_comment_t, pm_comment_type_t, pm_diagnostic_t, pm_parse, pm_parser_cleanup, - pm_parser_init, pm_parser_t, + pm_arena_free, pm_arena_new, pm_comment_location, pm_comment_type, pm_comment_type_t, pm_diagnostic_location, + pm_diagnostic_message, pm_parse, pm_parser_comments_each, pm_parser_errors_each, pm_parser_free, 
pm_parser_new, }; +unsafe extern "C" fn collect_comment(comment: *const ruby_prism_sys::pm_comment_t, data: *mut std::ffi::c_void) { + let vec = &mut *(data.cast::>()); + vec.push(comment); +} + +unsafe extern "C" fn collect_diagnostic( + diagnostic: *const ruby_prism_sys::pm_diagnostic_t, + data: *mut std::ffi::c_void, +) { + let vec = &mut *(data.cast::>()); + vec.push(diagnostic); +} + fn ruby_file_contents() -> (CString, usize) { let rust_path = Path::new(env!("CARGO_MANIFEST_DIR")); let ruby_file_path = rust_path.join("../../lib/prism.rb").canonicalize().unwrap(); @@ -22,86 +32,75 @@ fn ruby_file_contents() -> (CString, usize) { fn init_test() { let (ruby_file_contents, len) = ruby_file_contents(); let source = ruby_file_contents.as_ptr().cast::(); - let mut arena = MaybeUninit::::zeroed(); - let mut parser = MaybeUninit::::uninit(); unsafe { - pm_parser_init(arena.as_mut_ptr(), parser.as_mut_ptr(), source, len, std::ptr::null()); - let parser = parser.assume_init_mut(); + let arena = pm_arena_new(); + let parser = pm_parser_new(arena, source, len, std::ptr::null()); - pm_parser_cleanup(parser); - pm_arena_cleanup(arena.as_mut_ptr()); + pm_parser_free(parser); + pm_arena_free(arena); } } #[test] fn comments_test() { let source = CString::new("# Meow!").unwrap(); - let mut arena = MaybeUninit::::zeroed(); - let mut parser = MaybeUninit::::uninit(); unsafe { - pm_parser_init( - arena.as_mut_ptr(), - parser.as_mut_ptr(), + let arena = pm_arena_new(); + let parser = pm_parser_new( + arena, source.as_ptr().cast::(), source.as_bytes().len(), std::ptr::null(), ); - let parser = parser.assume_init_mut(); let _node = pm_parse(parser); - let comment_list = &parser.comment_list; - let comment = comment_list.head as *const pm_comment_t; - assert_eq!((*comment).type_, pm_comment_type_t::PM_COMMENT_INLINE); + let mut comments: Vec<*const ruby_prism_sys::pm_comment_t> = Vec::new(); + pm_parser_comments_each(parser, Some(collect_comment), (&raw mut comments).cast()); + + 
assert_eq!(comments.len(), 1); + let comment = comments[0]; + assert_eq!(pm_comment_type(comment), pm_comment_type_t::PM_COMMENT_INLINE); - let location = { - let start = (*comment).location.start; - let end = (*comment).location.start + (*comment).location.length; - start..end - }; - assert_eq!(location, 0..7); + let location = pm_comment_location(comment); + assert_eq!(location.start..location.start + location.length, 0..7); - pm_parser_cleanup(parser); - pm_arena_cleanup(arena.as_mut_ptr()); + pm_parser_free(parser); + pm_arena_free(arena); } } #[test] fn diagnostics_test() { let source = CString::new("class Foo;").unwrap(); - let mut arena = MaybeUninit::::zeroed(); - let mut parser = MaybeUninit::::uninit(); unsafe { - pm_parser_init( - arena.as_mut_ptr(), - parser.as_mut_ptr(), + let arena = pm_arena_new(); + let parser = pm_parser_new( + arena, source.as_ptr().cast::(), source.as_bytes().len(), std::ptr::null(), ); - let parser = parser.assume_init_mut(); let _node = pm_parse(parser); - let error_list = &parser.error_list; - assert!(!error_list.head.is_null()); + let mut errors: Vec<*const ruby_prism_sys::pm_diagnostic_t> = Vec::new(); + pm_parser_errors_each(parser, Some(collect_diagnostic), (&raw mut errors).cast()); + + assert!(!errors.is_empty()); + let error = errors[0]; - let error = error_list.head as *const pm_diagnostic_t; - let message = CStr::from_ptr((*error).message); + let message = CStr::from_ptr(pm_diagnostic_message(error)); assert_eq!( message.to_string_lossy(), "unexpected end-of-input, assuming it is closing the parent top level context" ); - let location = { - let start = (*error).location.start; - let end = (*error).location.start + (*error).location.length; - start..end - }; - assert_eq!(location, 10..10); + let location = pm_diagnostic_location(error); + assert_eq!(location.start..location.start + location.length, 10..10); - pm_parser_cleanup(parser); - pm_arena_cleanup(arena.as_mut_ptr()); + pm_parser_free(parser); + 
pm_arena_free(arena); } } diff --git a/rust/ruby-prism-sys/tests/utils_tests.rs b/rust/ruby-prism-sys/tests/utils_tests.rs index 20cce8ea2e..9b9db446c2 100644 --- a/rust/ruby-prism-sys/tests/utils_tests.rs +++ b/rust/ruby-prism-sys/tests/utils_tests.rs @@ -14,8 +14,8 @@ fn version_test() { mod string { use ruby_prism_sys::{ - pm_string_cleanup, pm_string_length, pm_string_source, pm_string_t, pm_string_t__bindgen_ty_1, PM_STRING_CONSTANT, - PM_STRING_MAPPED, PM_STRING_OWNED, PM_STRING_SHARED, + pm_string_cleanup, pm_string_length, pm_string_source, pm_string_t, pm_string_t__bindgen_ty_1, + PM_STRING_CONSTANT, PM_STRING_MAPPED, PM_STRING_OWNED, PM_STRING_SHARED, }; use super::*; diff --git a/rust/ruby-prism/build.rs b/rust/ruby-prism/build.rs index 8faad957ab..1a3bb45a17 100644 --- a/rust/ruby-prism/build.rs +++ b/rust/ruby-prism/build.rs @@ -235,7 +235,7 @@ fn write_node(file: &mut File, flags: &[Flags], node: &Node) -> Result<(), Box {{", node.name)?; writeln!(file, " /// The pointer to the parser this node came from.")?; - writeln!(file, " parser: NonNull,")?; + writeln!(file, " parser: *const pm_parser_t,")?; writeln!(file)?; writeln!(file, " /// The raw pointer to the node allocated by prism.")?; writeln!(file, " pointer: *mut pm{}_t,", struct_name(&node.name))?; @@ -554,7 +554,6 @@ fn write_bindings(config: &Config) -> Result<(), Box> { file, r" use std::marker::PhantomData; -use std::ptr::NonNull; #[allow(clippy::wildcard_imports)] use ruby_prism_sys::*; @@ -581,7 +580,7 @@ use crate::{{ConstantId, ConstantList, Integer, Location, NodeList}}; writeln!(file, " /// The `{}` node", node.name)?; writeln!(file, " {} {{", node.name)?; writeln!(file, " /// The pointer to the associated parser this node came from.")?; - writeln!(file, " parser: NonNull,")?; + writeln!(file, " parser: *const pm_parser_t,")?; writeln!(file)?; writeln!(file, " /// The raw pointer to the node allocated by prism.")?; writeln!(file, " pointer: *mut pm{}_t,", struct_name(&node.name))?; @@ 
-606,7 +605,7 @@ impl<'pr> Node<'pr> {{ /// #[allow(clippy::not_unsafe_ptr_arg_deref)] #[allow(clippy::cast_ptr_alignment)] - pub(crate) fn new(parser: NonNull, node: *mut pm_node_t) -> Self {{ + pub(crate) fn new(parser: *const pm_parser_t, node: *mut pm_node_t) -> Self {{ match unsafe {{ (*node).type_ }} {{" )?; diff --git a/rust/ruby-prism/src/lib.rs b/rust/ruby-prism/src/lib.rs index b841af7807..6337957d20 100644 --- a/rust/ruby-prism/src/lib.rs +++ b/rust/ruby-prism/src/lib.rs @@ -8,6 +8,8 @@ // that doesn't follow the clippy rules. We don't want to see those warnings. #[allow(clippy::too_many_lines, clippy::use_self)] mod bindings { + use std::ptr::NonNull; + // In `build.rs`, we generate bindings based on the config.yml file. Here is // where we pull in those bindings and make them part of our library. include!(concat!(env!("OUT_DIR"), "/bindings.rs")); @@ -18,7 +20,6 @@ mod node_ext; mod parse_result; use std::ffi::CString; -use std::mem::MaybeUninit; use std::ptr::NonNull; pub use self::bindings::*; @@ -27,8 +28,8 @@ pub use self::node_ext::{ConstantPathError, FullName}; pub use self::parse_result::{Comment, CommentType, Comments, Diagnostic, Diagnostics, Location, MagicComment, MagicComments, ParseResult}; use ruby_prism_sys::{ - pm_arena_t, pm_options_command_line_set, pm_options_encoding_locked_set, pm_options_encoding_set, pm_options_filepath_set, pm_options_free, pm_options_frozen_string_literal_set, pm_options_line_set, pm_options_main_script_set, pm_options_new, pm_options_partial_script_set, pm_options_scope_forwarding_set, - pm_options_scope_mut, pm_options_scope_init, pm_options_scope_local_mut, pm_options_scopes_init, pm_options_t, pm_options_version_set, pm_parse, pm_parser_init, pm_parser_t, pm_string_constant_init, + pm_arena_new, pm_options_command_line_set, pm_options_encoding_locked_set, pm_options_encoding_set, pm_options_filepath_set, pm_options_free, pm_options_frozen_string_literal_set, pm_options_line_set, pm_options_main_script_set, 
pm_options_new, pm_options_partial_script_set, + pm_options_scope_forwarding_set, pm_options_scope_init, pm_options_scope_local_mut, pm_options_scope_mut, pm_options_scopes_init, pm_options_t, pm_options_version_set, pm_parse, pm_parser_new, pm_string_constant_init, }; /// The version of Ruby syntax to parse with. @@ -51,11 +52,21 @@ impl Version { /// `Latest` passes `NULL` to get the default behavior. unsafe fn set_on(self, opts: *mut pm_options_t) { match self { - Version::Latest => { pm_options_version_set(opts, std::ptr::null(), 0); }, - Version::CRuby3_3 => { pm_options_version_set(opts, c"3.3".as_ptr(), 3); }, - Version::CRuby3_4 => { pm_options_version_set(opts, c"3.4".as_ptr(), 3); }, - Version::CRuby3_5 => { pm_options_version_set(opts, c"3.5".as_ptr(), 3); }, - Version::CRuby4_1 => { pm_options_version_set(opts, c"4.1".as_ptr(), 3); }, + Self::Latest => { + pm_options_version_set(opts, std::ptr::null(), 0); + }, + Self::CRuby3_3 => { + pm_options_version_set(opts, c"3.3".as_ptr(), 3); + }, + Self::CRuby3_4 => { + pm_options_version_set(opts, c"3.4".as_ptr(), 3); + }, + Self::CRuby3_5 => { + pm_options_version_set(opts, c"3.5".as_ptr(), 3); + }, + Self::CRuby4_1 => { + pm_options_version_set(opts, c"4.1".as_ptr(), 3); + }, } } } @@ -335,18 +346,9 @@ impl Drop for ParseOptions { /// /// `options` must be a valid pointer to a `pm_options_t` or null. 
unsafe fn parse_impl(source: &[u8], options: *const pm_options_t) -> ParseResult<'_> { - let mut arena = Box::new(MaybeUninit::::zeroed().assume_init()); - let uninit = Box::new(MaybeUninit::::uninit()); - let uninit = Box::into_raw(uninit); - - pm_parser_init(arena.as_mut(), (*uninit).as_mut_ptr(), source.as_ptr(), source.len(), options); - - let parser = (*uninit).assume_init_mut(); - let parser = NonNull::new_unchecked(parser); - - let node = pm_parse(parser.as_ptr()); - let node = NonNull::new_unchecked(node); - + let arena = pm_arena_new(); + let parser = pm_parser_new(arena, source.as_ptr(), source.len(), options); + let node = NonNull::new_unchecked(pm_parse(parser)); ParseResult::new(source, arena, parser, node) } diff --git a/rust/ruby-prism/src/node.rs b/rust/ruby-prism/src/node.rs index cf44a119dc..e797c94b2e 100644 --- a/rust/ruby-prism/src/node.rs +++ b/rust/ruby-prism/src/node.rs @@ -18,7 +18,7 @@ use crate::Node; /// An iterator over the nodes in a list. pub struct NodeListIter<'pr> { - pub(crate) parser: NonNull, + pub(crate) parser: *const pm_parser_t, pub(crate) pointer: NonNull, pub(crate) index: usize, pub(crate) marker: PhantomData<&'pr mut pm_node_list>, @@ -40,7 +40,7 @@ impl<'pr> Iterator for NodeListIter<'pr> { /// A list of nodes. pub struct NodeList<'pr> { - pub(crate) parser: NonNull, + pub(crate) parser: *const pm_parser_t, pub(crate) pointer: NonNull, pub(crate) marker: PhantomData<&'pr mut pm_node_list>, } @@ -115,13 +115,13 @@ impl std::fmt::Debug for NodeList<'_> { /// A handle for a constant ID. 
pub struct ConstantId<'pr> { - pub(crate) parser: NonNull, + pub(crate) parser: *const pm_parser_t, pub(crate) id: pm_constant_id_t, pub(crate) marker: PhantomData<&'pr mut pm_constant_id_t>, } impl<'pr> ConstantId<'pr> { - pub(crate) const fn new(parser: NonNull, id: pm_constant_id_t) -> Self { + pub(crate) const fn new(parser: *const pm_parser_t, id: pm_constant_id_t) -> Self { ConstantId { parser, id, marker: PhantomData } } @@ -133,9 +133,10 @@ impl<'pr> ConstantId<'pr> { #[must_use] pub fn as_slice(&self) -> &'pr [u8] { unsafe { - let pool = &(*self.parser.as_ptr()).constant_pool; - let constant = &(*pool.constants.add((self.id - 1).try_into().unwrap())); - std::slice::from_raw_parts(constant.start, constant.length) + let constant = ruby_prism_sys::pm_parser_constant(self.parser, self.id); + let start = ruby_prism_sys::pm_constant_start(constant); + let length = ruby_prism_sys::pm_constant_length(constant); + std::slice::from_raw_parts(start, length) } } } @@ -148,7 +149,7 @@ impl std::fmt::Debug for ConstantId<'_> { /// An iterator over the constants in a list. pub struct ConstantListIter<'pr> { - pub(crate) parser: NonNull, + pub(crate) parser: *const pm_parser_t, pub(crate) pointer: NonNull, pub(crate) index: usize, pub(crate) marker: PhantomData<&'pr mut pm_constant_id_list_t>, @@ -171,7 +172,7 @@ impl<'pr> Iterator for ConstantListIter<'pr> { /// A list of constants. pub struct ConstantList<'pr> { /// The raw pointer to the parser where this list came from. - pub(crate) parser: NonNull, + pub(crate) parser: *const pm_parser_t, /// The raw pointer to the list allocated by prism. pub(crate) pointer: NonNull, diff --git a/rust/ruby-prism/src/parse_result/comments.rs b/rust/ruby-prism/src/parse_result/comments.rs index 767de6330a..9f6c80f83a 100644 --- a/rust/ruby-prism/src/parse_result/comments.rs +++ b/rust/ruby-prism/src/parse_result/comments.rs @@ -1,9 +1,8 @@ //! Comment handling for the prism parser. 
use std::marker::PhantomData; -use std::ptr::NonNull; -use ruby_prism_sys::{pm_comment_t, pm_comment_type_t, pm_magic_comment_t, pm_parser_t}; +use ruby_prism_sys::{pm_comment_location, pm_comment_t, pm_comment_type, pm_comment_type_t, pm_magic_comment_key, pm_magic_comment_t, pm_magic_comment_value, pm_parser_start, pm_parser_t}; use super::Location; @@ -19,16 +18,13 @@ pub enum CommentType { /// A comment that was found during parsing. #[derive(Debug)] pub struct Comment<'pr> { - content: NonNull, - parser: NonNull, + raw: *const pm_comment_t, + parser: *const pm_parser_t, marker: PhantomData<&'pr pm_comment_t>, } impl<'pr> Comment<'pr> { /// Returns the text of the comment. - /// - /// # Panics - /// Panics if the end offset is not greater than the start offset. #[must_use] pub fn text(&self) -> &[u8] { self.location().as_slice() @@ -37,7 +33,7 @@ impl<'pr> Comment<'pr> { /// Returns the type of the comment. #[must_use] pub fn type_(&self) -> CommentType { - let type_ = unsafe { self.content.as_ref().type_ }; + let type_ = unsafe { pm_comment_type(self.raw) }; if type_ == pm_comment_type_t::PM_COMMENT_EMBDOC { CommentType::EmbDocComment } else { @@ -47,22 +43,28 @@ impl<'pr> Comment<'pr> { /// The location of the comment in the source. #[must_use] - pub const fn location(&self) -> Location<'pr> { - Location::new(self.parser, unsafe { &self.content.as_ref().location }) + pub fn location(&self) -> Location<'pr> { + let loc = unsafe { pm_comment_location(self.raw) }; + Location { + parser: self.parser, + start: loc.start, + length: loc.length, + marker: PhantomData, + } } } -/// A struct created by the `comments` method on `ParseResult`. It can be used -/// to iterate over the comments in the parse result. +/// An iterator over comments collected from the parse result. 
pub struct Comments<'pr> { - comment: *mut pm_comment_t, - parser: NonNull, + ptrs: Vec<*const pm_comment_t>, + index: usize, + parser: *const pm_parser_t, marker: PhantomData<&'pr pm_comment_t>, } impl Comments<'_> { - pub(crate) const fn new(comment: *mut pm_comment_t, parser: NonNull) -> Self { - Comments { comment, parser, marker: PhantomData } + pub(crate) const fn new(ptrs: Vec<*const pm_comment_t>, parser: *const pm_parser_t) -> Self { + Comments { ptrs, index: 0, parser, marker: PhantomData } } } @@ -70,14 +72,10 @@ impl<'pr> Iterator for Comments<'pr> { type Item = Comment<'pr>; fn next(&mut self) -> Option { - if let Some(comment) = NonNull::new(self.comment) { - let current = Comment { - content: comment, - parser: self.parser, - marker: PhantomData, - }; - self.comment = unsafe { comment.as_ref().node.next.cast::() }; - Some(current) + if self.index < self.ptrs.len() { + let comment = self.ptrs[self.index]; + self.index += 1; + Some(Comment { raw: comment, parser: self.parser, marker: PhantomData }) } else { None } @@ -87,44 +85,44 @@ impl<'pr> Iterator for Comments<'pr> { /// A magic comment that was found during parsing. #[derive(Debug)] pub struct MagicComment<'pr> { - parser: NonNull, - comment: NonNull, + parser: *const pm_parser_t, + raw: *const pm_magic_comment_t, marker: PhantomData<&'pr pm_magic_comment_t>, } impl MagicComment<'_> { /// Returns the text of the comment's key. #[must_use] - pub const fn key(&self) -> &[u8] { + pub fn key(&self) -> &[u8] { unsafe { - let start = self.parser.as_ref().start.add(self.comment.as_ref().key.start as usize); - let len = self.comment.as_ref().key.length as usize; - std::slice::from_raw_parts(start, len) + let loc = pm_magic_comment_key(self.raw); + let start = pm_parser_start(self.parser).add(loc.start as usize); + std::slice::from_raw_parts(start, loc.length as usize) } } /// Returns the text of the comment's value. 
#[must_use] - pub const fn value(&self) -> &[u8] { + pub fn value(&self) -> &[u8] { unsafe { - let start = self.parser.as_ref().start.add(self.comment.as_ref().value.start as usize); - let len = self.comment.as_ref().value.length as usize; - std::slice::from_raw_parts(start, len) + let loc = pm_magic_comment_value(self.raw); + let start = pm_parser_start(self.parser).add(loc.start as usize); + std::slice::from_raw_parts(start, loc.length as usize) } } } -/// A struct created by the `magic_comments` method on `ParseResult`. It can be used -/// to iterate over the magic comments in the parse result. +/// An iterator over magic comments collected from the parse result. pub struct MagicComments<'pr> { - parser: NonNull, - comment: *mut pm_magic_comment_t, + ptrs: Vec<*const pm_magic_comment_t>, + index: usize, + parser: *const pm_parser_t, marker: PhantomData<&'pr pm_magic_comment_t>, } impl MagicComments<'_> { - pub(crate) const fn new(parser: NonNull, comment: *mut pm_magic_comment_t) -> Self { - MagicComments { parser, comment, marker: PhantomData } + pub(crate) const fn new(ptrs: Vec<*const pm_magic_comment_t>, parser: *const pm_parser_t) -> Self { + MagicComments { ptrs, index: 0, parser, marker: PhantomData } } } @@ -132,10 +130,10 @@ impl<'pr> Iterator for MagicComments<'pr> { type Item = MagicComment<'pr>; fn next(&mut self) -> Option { - if let Some(comment) = NonNull::new(self.comment) { - let current = MagicComment { parser: self.parser, comment, marker: PhantomData }; - self.comment = unsafe { comment.as_ref().node.next.cast::() }; - Some(current) + if self.index < self.ptrs.len() { + let comment = self.ptrs[self.index]; + self.index += 1; + Some(MagicComment { parser: self.parser, raw: comment, marker: PhantomData }) } else { None } diff --git a/rust/ruby-prism/src/parse_result/diagnostics.rs b/rust/ruby-prism/src/parse_result/diagnostics.rs index 00fc9ffe33..ba231fabb2 100644 --- a/rust/ruby-prism/src/parse_result/diagnostics.rs +++ 
b/rust/ruby-prism/src/parse_result/diagnostics.rs @@ -1,18 +1,17 @@ //! Diagnostic handling for parse errors and warnings. -use std::ffi::{c_char, CStr}; +use std::ffi::CStr; use std::marker::PhantomData; -use std::ptr::NonNull; -use ruby_prism_sys::{pm_diagnostic_t, pm_parser_t}; +use ruby_prism_sys::{pm_diagnostic_location, pm_diagnostic_message, pm_diagnostic_t, pm_parser_t}; use super::Location; /// A diagnostic message that came back from the parser. #[derive(Debug)] pub struct Diagnostic<'pr> { - diag: NonNull, - parser: NonNull, + raw: *const pm_diagnostic_t, + parser: *const pm_parser_t, marker: PhantomData<&'pr pm_diagnostic_t>, } @@ -21,34 +20,39 @@ impl<'pr> Diagnostic<'pr> { /// /// # Panics /// - /// Panics if the message is not able to be converted into a `CStr`. - /// + /// Panics if the message is not valid UTF-8. #[must_use] pub fn message(&self) -> &str { unsafe { - let message: *mut c_char = self.diag.as_ref().message.cast_mut(); + let message = pm_diagnostic_message(self.raw); CStr::from_ptr(message).to_str().expect("prism allows only UTF-8 for diagnostics.") } } /// The location of the diagnostic in the source. #[must_use] - pub const fn location(&self) -> Location<'pr> { - Location::new(self.parser, unsafe { &self.diag.as_ref().location }) + pub fn location(&self) -> Location<'pr> { + let loc = unsafe { pm_diagnostic_location(self.raw) }; + Location { + parser: self.parser, + start: loc.start, + length: loc.length, + marker: PhantomData, + } } } -/// A struct created by the `errors` or `warnings` methods on `ParseResult`. It -/// can be used to iterate over the diagnostics in the parse result. +/// An iterator over diagnostics collected from the parse result. 
pub struct Diagnostics<'pr> { - diagnostic: *mut pm_diagnostic_t, - parser: NonNull, + ptrs: Vec<*const pm_diagnostic_t>, + index: usize, + parser: *const pm_parser_t, marker: PhantomData<&'pr pm_diagnostic_t>, } impl Diagnostics<'_> { - pub(crate) const fn new(diagnostic: *mut pm_diagnostic_t, parser: NonNull) -> Self { - Diagnostics { diagnostic, parser, marker: PhantomData } + pub(crate) const fn new(ptrs: Vec<*const pm_diagnostic_t>, parser: *const pm_parser_t) -> Self { + Diagnostics { ptrs, index: 0, parser, marker: PhantomData } } } @@ -56,14 +60,14 @@ impl<'pr> Iterator for Diagnostics<'pr> { type Item = Diagnostic<'pr>; fn next(&mut self) -> Option { - if let Some(diagnostic) = NonNull::new(self.diagnostic) { - let current = Diagnostic { - diag: diagnostic, + if self.index < self.ptrs.len() { + let diagnostic = self.ptrs[self.index]; + self.index += 1; + Some(Diagnostic { + raw: diagnostic, parser: self.parser, marker: PhantomData, - }; - self.diagnostic = unsafe { diagnostic.as_ref().node.next.cast::() }; - Some(current) + }) } else { None } diff --git a/rust/ruby-prism/src/parse_result/mod.rs b/rust/ruby-prism/src/parse_result/mod.rs index 5e071b515b..82cae4b731 100644 --- a/rust/ruby-prism/src/parse_result/mod.rs +++ b/rust/ruby-prism/src/parse_result/mod.rs @@ -1,14 +1,14 @@ //! Parse result types for the prism parser. -//! -//! This module contains types related to the result of parsing, including -//! the main `ParseResult` struct, location tracking, comments, and diagnostics. 
mod comments; mod diagnostics; use std::ptr::NonNull; -use ruby_prism_sys::{pm_arena_cleanup, pm_arena_t, pm_comment_t, pm_diagnostic_t, pm_line_offset_list_line_column, pm_location_t, pm_magic_comment_t, pm_node_t, pm_parser_cleanup, pm_parser_t}; +use ruby_prism_sys::{ + pm_arena_free, pm_arena_t, pm_comment_t, pm_diagnostic_t, pm_line_offset_list_line_column, pm_location_t, pm_magic_comment_t, pm_node_t, pm_parser_comments_each, pm_parser_comments_size, pm_parser_data_loc, pm_parser_errors_each, pm_parser_errors_size, pm_parser_free, + pm_parser_frozen_string_literal, pm_parser_line_offsets, pm_parser_magic_comments_each, pm_parser_magic_comments_size, pm_parser_start, pm_parser_start_line, pm_parser_t, pm_parser_warnings_each, pm_parser_warnings_size, +}; pub use self::comments::{Comment, CommentType, Comments, MagicComment, MagicComments}; pub use self::diagnostics::{Diagnostic, Diagnostics}; @@ -17,7 +17,7 @@ use crate::Node; /// A range in the source file, represented as a start offset and length. pub struct Location<'pr> { - pub(crate) parser: NonNull, + pub(crate) parser: *const pm_parser_t, pub(crate) start: u32, pub(crate) length: u32, marker: std::marker::PhantomData<&'pr [u8]>, @@ -28,14 +28,14 @@ impl<'pr> Location<'pr> { #[must_use] pub fn as_slice(&self) -> &'pr [u8] { unsafe { - let parser_start = (*self.parser.as_ptr()).start; + let parser_start = pm_parser_start(self.parser); std::slice::from_raw_parts(parser_start.add(self.start as usize), self.length as usize) } } /// Return a Location from the given `pm_location_t`. #[must_use] - pub(crate) const fn new(parser: NonNull, location: &'pr pm_location_t) -> Self { + pub(crate) const fn new(parser: *const pm_parser_t, location: &'pr pm_location_t) -> Self { Location { parser, start: location.start, @@ -114,11 +114,10 @@ impl Location<'_> { /// Returns the line and column number for the given byte offset. 
fn line_column(&self, cursor: u32) -> (i32, u32) { - // SAFETY: We read the line_offsets and start_line from the parser, - // which is valid for the lifetime of this Location. unsafe { - let parser = self.parser.as_ptr(); - let result = pm_line_offset_list_line_column(&raw const (*parser).line_offsets, cursor, (*parser).start_line); + let line_offsets = pm_parser_line_offsets(self.parser); + let start_line = pm_parser_start_line(self.parser); + let result = pm_line_offset_list_line_column(line_offsets, cursor, start_line); (result.line, result.column) } } @@ -141,17 +140,35 @@ impl std::fmt::Debug for Location<'_> { } } +// C callback that collects comment pointers into a Vec +unsafe extern "C" fn collect_comment(comment: *const pm_comment_t, data: *mut std::ffi::c_void) { + let vec = &mut *(data.cast::>()); + vec.push(comment); +} + +// C callback that collects magic comment pointers into a Vec +unsafe extern "C" fn collect_magic_comment(comment: *const pm_magic_comment_t, data: *mut std::ffi::c_void) { + let vec = &mut *(data.cast::>()); + vec.push(comment); +} + +// C callback that collects diagnostic pointers into a Vec +unsafe extern "C" fn collect_diagnostic(diagnostic: *const pm_diagnostic_t, data: *mut std::ffi::c_void) { + let vec = &mut *(data.cast::>()); + vec.push(diagnostic); +} + /// The result of parsing a source string. #[derive(Debug)] pub struct ParseResult<'pr> { source: &'pr [u8], - arena: Box, - parser: NonNull, + arena: *mut pm_arena_t, + parser: *mut pm_parser_t, node: NonNull, } impl<'pr> ParseResult<'pr> { - pub(crate) const unsafe fn new(source: &'pr [u8], arena: Box, parser: NonNull, node: NonNull) -> Self { + pub(crate) const unsafe fn new(source: &'pr [u8], arena: *mut pm_arena_t, parser: *mut pm_parser_t, node: NonNull) -> Self { ParseResult { source, arena, parser, node } } @@ -164,7 +181,7 @@ impl<'pr> ParseResult<'pr> { /// Returns whether we found a `frozen_string_literal` magic comment with a true value. 
#[must_use] pub fn frozen_string_literals(&self) -> bool { - unsafe { (*self.parser.as_ptr()).frozen_string_literal == 1 } + unsafe { pm_parser_frozen_string_literal(self.parser) == 1 } } /// Returns a slice of the source string that was parsed using the given @@ -181,54 +198,63 @@ impl<'pr> ParseResult<'pr> { #[must_use] pub fn line_offsets(&self) -> &'pr [u32] { unsafe { - let list = &(*self.parser.as_ptr()).line_offsets; + let list = &*pm_parser_line_offsets(self.parser); std::slice::from_raw_parts(list.offsets, list.size) } } + /// Returns an iterator that can be used to iterate over the errors in the /// parse result. #[must_use] pub fn errors(&self) -> Diagnostics<'_> { + let size = unsafe { pm_parser_errors_size(self.parser) }; + let mut ptrs: Vec<*const pm_diagnostic_t> = Vec::with_capacity(size); unsafe { - let list = &mut (*self.parser.as_ptr()).error_list; - Diagnostics::new(list.head.cast::(), self.parser) + pm_parser_errors_each(self.parser, Some(collect_diagnostic), (&raw mut ptrs).cast()); } + Diagnostics::new(ptrs, self.parser) } /// Returns an iterator that can be used to iterate over the warnings in the /// parse result. #[must_use] pub fn warnings(&self) -> Diagnostics<'_> { + let size = unsafe { pm_parser_warnings_size(self.parser) }; + let mut ptrs: Vec<*const pm_diagnostic_t> = Vec::with_capacity(size); unsafe { - let list = &mut (*self.parser.as_ptr()).warning_list; - Diagnostics::new(list.head.cast::(), self.parser) + pm_parser_warnings_each(self.parser, Some(collect_diagnostic), (&raw mut ptrs).cast()); } + Diagnostics::new(ptrs, self.parser) } /// Returns an iterator that can be used to iterate over the comments in the /// parse result. 
#[must_use] pub fn comments(&self) -> Comments<'_> { + let size = unsafe { pm_parser_comments_size(self.parser) }; + let mut ptrs: Vec<*const pm_comment_t> = Vec::with_capacity(size); unsafe { - let list = &mut (*self.parser.as_ptr()).comment_list; - Comments::new(list.head.cast::(), self.parser) + pm_parser_comments_each(self.parser, Some(collect_comment), (&raw mut ptrs).cast()); } + Comments::new(ptrs, self.parser) } /// Returns an iterator that can be used to iterate over the magic comments in the /// parse result. #[must_use] pub fn magic_comments(&self) -> MagicComments<'_> { + let size = unsafe { pm_parser_magic_comments_size(self.parser) }; + let mut ptrs: Vec<*const pm_magic_comment_t> = Vec::with_capacity(size); unsafe { - let list = &mut (*self.parser.as_ptr()).magic_comment_list; - MagicComments::new(self.parser, list.head.cast::()) + pm_parser_magic_comments_each(self.parser, Some(collect_magic_comment), (&raw mut ptrs).cast()); } + MagicComments::new(ptrs, self.parser) } /// Returns an optional location of the __END__ marker and the rest of the content of the file. #[must_use] pub fn data_loc(&self) -> Option> { - let location = unsafe { &(*self.parser.as_ptr()).data_loc }; + let location = unsafe { &*pm_parser_data_loc(self.parser) }; if location.length == 0 { None } else { @@ -260,9 +286,8 @@ impl<'pr> ParseResult<'pr> { impl Drop for ParseResult<'_> { fn drop(&mut self) { unsafe { - pm_parser_cleanup(self.parser.as_ptr()); - drop(Box::from_raw(self.parser.as_ptr())); - pm_arena_cleanup(self.arena.as_mut()); + pm_parser_free(self.parser); + pm_arena_free(self.arena); } } } diff --git a/src/parser.c b/src/parser.c index b61fb6ce03..a65fc1a9d5 100644 --- a/src/parser.c +++ b/src/parser.c @@ -69,6 +69,14 @@ pm_parser_encoding_name(const pm_parser_t *parser) { return parser->encoding->name; } +/** + * Returns the frozen string literal value of the parser. 
+ */ +int8_t +pm_parser_frozen_string_literal(const pm_parser_t *parser) { + return parser->frozen_string_literal; +} + /** * Returns the line offsets that are associated with the given parser. * @@ -249,3 +257,12 @@ pm_parser_constants_each(const pm_parser_t *parser, pm_constant_callback_t callb callback(constant, data); } } + +/** + * Returns a pointer to the constant at the given id in the constant pool + * associated with the given parser. + */ +const pm_constant_t * +pm_parser_constant(const pm_parser_t *parser, pm_constant_id_t constant_id) { + return pm_constant_pool_id_to_constant(&parser->constant_pool, constant_id); +} From 852bb0476d32881c8fe4bb1f28c5c396a0fbcd3c Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 18 Mar 2026 15:28:39 -0400 Subject: [PATCH 085/100] Ensure wasm build is happy --- src/json.c | 5 +---- templates/src/prettyprint.c.erb | 5 +---- templates/src/serialize.c.erb | 5 +---- 3 files changed, 3 insertions(+), 12 deletions(-) diff --git a/src/json.c b/src/json.c index 57e3fc07df..68b5ff3ccb 100644 --- a/src/json.c +++ b/src/json.c @@ -13,10 +13,7 @@ */ #ifdef PRISM_EXCLUDE_JSON -#include "prism/compiler/unused.h" - -void -pm_dump_json(PRISM_UNUSED pm_buffer_t *buffer, PRISM_UNUSED const pm_parser_t *parser, PRISM_UNUSED const pm_node_t *node) {} +void pm_dump_json(void) {} #else diff --git a/templates/src/prettyprint.c.erb b/templates/src/prettyprint.c.erb index 9992f51096..f12e55d726 100644 --- a/templates/src/prettyprint.c.erb +++ b/templates/src/prettyprint.c.erb @@ -6,10 +6,7 @@ * PRISM_EXCLUDE_PRETTYPRINT define. 
*/ #ifdef PRISM_EXCLUDE_PRETTYPRINT -#include "prism/compiler/unused.h" - -void -pm_prettyprint(PRISM_UNUSED pm_buffer_t *buffer, PRISM_UNUSED const pm_parser_t *parser, PRISM_UNUSED const pm_node_t *node) {} +void pm_prettyprint(void) {} #else diff --git a/templates/src/serialize.c.erb b/templates/src/serialize.c.erb index 8c28cb89fc..c30487bbb0 100644 --- a/templates/src/serialize.c.erb +++ b/templates/src/serialize.c.erb @@ -5,10 +5,7 @@ * PRISM_EXCLUDE_SERIALIZATION define. */ #ifdef PRISM_EXCLUDE_SERIALIZATION -#include "prism/compiler/unused.h" - -void -pm_serialize_lex(PRISM_UNUSED pm_buffer_t *buffer, PRISM_UNUSED const uint8_t *source, PRISM_UNUSED size_t size, PRISM_UNUSED const char *data) {} +void pm_serialize_lex(void) {} #else From 149cc9d2a3087b071bfcc5d5e0af8e7fac8b38e5 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 18 Mar 2026 15:36:43 -0400 Subject: [PATCH 086/100] Final review --- include/prism/comments.h | 2 ++ include/prism/magic_comments.h | 2 ++ include/prism/options.h | 13 +++++++------ include/prism/version.h | 2 ++ 4 files changed, 13 insertions(+), 6 deletions(-) diff --git a/include/prism/comments.h b/include/prism/comments.h index 5938f388cd..2270d53889 100644 --- a/include/prism/comments.h +++ b/include/prism/comments.h @@ -1,5 +1,7 @@ /** * @file comments.h + * + * Types and functions related to comments found during parsing. */ #ifndef PRISM_COMMENTS_H #define PRISM_COMMENTS_H diff --git a/include/prism/magic_comments.h b/include/prism/magic_comments.h index 6d47cda985..c9d6b600e8 100644 --- a/include/prism/magic_comments.h +++ b/include/prism/magic_comments.h @@ -1,5 +1,7 @@ /** * @file magic_comments.h + * + * Types and functions related to magic comments found during parsing. 
*/ #ifndef PRISM_MAGIC_COMMENTS_H #define PRISM_MAGIC_COMMENTS_H diff --git a/include/prism/options.h b/include/prism/options.h index 1c7281d599..37a713095e 100644 --- a/include/prism/options.h +++ b/include/prism/options.h @@ -6,6 +6,7 @@ #ifndef PRISM_OPTIONS_H #define PRISM_OPTIONS_H +#include "prism/compiler/exported.h" #include "prism/compiler/nodiscard.h" #include "prism/compiler/nonnull.h" @@ -25,7 +26,7 @@ typedef struct pm_options_scope_t pm_options_scope_t; typedef struct pm_options_t pm_options_t; /** - * String literals should be made frozen. + * String literals should not be frozen. */ #define PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED ((int8_t) -1) @@ -36,23 +37,23 @@ typedef struct pm_options_t pm_options_t; #define PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET ((int8_t) 0) /** - * String literals should be made mutable. + * String literals should be made frozen. */ #define PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED ((int8_t) 1) /** The default value for parameters. */ static const uint8_t PM_OPTIONS_SCOPE_FORWARDING_NONE = 0x0; -/** When the scope is fowarding with the * parameter. */ +/** When the scope is forwarding with the * parameter. */ static const uint8_t PM_OPTIONS_SCOPE_FORWARDING_POSITIONALS = 0x1; -/** When the scope is fowarding with the ** parameter. */ +/** When the scope is forwarding with the ** parameter. */ static const uint8_t PM_OPTIONS_SCOPE_FORWARDING_KEYWORDS = 0x2; -/** When the scope is fowarding with the & parameter. */ +/** When the scope is forwarding with the & parameter. */ static const uint8_t PM_OPTIONS_SCOPE_FORWARDING_BLOCK = 0x4; -/** When the scope is fowarding with the ... parameter. */ +/** When the scope is forwarding with the ... parameter. 
*/ static const uint8_t PM_OPTIONS_SCOPE_FORWARDING_ALL = 0x8; /** diff --git a/include/prism/version.h b/include/prism/version.h index 99cc99158e..181b398462 100644 --- a/include/prism/version.h +++ b/include/prism/version.h @@ -6,6 +6,8 @@ #ifndef PRISM_VERSION_H #define PRISM_VERSION_H +#include "prism/compiler/exported.h" + /** * The major version of the Prism library as an int. */ From 665bcf30566d33ad5c8fc7bf59ae3b763169c545 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 18 Mar 2026 15:51:33 -0400 Subject: [PATCH 087/100] Rebase --- include/prism/internal/strpbrk.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/prism/internal/strpbrk.h b/include/prism/internal/strpbrk.h index 124c528cb3..d64156c002 100644 --- a/include/prism/internal/strpbrk.h +++ b/include/prism/internal/strpbrk.h @@ -3,6 +3,9 @@ #include "prism/parser.h" +/* The maximum number of bytes in a strpbrk charset. */ +#define PM_STRPBRK_CACHE_SIZE 16 + #include #include From 1c1e94849ef5e39c82e107e3e0914d7268031ba8 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 18 Mar 2026 16:48:07 -0400 Subject: [PATCH 088/100] Fix up bindings --- ext/prism/extension.c | 10 +++++ include/prism/internal/bit.h | 43 +++++++++++-------- .../org/jruby/parser/prism/wasm/Prism.java | 6 +-- lib/prism/ffi.rb | 3 ++ src/arena.c | 2 +- src/json.c | 10 ++--- src/prism.c | 1 + templates/ext/prism/api_node.c.erb | 2 + templates/src/json.c.erb | 6 +-- templates/src/prettyprint.c.erb | 4 +- templates/src/serialize.c.erb | 4 +- 11 files changed, 56 insertions(+), 35 deletions(-) diff --git a/ext/prism/extension.c b/ext/prism/extension.c index 0c9458e6cf..43e27d68c5 100644 --- a/ext/prism/extension.c +++ b/ext/prism/extension.c @@ -4,6 +4,8 @@ #include #endif +#include + // NOTE: this file should contain only bindings. All non-trivial logic should be // in libprism so it can be shared its the various callers. 
@@ -411,7 +413,11 @@ dump(int argc, VALUE *argv, VALUE self) { if (pm_options_freeze(options)) rb_obj_freeze(value); #ifdef PRISM_BUILD_DEBUG +#ifdef xfree_sized xfree_sized(dup, length); +#else + xfree(dup); +#endif #endif pm_string_cleanup(&input); @@ -968,7 +974,11 @@ parse(int argc, VALUE *argv, VALUE self) { VALUE value = parse_input(&input, options); #ifdef PRISM_BUILD_DEBUG +#ifdef xfree_sized xfree_sized(dup, length); +#else + xfree(dup); +#endif #endif pm_string_cleanup(&input); diff --git a/include/prism/internal/bit.h b/include/prism/internal/bit.h index 4eec494887..b0111a4c2c 100644 --- a/include/prism/internal/bit.h +++ b/include/prism/internal/bit.h @@ -11,27 +11,32 @@ * (matching the behavior of __builtin_ctzll and _BitScanForward64). */ #if defined(__GNUC__) || defined(__clang__) - #define pm_ctzll(v) ((unsigned) __builtin_ctzll(v)) +#define pm_ctzll(v) ((unsigned) __builtin_ctzll(v)) #elif defined(_MSC_VER) - #include - static PRISM_INLINE unsigned pm_ctzll(uint64_t v) { - unsigned long index; - _BitScanForward64(&index, v); - return (unsigned) index; - } +#include +#include + +static PRISM_INLINE unsigned +pm_ctzll(uint64_t v) { + unsigned long index; + _BitScanForward64(&index, v); + return (unsigned) index; +} #else - static PRISM_INLINE unsigned - pm_ctzll(uint64_t v) { - unsigned c = 0; - v &= (uint64_t) (-(int64_t) v); - if (v & 0x00000000FFFFFFFFULL) c += 0; else c += 32; - if (v & 0x0000FFFF0000FFFFULL) c += 0; else c += 16; - if (v & 0x00FF00FF00FF00FFULL) c += 0; else c += 8; - if (v & 0x0F0F0F0F0F0F0F0FULL) c += 0; else c += 4; - if (v & 0x3333333333333333ULL) c += 0; else c += 2; - if (v & 0x5555555555555555ULL) c += 0; else c += 1; - return c; - } +#include + +static PRISM_INLINE unsigned +pm_ctzll(uint64_t v) { + unsigned c = 0; + v &= (uint64_t) (-(int64_t) v); + if (v & 0x00000000FFFFFFFFULL) c += 0; else c += 32; + if (v & 0x0000FFFF0000FFFFULL) c += 0; else c += 16; + if (v & 0x00FF00FF00FF00FFULL) c += 0; else c += 8; + if (v & 
0x0F0F0F0F0F0F0F0FULL) c += 0; else c += 4; + if (v & 0x3333333333333333ULL) c += 0; else c += 2; + if (v & 0x5555555555555555ULL) c += 0; else c += 1; + return c; +} #endif #endif diff --git a/java-wasm/src/main/java/org/jruby/parser/prism/wasm/Prism.java b/java-wasm/src/main/java/org/jruby/parser/prism/wasm/Prism.java index 67a127276c..71726703c3 100644 --- a/java-wasm/src/main/java/org/jruby/parser/prism/wasm/Prism.java +++ b/java-wasm/src/main/java/org/jruby/parser/prism/wasm/Prism.java @@ -40,8 +40,7 @@ public Prism(WasiOptions wasiOpts) { preOptionsPointer = exports.calloc(1, PACKED_OPTIONS_BUFFER_SIZE); preSourcePointer = exports.calloc(1, SOURCE_SIZE); - bufferPointer = exports.calloc(exports.pmBufferSizeof(), 1); - exports.pmBufferInit(bufferPointer); + bufferPointer = exports.pmBufferNew(); } public byte[] serialize(byte[] packedOptions, byte[] sourceBytes, int sourceLength) { @@ -60,7 +59,8 @@ public byte[] serialize(byte[] packedOptions, byte[] sourceBytes, int sourceLeng exports.calloc(1, packedOptions.length) : preOptionsPointer; instance.memory().write(optionsPointer, packedOptions); - exports.pmBufferClear(bufferPointer); + exports.pmBufferFree(bufferPointer); + bufferPointer = exports.pmBufferNew(); exports.pmSerializeParse( bufferPointer, sourcePointer, sourceLength, optionsPointer); diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb index 7b22a1304c..5144ba72c4 100644 --- a/lib/prism/ffi.rb +++ b/lib/prism/ffi.rb @@ -59,6 +59,9 @@ def self.load_exported_functions_from(header, *functions, callbacks) # We only want to load the functions that we are interested in. next unless functions.any? { |function| line.include?(function) } + # Strip trailing attributes (PRISM_NODISCARD, PRISM_NONNULL(...), etc.) + line = line.sub(/\)(\s+PRISM_\w+(?:\([^)]*\))?)+\s*;/, ");") + # Parse the function declaration. 
unless /^PRISM_EXPORTED_FUNCTION (?.+) (?\w+)\((?.+)\);$/ =~ line raise "Could not parse #{line}" diff --git a/src/arena.c b/src/arena.c index dd998402f3..7ae02715df 100644 --- a/src/arena.c +++ b/src/arena.c @@ -113,5 +113,5 @@ pm_arena_cleanup(pm_arena_t *arena) { void pm_arena_free(pm_arena_t *arena) { pm_arena_cleanup(arena); - xfree(arena); + xfree(arena); } diff --git a/src/json.c b/src/json.c index 68b5ff3ccb..72975db724 100644 --- a/src/json.c +++ b/src/json.c @@ -8,14 +8,10 @@ #include "prism/json.h" -/* We optionally support dumping to JSON. For systems that don not want or need - * this functionality, it can be turned off with the PRISM_EXCLUDE_JSON define. - */ -#ifdef PRISM_EXCLUDE_JSON - -void pm_dump_json(void) {} +// Ensure this translation unit is never empty, even when JSON is excluded. +typedef int pm_json_unused_t; -#else +#ifndef PRISM_EXCLUDE_JSON #include "prism/internal/buffer.h" #include "prism/internal/constant_pool.h" diff --git a/src/prism.c b/src/prism.c index b6afa8bb15..39602fbfc2 100644 --- a/src/prism.c +++ b/src/prism.c @@ -30,6 +30,7 @@ #include "prism/excludes.h" #include "prism/serialize.h" +#include "prism/stream.h" #include "prism/version.h" #include diff --git a/templates/ext/prism/api_node.c.erb b/templates/ext/prism/api_node.c.erb index 6dd3a59372..506c2e87f8 100644 --- a/templates/ext/prism/api_node.c.erb +++ b/templates/ext/prism/api_node.c.erb @@ -2,6 +2,8 @@ #include "prism/extension.h" #include "prism/internal/allocator.h" +#include + extern VALUE rb_cPrism; extern VALUE rb_cPrismNode; extern VALUE rb_cPrismSource; diff --git a/templates/src/json.c.erb b/templates/src/json.c.erb index 66397b8fcd..5c4ab8d92a 100644 --- a/templates/src/json.c.erb +++ b/templates/src/json.c.erb @@ -1,8 +1,8 @@ #include "prism/json.h" -/* We optionally support dumping to JSON. For systems that don not want or need - * this functionality, it can be turned off with the PRISM_EXCLUDE_JSON define. 
- */ +// Ensure this translation unit is never empty, even when JSON is excluded. +typedef int pm_json_unused_t; + #ifndef PRISM_EXCLUDE_JSON #include "prism/internal/buffer.h" diff --git a/templates/src/prettyprint.c.erb b/templates/src/prettyprint.c.erb index f12e55d726..f12531d934 100644 --- a/templates/src/prettyprint.c.erb +++ b/templates/src/prettyprint.c.erb @@ -6,7 +6,9 @@ * PRISM_EXCLUDE_PRETTYPRINT define. */ #ifdef PRISM_EXCLUDE_PRETTYPRINT -void pm_prettyprint(void) {} +/* Ensure this translation unit is never empty, even when prettyprint is + * excluded. */ +typedef int pm_prettyprint_unused_t; #else diff --git a/templates/src/serialize.c.erb b/templates/src/serialize.c.erb index c30487bbb0..3d9811e5db 100644 --- a/templates/src/serialize.c.erb +++ b/templates/src/serialize.c.erb @@ -5,7 +5,9 @@ * PRISM_EXCLUDE_SERIALIZATION define. */ #ifdef PRISM_EXCLUDE_SERIALIZATION -void pm_serialize_lex(void) {} +/* Ensure this translation unit is never empty, even when serialization is + * excluded. 
*/ +typedef int pm_serialize_unused_t; #else From 717e4e738dae021e99b7f1df4321021a2c45ddfd Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 18 Mar 2026 20:10:36 -0400 Subject: [PATCH 089/100] Ensure we free options before raising type errors --- ext/prism/extension.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/ext/prism/extension.c b/ext/prism/extension.c index 43e27d68c5..12bd1c4b98 100644 --- a/ext/prism/extension.c +++ b/ext/prism/extension.c @@ -312,6 +312,11 @@ string_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) VALUE keywords; rb_scan_args(argc, argv, "1:", &string, &keywords); + if (!RB_TYPE_P(string, T_STRING)) { + pm_options_free(options); + rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(string)); + } + extract_options(options, Qnil, keywords); input_load_string(input, string); } @@ -325,7 +330,11 @@ file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options, V VALUE keywords; rb_scan_args(argc, argv, "1:", &filepath, &keywords); - Check_Type(filepath, T_STRING); + if (!RB_TYPE_P(filepath, T_STRING)) { + pm_options_free(options); + rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(filepath)); + } + *encoded_filepath = rb_str_encode_ospath(filepath); extract_options(options, *encoded_filepath, keywords); From 6ba2c6424e5aa667572f798ae393343e68ff939e Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 18 Mar 2026 20:36:01 -0400 Subject: [PATCH 090/100] Add necessary functions for CRuby integration --- include/prism/constant_pool.h | 11 +++++++++++ include/prism/internal/constant_pool.h | 6 ------ include/prism/parser.h | 27 ++++++++++++++++++++++++++ src/parser.c | 25 ++++++++++++++++++++++++ 4 files changed, 63 insertions(+), 6 deletions(-) diff --git a/include/prism/constant_pool.h b/include/prism/constant_pool.h index 7868c584a7..7ca795a701 100644 --- a/include/prism/constant_pool.h 
+++ b/include/prism/constant_pool.h @@ -60,4 +60,15 @@ PRISM_EXPORTED_FUNCTION const uint8_t * pm_constant_start(const pm_constant_t *c */ PRISM_EXPORTED_FUNCTION size_t pm_constant_length(const pm_constant_t *constant) PRISM_NONNULL(1); +/** + * Find a constant in a constant pool. Returns the id of the constant, or 0 if + * the constant is not found. + * + * @param pool The constant pool to search. + * @param start A pointer to the start of the string to search for. + * @param length The length of the string to search for. + * @returns The id of the constant, or 0 if the constant is not found. + */ +PRISM_EXPORTED_FUNCTION pm_constant_id_t pm_constant_pool_find(const pm_constant_pool_t *pool, const uint8_t *start, size_t length) PRISM_NONNULL(1, 2); + #endif diff --git a/include/prism/internal/constant_pool.h b/include/prism/internal/constant_pool.h index 7ca265d594..f82fa4d717 100644 --- a/include/prism/internal/constant_pool.h +++ b/include/prism/internal/constant_pool.h @@ -95,12 +95,6 @@ void pm_constant_pool_init(pm_arena_t *arena, pm_constant_pool_t *pool, uint32_t /* Return a pointer to the constant indicated by the given constant id. */ pm_constant_t * pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t constant_id); -/* - * Find a constant in a constant pool. Returns the id of the constant, or 0 if - * the constant is not found. - */ -pm_constant_id_t pm_constant_pool_find(const pm_constant_pool_t *pool, const uint8_t *start, size_t length); - /* * Insert a constant into a constant pool that is a slice of a source string. * Returns the id of the constant, or 0 if any potential calls to resize fail. 
diff --git a/include/prism/parser.h b/include/prism/parser.h index 78e6c189ac..c0d0a5b5e4 100644 --- a/include/prism/parser.h +++ b/include/prism/parser.h @@ -112,6 +112,33 @@ PRISM_EXPORTED_FUNCTION int32_t pm_parser_start_line(const pm_parser_t *parser) */ PRISM_EXPORTED_FUNCTION const char * pm_parser_encoding_name(const pm_parser_t *parser) PRISM_NONNULL(1); +/** + * Returns the width of the character at the given pointer in the encoding that + * is being used to parse the source. + * + * @param parser the parser whose encoding we want to use + * @param start a pointer to the start of the character + * @param remaining the number of bytes remaining in the source + * @returns the width of the character in bytes + */ +PRISM_EXPORTED_FUNCTION size_t pm_parser_encoding_char_width(const pm_parser_t *parser, const uint8_t *start, ptrdiff_t remaining) PRISM_NONNULL(1, 2); + +/** + * Returns whether or not the parser is using the US-ASCII encoding. + * + * @param parser the parser to check + * @returns true if the parser is using US-ASCII encoding, false otherwise + */ +PRISM_EXPORTED_FUNCTION bool pm_parser_encoding_us_ascii(const pm_parser_t *parser) PRISM_NONNULL(1); + +/** + * Returns the filepath that is being used to parse the source. + * + * @param parser the parser whose filepath we want to get + * @returns a pointer to the filepath string + */ +PRISM_EXPORTED_FUNCTION const pm_string_t * pm_parser_filepath(const pm_parser_t *parser) PRISM_NONNULL(1); + /** * Returns the frozen string literal value of the parser, as determined by the * frozen_string_literal magic comment or the option set on the parser. diff --git a/src/parser.c b/src/parser.c index a65fc1a9d5..9743691306 100644 --- a/src/parser.c +++ b/src/parser.c @@ -69,6 +69,31 @@ pm_parser_encoding_name(const pm_parser_t *parser) { return parser->encoding->name; } +/** + * Returns the width of the character at the given pointer in the encoding that + * is being used to parse the source. 
+ */ +size_t +pm_parser_encoding_char_width(const pm_parser_t *parser, const uint8_t *start, ptrdiff_t remaining) { + return parser->encoding->char_width(start, remaining); +} + +/** + * Returns whether or not the parser is using the US-ASCII encoding. + */ +bool +pm_parser_encoding_us_ascii(const pm_parser_t *parser) { + return parser->encoding == PM_ENCODING_US_ASCII_ENTRY; +} + +/** + * Returns the filepath that is being used to parse the source. + */ +const pm_string_t * +pm_parser_filepath(const pm_parser_t *parser) { + return &parser->filepath; +} + /** * Returns the frozen string literal value of the parser. */ From d4a3ef9e435d3d7f330b3f924ce61a6ce754421f Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 18 Mar 2026 20:42:50 -0400 Subject: [PATCH 091/100] pm_parser_constant_find --- include/prism/constant_pool.h | 11 ----------- include/prism/internal/constant_pool.h | 6 ++++++ include/prism/parser.h | 11 +++++++++++ src/parser.c | 9 +++++++++ 4 files changed, 26 insertions(+), 11 deletions(-) diff --git a/include/prism/constant_pool.h b/include/prism/constant_pool.h index 7ca795a701..7868c584a7 100644 --- a/include/prism/constant_pool.h +++ b/include/prism/constant_pool.h @@ -60,15 +60,4 @@ PRISM_EXPORTED_FUNCTION const uint8_t * pm_constant_start(const pm_constant_t *c */ PRISM_EXPORTED_FUNCTION size_t pm_constant_length(const pm_constant_t *constant) PRISM_NONNULL(1); -/** - * Find a constant in a constant pool. Returns the id of the constant, or 0 if - * the constant is not found. - * - * @param pool The constant pool to search. - * @param start A pointer to the start of the string to search for. - * @param length The length of the string to search for. - * @returns The id of the constant, or 0 if the constant is not found. 
- */ -PRISM_EXPORTED_FUNCTION pm_constant_id_t pm_constant_pool_find(const pm_constant_pool_t *pool, const uint8_t *start, size_t length) PRISM_NONNULL(1, 2); - #endif diff --git a/include/prism/internal/constant_pool.h b/include/prism/internal/constant_pool.h index f82fa4d717..7ca265d594 100644 --- a/include/prism/internal/constant_pool.h +++ b/include/prism/internal/constant_pool.h @@ -95,6 +95,12 @@ void pm_constant_pool_init(pm_arena_t *arena, pm_constant_pool_t *pool, uint32_t /* Return a pointer to the constant indicated by the given constant id. */ pm_constant_t * pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t constant_id); +/* + * Find a constant in a constant pool. Returns the id of the constant, or 0 if + * the constant is not found. + */ +pm_constant_id_t pm_constant_pool_find(const pm_constant_pool_t *pool, const uint8_t *start, size_t length); + /* * Insert a constant into a constant pool that is a slice of a source string. * Returns the id of the constant, or 0 if any potential calls to resize fail. diff --git a/include/prism/parser.h b/include/prism/parser.h index c0d0a5b5e4..cf613c2c77 100644 --- a/include/prism/parser.h +++ b/include/prism/parser.h @@ -139,6 +139,17 @@ PRISM_EXPORTED_FUNCTION bool pm_parser_encoding_us_ascii(const pm_parser_t *pars */ PRISM_EXPORTED_FUNCTION const pm_string_t * pm_parser_filepath(const pm_parser_t *parser) PRISM_NONNULL(1); +/** + * Find a constant in the parser's constant pool. Returns the id of the + * constant, or 0 if the constant is not found. 
+ * + * @param parser the parser whose constant pool we want to search + * @param start a pointer to the start of the string to search for + * @param length the length of the string to search for + * @returns the id of the constant, or 0 if the constant is not found + */ +PRISM_EXPORTED_FUNCTION pm_constant_id_t pm_parser_constant_find(const pm_parser_t *parser, const uint8_t *start, size_t length) PRISM_NONNULL(1, 2); + /** * Returns the frozen string literal value of the parser, as determined by the * frozen_string_literal magic comment or the option set on the parser. diff --git a/src/parser.c b/src/parser.c index 9743691306..415cd31984 100644 --- a/src/parser.c +++ b/src/parser.c @@ -94,6 +94,15 @@ pm_parser_filepath(const pm_parser_t *parser) { return &parser->filepath; } +/** + * Find a constant in the parser's constant pool. Returns the id of the + * constant, or 0 if the constant is not found. + */ +pm_constant_id_t +pm_parser_constant_find(const pm_parser_t *parser, const uint8_t *start, size_t length) { + return pm_constant_pool_find(&parser->constant_pool, start, length); +} + /** * Returns the frozen string literal value of the parser. 
*/ From a52c48186e626192124392cad83a61573bdf3393 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 19 Mar 2026 10:04:09 -0400 Subject: [PATCH 092/100] Also expose pm_constant_id_list_init, pm_constant_id_list_append, and pm_string_owned_init --- include/prism/constant_pool.h | 18 ++++++++++++++++++ include/prism/internal/constant_pool.h | 6 ------ include/prism/internal/strings.h | 5 ----- include/prism/strings.h | 9 +++++++++ 4 files changed, 27 insertions(+), 11 deletions(-) diff --git a/include/prism/constant_pool.h b/include/prism/constant_pool.h index 7868c584a7..dc03235c70 100644 --- a/include/prism/constant_pool.h +++ b/include/prism/constant_pool.h @@ -14,6 +14,8 @@ #include "prism/compiler/nodiscard.h" #include "prism/compiler/nonnull.h" +#include "prism/arena.h" + #include #include @@ -60,4 +62,20 @@ PRISM_EXPORTED_FUNCTION const uint8_t * pm_constant_start(const pm_constant_t *c */ PRISM_EXPORTED_FUNCTION size_t pm_constant_length(const pm_constant_t *constant) PRISM_NONNULL(1); +/** + * Initialize a list of constant ids. + * + * @param list The list to initialize. + */ +PRISM_EXPORTED_FUNCTION void pm_constant_id_list_init(pm_constant_id_list_t *list) PRISM_NONNULL(1); + +/** + * Append a constant id to a list of constant ids. + * + * @param arena The arena to use for allocations. + * @param list The list to append to. + * @param id The constant id to append. + */ +PRISM_EXPORTED_FUNCTION void pm_constant_id_list_append(pm_arena_t *arena, pm_constant_id_list_t *list, pm_constant_id_t id) PRISM_NONNULL(1, 2); + #endif diff --git a/include/prism/internal/constant_pool.h b/include/prism/internal/constant_pool.h index 7ca265d594..fa2be783f5 100644 --- a/include/prism/internal/constant_pool.h +++ b/include/prism/internal/constant_pool.h @@ -74,15 +74,9 @@ struct pm_constant_pool_t { */ #define PM_CONSTANT_ID_UNSET 0 -/* Initialize a list of constant ids. 
*/ -void pm_constant_id_list_init(pm_constant_id_list_t *list); - /* Initialize a list of constant ids with a given capacity. */ void pm_constant_id_list_init_capacity(pm_arena_t *arena, pm_constant_id_list_t *list, size_t capacity); -/* Append a constant id to a list of constant ids. */ -void pm_constant_id_list_append(pm_arena_t *arena, pm_constant_id_list_t *list, pm_constant_id_t id); - /* Insert a constant id into a list of constant ids at the specified index. */ void pm_constant_id_list_insert(pm_constant_id_list_t *list, size_t index, pm_constant_id_t id); diff --git a/include/prism/internal/strings.h b/include/prism/internal/strings.h index 0199ed8d67..573eade290 100644 --- a/include/prism/internal/strings.h +++ b/include/prism/internal/strings.h @@ -14,11 +14,6 @@ */ void pm_string_shared_init(pm_string_t *string, const uint8_t *start, const uint8_t *end); -/* - * Initialize an owned string that is responsible for freeing allocated memory. - */ -void pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length); - /* * Ensure the string is owned. If it is not, then reinitialize it as owned and * copy over the previous source. diff --git a/include/prism/strings.h b/include/prism/strings.h index 55059eb307..ac2dd15d1e 100644 --- a/include/prism/strings.h +++ b/include/prism/strings.h @@ -58,6 +58,15 @@ PRISM_EXPORTED_FUNCTION size_t pm_string_sizeof(void); */ PRISM_EXPORTED_FUNCTION void pm_string_constant_init(pm_string_t *string, const char *source, size_t length) PRISM_NONNULL(1); +/** + * Initialize an owned string that is responsible for freeing allocated memory. + * + * @param string The string to initialize. + * @param source The source of the string. + * @param length The length of the string. + */ +PRISM_EXPORTED_FUNCTION void pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length) PRISM_NONNULL(1, 2); + /** * Represents the result of calling pm_string_mapped_init or * pm_string_file_init. 
We need this additional information because there is From f50c25b5c12ba1cb63784226f0a1b6867e93fdc7 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 19 Mar 2026 13:11:55 -0400 Subject: [PATCH 093/100] Introduce pm_source_t --- ext/prism/extension.c | 174 ++++---- include/prism.h | 1 + include/prism/internal/source.h | 72 ++++ include/prism/internal/strings.h | 11 +- include/prism/serialize.h | 11 +- include/prism/source.h | 138 +++++++ include/prism/stream.h | 23 +- include/prism/strings.h | 75 +--- lib/prism/ffi.rb | 104 ++--- rust/ruby-prism-sys/build/main.rs | 1 - rust/ruby-prism-sys/tests/utils_tests.rs | 92 +---- src/options.c | 1 + src/prism.c | 71 +--- src/source.c | 483 +++++++++++++++++++++++ src/strings.c | 311 +-------------- 15 files changed, 849 insertions(+), 719 deletions(-) create mode 100644 include/prism/internal/source.h create mode 100644 include/prism/source.h create mode 100644 src/source.c diff --git a/ext/prism/extension.c b/ext/prism/extension.c index 12bd1c4b98..a3ff4a1632 100644 --- a/ext/prism/extension.c +++ b/ext/prism/extension.c @@ -66,18 +66,6 @@ check_string(VALUE value) { return RSTRING_PTR(value); } -/** - * Load the contents and size of the given string into the given pm_string_t. - */ -static void -input_load_string(pm_string_t *input, VALUE string) { - // Check if the string is a string. If it's not, then raise a type error. - if (!RB_TYPE_P(string, T_STRING)) { - rb_raise(rb_eTypeError, "wrong argument type %" PRIsVALUE " (expected String)", rb_obj_class(string)); - } - - pm_string_constant_init(input, RSTRING_PTR(string), RSTRING_LEN(string)); -} /******************************************************************************/ /* Building C options from Ruby options */ @@ -306,8 +294,8 @@ extract_options(pm_options_t *options, VALUE filepath, VALUE keywords) { /** * Read options for methods that look like (source, **options). 
*/ -static void -string_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) { +static VALUE +string_options(int argc, VALUE *argv, pm_options_t *options) { VALUE string; VALUE keywords; rb_scan_args(argc, argv, "1:", &string, &keywords); @@ -318,14 +306,14 @@ string_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) } extract_options(options, Qnil, keywords); - input_load_string(input, string); + return string; } /** * Read options for methods that look like (filepath, **options). */ -static void -file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options, VALUE *encoded_filepath) { +static pm_source_t * +file_options(int argc, VALUE *argv, pm_options_t *options, VALUE *encoded_filepath) { VALUE filepath; VALUE keywords; rb_scan_args(argc, argv, "1:", &filepath, &keywords); @@ -339,12 +327,13 @@ file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options, V extract_options(options, *encoded_filepath, keywords); const char *source = (const char *) pm_string_source(pm_options_filepath(options)); - pm_string_init_result_t result; + pm_source_init_result_t result; + pm_source_t *pm_src = pm_source_file_new(source, &result); - switch (result = pm_string_file_init(input, source)) { - case PM_STRING_INIT_SUCCESS: + switch (result) { + case PM_SOURCE_INIT_SUCCESS: break; - case PM_STRING_INIT_ERROR_GENERIC: { + case PM_SOURCE_INIT_ERROR_GENERIC: { pm_options_free(options); #ifdef _WIN32 @@ -356,7 +345,7 @@ file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options, V rb_syserr_fail(e, source); break; } - case PM_STRING_INIT_ERROR_DIRECTORY: + case PM_SOURCE_INIT_ERROR_DIRECTORY: pm_options_free(options); rb_syserr_fail(EISDIR, source); break; @@ -365,6 +354,8 @@ file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options, V rb_raise(rb_eRuntimeError, "Unknown error (%d) initializing file: %s", result, source); break; } + + return pm_src; } #ifndef 
PRISM_EXCLUDE_SERIALIZATION @@ -377,14 +368,14 @@ file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options, V * Dump the AST corresponding to the given input to a string. */ static VALUE -dump_input(pm_string_t *input, const pm_options_t *options) { +dump_input(const uint8_t *input, size_t input_length, const pm_options_t *options) { pm_buffer_t *buffer = pm_buffer_new(); if (!buffer) { rb_raise(rb_eNoMemError, "failed to allocate memory"); } pm_arena_t *arena = pm_arena_new(); - pm_parser_t *parser = pm_parser_new(arena, pm_string_source(input), pm_string_length(input), options); + pm_parser_t *parser = pm_parser_new(arena, input, input_length, options); pm_node_t *node = pm_parse(parser); pm_serialize(parser, node, buffer); @@ -407,18 +398,19 @@ dump_input(pm_string_t *input, const pm_options_t *options) { */ static VALUE dump(int argc, VALUE *argv, VALUE self) { - pm_string_t input; pm_options_t *options = pm_options_new(); - string_options(argc, argv, &input, options); + VALUE string = string_options(argc, argv, options); + + const uint8_t *source = (const uint8_t *) RSTRING_PTR(string); + size_t length = RSTRING_LEN(string); #ifdef PRISM_BUILD_DEBUG - size_t length = pm_string_length(&input); char* dup = xmalloc(length); - memcpy(dup, pm_string_source(&input), length); - pm_string_constant_init(&input, dup, length); + memcpy(dup, source, length); + source = (const uint8_t *) dup; #endif - VALUE value = dump_input(&input, options); + VALUE value = dump_input(source, length, options); if (pm_options_freeze(options)) rb_obj_freeze(value); #ifdef PRISM_BUILD_DEBUG @@ -429,7 +421,6 @@ dump(int argc, VALUE *argv, VALUE self) { #endif #endif - pm_string_cleanup(&input); pm_options_free(options); return value; @@ -445,14 +436,13 @@ dump(int argc, VALUE *argv, VALUE self) { */ static VALUE dump_file(int argc, VALUE *argv, VALUE self) { - pm_string_t input; pm_options_t *options = pm_options_new(); VALUE encoded_filepath; - file_options(argc, argv, 
&input, options, &encoded_filepath); + pm_source_t *src = file_options(argc, argv, options, &encoded_filepath); - VALUE value = dump_input(&input, options); - pm_string_cleanup(&input); + VALUE value = dump_input(pm_source_source(src), pm_source_length(src), options); + pm_source_free(src); pm_options_free(options); return value; @@ -791,12 +781,12 @@ parse_lex_encoding_changed_callback(pm_parser_t *parser) { * the nodes and tokens. */ static VALUE -parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nodes) { +parse_lex_input(const uint8_t *input, size_t input_length, const pm_options_t *options, bool return_nodes) { pm_arena_t *arena = pm_arena_new(); - pm_parser_t *parser = pm_parser_new(arena, pm_string_source(input), pm_string_length(input), options); + pm_parser_t *parser = pm_parser_new(arena, input, input_length, options); pm_parser_encoding_changed_callback_set(parser, parse_lex_encoding_changed_callback); - VALUE source_string = rb_str_new((const char *) pm_string_source(input), pm_string_length(input)); + VALUE source_string = rb_str_new((const char *) input, input_length); VALUE offsets = rb_ary_new_capa(pm_parser_line_offsets(parser)->size); VALUE source = rb_funcall(rb_cPrismSource, rb_id_source_for, 3, source_string, LONG2NUM(pm_parser_start_line(parser)), offsets); @@ -858,12 +848,10 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod */ static VALUE lex(int argc, VALUE *argv, VALUE self) { - pm_string_t input; pm_options_t *options = pm_options_new(); - string_options(argc, argv, &input, options); + VALUE string = string_options(argc, argv, options); - VALUE result = parse_lex_input(&input, options, false); - pm_string_cleanup(&input); + VALUE result = parse_lex_input((const uint8_t *) RSTRING_PTR(string), RSTRING_LEN(string), options, false); pm_options_free(options); return result; @@ -879,14 +867,13 @@ lex(int argc, VALUE *argv, VALUE self) { */ static VALUE lex_file(int argc, VALUE *argv, 
VALUE self) { - pm_string_t input; pm_options_t *options = pm_options_new(); VALUE encoded_filepath; - file_options(argc, argv, &input, options, &encoded_filepath); + pm_source_t *src = file_options(argc, argv, options, &encoded_filepath); - VALUE value = parse_lex_input(&input, options, false); - pm_string_cleanup(&input); + VALUE value = parse_lex_input(pm_source_source(src), pm_source_length(src), options, false); + pm_source_free(src); pm_options_free(options); return value; @@ -900,9 +887,9 @@ lex_file(int argc, VALUE *argv, VALUE self) { * Parse the given input and return a ParseResult instance. */ static VALUE -parse_input(pm_string_t *input, const pm_options_t *options) { +parse_input(const uint8_t *input, size_t input_length, const pm_options_t *options) { pm_arena_t *arena = pm_arena_new(); - pm_parser_t *parser = pm_parser_new(arena, pm_string_source(input), pm_string_length(input), options); + pm_parser_t *parser = pm_parser_new(arena, input, input_length, options); pm_node_t *node = pm_parse(parser); rb_encoding *encoding = rb_enc_find(pm_parser_encoding_name(parser)); @@ -969,18 +956,19 @@ parse_input(pm_string_t *input, const pm_options_t *options) { */ static VALUE parse(int argc, VALUE *argv, VALUE self) { - pm_string_t input; pm_options_t *options = pm_options_new(); - string_options(argc, argv, &input, options); + VALUE string = string_options(argc, argv, options); + + const uint8_t *source = (const uint8_t *) RSTRING_PTR(string); + size_t length = RSTRING_LEN(string); #ifdef PRISM_BUILD_DEBUG - size_t length = pm_string_length(&input); char* dup = xmalloc(length); - memcpy(dup, pm_string_source(&input), length); - pm_string_constant_init(&input, dup, length); + memcpy(dup, source, length); + source = (const uint8_t *) dup; #endif - VALUE value = parse_input(&input, options); + VALUE value = parse_input(source, length, options); #ifdef PRISM_BUILD_DEBUG #ifdef xfree_sized @@ -990,7 +978,6 @@ parse(int argc, VALUE *argv, VALUE self) { #endif 
#endif - pm_string_cleanup(&input); pm_options_free(options); return value; } @@ -1005,14 +992,13 @@ parse(int argc, VALUE *argv, VALUE self) { */ static VALUE parse_file(int argc, VALUE *argv, VALUE self) { - pm_string_t input; pm_options_t *options = pm_options_new(); VALUE encoded_filepath; - file_options(argc, argv, &input, options, &encoded_filepath); + pm_source_t *src = file_options(argc, argv, options, &encoded_filepath); - VALUE value = parse_input(&input, options); - pm_string_cleanup(&input); + VALUE value = parse_input(pm_source_source(src), pm_source_length(src), options); + pm_source_free(src); pm_options_free(options); return value; @@ -1022,9 +1008,9 @@ parse_file(int argc, VALUE *argv, VALUE self) { * Parse the given input and return nothing. */ static void -profile_input(pm_string_t *input, const pm_options_t *options) { +profile_input(const uint8_t *input, size_t input_length, const pm_options_t *options) { pm_arena_t *arena = pm_arena_new(); - pm_parser_t *parser = pm_parser_new(arena, pm_string_source(input), pm_string_length(input), options); + pm_parser_t *parser = pm_parser_new(arena, input, input_length, options); pm_parse(parser); pm_parser_free(parser); @@ -1042,12 +1028,10 @@ profile_input(pm_string_t *input, const pm_options_t *options) { */ static VALUE profile(int argc, VALUE *argv, VALUE self) { - pm_string_t input; pm_options_t *options = pm_options_new(); + VALUE string = string_options(argc, argv, options); - string_options(argc, argv, &input, options); - profile_input(&input, options); - pm_string_cleanup(&input); + profile_input((const uint8_t *) RSTRING_PTR(string), RSTRING_LEN(string), options); pm_options_free(options); return Qnil; @@ -1064,14 +1048,13 @@ profile(int argc, VALUE *argv, VALUE self) { */ static VALUE profile_file(int argc, VALUE *argv, VALUE self) { - pm_string_t input; pm_options_t *options = pm_options_new(); VALUE encoded_filepath; - file_options(argc, argv, &input, options, &encoded_filepath); + 
pm_source_t *src = file_options(argc, argv, options, &encoded_filepath); - profile_input(&input, options); - pm_string_cleanup(&input); + profile_input(pm_source_source(src), pm_source_length(src), options); + pm_source_free(src); pm_options_free(options); return Qnil; @@ -1123,18 +1106,18 @@ parse_stream(int argc, VALUE *argv, VALUE self) { pm_options_t *options = pm_options_new(); extract_options(options, Qnil, keywords); + pm_source_t *src = pm_source_stream_new((void *) stream, parse_stream_fgets, parse_stream_eof); pm_arena_t *arena = pm_arena_new(); pm_parser_t *parser; - pm_buffer_t *buffer = pm_buffer_new(); - pm_node_t *node = pm_parse_stream(&parser, arena, buffer, (void *) stream, parse_stream_fgets, parse_stream_eof, options); + pm_node_t *node = pm_parse_stream(&parser, arena, src, options); rb_encoding *encoding = rb_enc_find(pm_parser_encoding_name(parser)); VALUE source = pm_source_new(parser, encoding, pm_options_freeze(options)); VALUE value = pm_ast_new(parser, node, encoding, source, pm_options_freeze(options)); VALUE result = parse_result_create(rb_cPrismParseResult, parser, value, encoding, source, pm_options_freeze(options)); - pm_buffer_free(buffer); + pm_source_free(src); pm_parser_free(parser); pm_arena_free(arena); pm_options_free(options); @@ -1146,9 +1129,9 @@ parse_stream(int argc, VALUE *argv, VALUE self) { * Parse the given input and return an array of Comment objects. 
*/ static VALUE -parse_input_comments(pm_string_t *input, const pm_options_t *options) { +parse_input_comments(const uint8_t *input, size_t input_length, const pm_options_t *options) { pm_arena_t *arena = pm_arena_new(); - pm_parser_t *parser = pm_parser_new(arena, pm_string_source(input), pm_string_length(input), options); + pm_parser_t *parser = pm_parser_new(arena, input, input_length, options); pm_parse(parser); rb_encoding *encoding = rb_enc_find(pm_parser_encoding_name(parser)); @@ -1172,12 +1155,10 @@ parse_input_comments(pm_string_t *input, const pm_options_t *options) { */ static VALUE parse_comments(int argc, VALUE *argv, VALUE self) { - pm_string_t input; pm_options_t *options = pm_options_new(); - string_options(argc, argv, &input, options); + VALUE string = string_options(argc, argv, options); - VALUE result = parse_input_comments(&input, options); - pm_string_cleanup(&input); + VALUE result = parse_input_comments((const uint8_t *) RSTRING_PTR(string), RSTRING_LEN(string), options); pm_options_free(options); return result; @@ -1193,14 +1174,13 @@ parse_comments(int argc, VALUE *argv, VALUE self) { */ static VALUE parse_file_comments(int argc, VALUE *argv, VALUE self) { - pm_string_t input; pm_options_t *options = pm_options_new(); VALUE encoded_filepath; - file_options(argc, argv, &input, options, &encoded_filepath); + pm_source_t *src = file_options(argc, argv, options, &encoded_filepath); - VALUE value = parse_input_comments(&input, options); - pm_string_cleanup(&input); + VALUE value = parse_input_comments(pm_source_source(src), pm_source_length(src), options); + pm_source_free(src); pm_options_free(options); return value; @@ -1223,12 +1203,10 @@ parse_file_comments(int argc, VALUE *argv, VALUE self) { */ static VALUE parse_lex(int argc, VALUE *argv, VALUE self) { - pm_string_t input; pm_options_t *options = pm_options_new(); - string_options(argc, argv, &input, options); + VALUE string = string_options(argc, argv, options); - VALUE value = 
parse_lex_input(&input, options, true); - pm_string_cleanup(&input); + VALUE value = parse_lex_input((const uint8_t *) RSTRING_PTR(string), RSTRING_LEN(string), options, true); pm_options_free(options); return value; @@ -1251,14 +1229,13 @@ parse_lex(int argc, VALUE *argv, VALUE self) { */ static VALUE parse_lex_file(int argc, VALUE *argv, VALUE self) { - pm_string_t input; pm_options_t *options = pm_options_new(); VALUE encoded_filepath; - file_options(argc, argv, &input, options, &encoded_filepath); + pm_source_t *src = file_options(argc, argv, options, &encoded_filepath); - VALUE value = parse_lex_input(&input, options, true); - pm_string_cleanup(&input); + VALUE value = parse_lex_input(pm_source_source(src), pm_source_length(src), options, true); + pm_source_free(src); pm_options_free(options); return value; @@ -1268,9 +1245,9 @@ parse_lex_file(int argc, VALUE *argv, VALUE self) { * Parse the given input and return true if it parses without errors. */ static VALUE -parse_input_success_p(pm_string_t *input, const pm_options_t *options) { +parse_input_success_p(const uint8_t *input, size_t input_length, const pm_options_t *options) { pm_arena_t *arena = pm_arena_new(); - pm_parser_t *parser = pm_parser_new(arena, pm_string_source(input), pm_string_length(input), options); + pm_parser_t *parser = pm_parser_new(arena, input, input_length, options); pm_parse(parser); @@ -1291,12 +1268,10 @@ parse_input_success_p(pm_string_t *input, const pm_options_t *options) { */ static VALUE parse_success_p(int argc, VALUE *argv, VALUE self) { - pm_string_t input; pm_options_t *options = pm_options_new(); - string_options(argc, argv, &input, options); + VALUE string = string_options(argc, argv, options); - VALUE result = parse_input_success_p(&input, options); - pm_string_cleanup(&input); + VALUE result = parse_input_success_p((const uint8_t *) RSTRING_PTR(string), RSTRING_LEN(string), options); pm_options_free(options); return result; @@ -1325,14 +1300,13 @@ parse_failure_p(int 
argc, VALUE *argv, VALUE self) { */ static VALUE parse_file_success_p(int argc, VALUE *argv, VALUE self) { - pm_string_t input; pm_options_t *options = pm_options_new(); VALUE encoded_filepath; - file_options(argc, argv, &input, options, &encoded_filepath); + pm_source_t *src = file_options(argc, argv, options, &encoded_filepath); - VALUE result = parse_input_success_p(&input, options); - pm_string_cleanup(&input); + VALUE result = parse_input_success_p(pm_source_source(src), pm_source_length(src), options); + pm_source_free(src); pm_options_free(options); return result; diff --git a/include/prism.h b/include/prism.h index d082e2f655..b342bb32c6 100644 --- a/include/prism.h +++ b/include/prism.h @@ -20,6 +20,7 @@ extern "C" { #include "prism/parser.h" #include "prism/prettyprint.h" #include "prism/serialize.h" +#include "prism/source.h" #include "prism/stream.h" #include "prism/string_query.h" #include "prism/version.h" diff --git a/include/prism/internal/source.h b/include/prism/internal/source.h new file mode 100644 index 0000000000..b3c2b55be3 --- /dev/null +++ b/include/prism/internal/source.h @@ -0,0 +1,72 @@ +#ifndef PRISM_INTERNAL_SOURCE_H +#define PRISM_INTERNAL_SOURCE_H + +#include "prism/source.h" +#include "prism/buffer.h" + +#include + +/* + * The type of source, which determines cleanup behavior. + */ +typedef enum { + /* Wraps existing constant memory, no cleanup. */ + PM_SOURCE_CONSTANT, + + /* Wraps existing shared memory (non-owning slice), no cleanup. */ + PM_SOURCE_SHARED, + + /* Owns a heap-allocated buffer, freed on cleanup. */ + PM_SOURCE_OWNED, + + /* Memory-mapped file, unmapped on cleanup. */ + PM_SOURCE_MAPPED, + + /* Stream source backed by a pm_buffer_t. */ + PM_SOURCE_STREAM +} pm_source_type_t; + +/* + * The internal representation of a source. + */ +struct pm_source_t { + /* A pointer to the start of the source data. */ + const uint8_t *source; + + /* The length of the source data in bytes. 
*/ + size_t length; + + /* The type of the source. */ + pm_source_type_t type; + + /* Stream-specific data, only used for PM_SOURCE_STREAM sources. */ + struct { + /* The buffer that holds the accumulated stream data. */ + pm_buffer_t *buffer; + + /* The stream object to read from. */ + void *stream; + + /* The function to use to read from the stream. */ + pm_source_stream_fgets_t *fgets; + + /* The function to use to check if the stream is at EOF. */ + pm_source_stream_feof_t *feof; + + /* Whether the stream has reached EOF. */ + bool eof; + } stream; +}; + +/* + * Read from a stream into the source's internal buffer. This is used by + * pm_parse_stream to incrementally read the source. + */ +bool pm_source_stream_read(pm_source_t *source); + +/* + * Returns whether the stream source has reached EOF. + */ +bool pm_source_stream_eof(const pm_source_t *source); + +#endif diff --git a/include/prism/internal/strings.h b/include/prism/internal/strings.h index 573eade290..71b02020e3 100644 --- a/include/prism/internal/strings.h +++ b/include/prism/internal/strings.h @@ -14,12 +14,6 @@ */ void pm_string_shared_init(pm_string_t *string, const uint8_t *start, const uint8_t *end); -/* - * Ensure the string is owned. If it is not, then reinitialize it as owned and - * copy over the previous source. - */ -void pm_string_ensure_owned(pm_string_t *string); - /* * Compare the underlying lengths and bytes of two strings. Returns 0 if the * strings are equal, a negative number if the left string is less than the @@ -28,4 +22,9 @@ void pm_string_ensure_owned(pm_string_t *string); */ int pm_string_compare(const pm_string_t *left, const pm_string_t *right); +/* + * Free the associated memory of the given string. 
+ */ +void pm_string_cleanup(pm_string_t *string); + #endif diff --git a/include/prism/serialize.h b/include/prism/serialize.h index dba54d75f1..786a1514bc 100644 --- a/include/prism/serialize.h +++ b/include/prism/serialize.h @@ -18,6 +18,7 @@ #include "prism/buffer.h" #include "prism/parser.h" +#include "prism/source.h" #include "prism/stream.h" /** @@ -40,16 +41,14 @@ PRISM_EXPORTED_FUNCTION void pm_serialize(pm_parser_t *parser, pm_node_t *node, PRISM_EXPORTED_FUNCTION void pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) PRISM_NONNULL(1, 2); /** - * Parse and serialize the AST represented by the source that is read out of the - * given stream into to the given buffer. + * Parse and serialize the AST represented by the given source into the given + * buffer. * * @param buffer The buffer to serialize to. - * @param stream The stream to parse. - * @param stream_fgets The function to use to read from the stream. - * @param stream_feof The function to use to tell if the stream has hit eof. + * @param source The source to parse. * @param data The optional data to pass to the parser. */ -PRISM_EXPORTED_FUNCTION void pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const char *data) PRISM_NONNULL(1, 2); +PRISM_EXPORTED_FUNCTION void pm_serialize_parse_stream(pm_buffer_t *buffer, pm_source_t *source, const char *data) PRISM_NONNULL(1, 2); /** * Parse and serialize the comments in the given source to the given buffer. diff --git a/include/prism/source.h b/include/prism/source.h new file mode 100644 index 0000000000..897ce4b880 --- /dev/null +++ b/include/prism/source.h @@ -0,0 +1,138 @@ +/** + * @file source.h + * + * An opaque type representing the source code being parsed, regardless of + * origin (constant memory, file, memory-mapped file, or stream). 
+ */ +#ifndef PRISM_SOURCE_H +#define PRISM_SOURCE_H + +#include "prism/compiler/exported.h" +#include "prism/compiler/filesystem.h" +#include "prism/compiler/nodiscard.h" +#include "prism/compiler/nonnull.h" + +#include +#include + +/** + * An opaque type representing source code being parsed. + */ +typedef struct pm_source_t pm_source_t; + +/** + * This function is used to retrieve a line of input from a stream. It closely + * mirrors that of fgets so that fgets can be used as the default + * implementation. + */ +typedef char * (pm_source_stream_fgets_t)(char *string, int size, void *stream); + +/** + * This function is used to check whether a stream is at EOF. It closely mirrors + * that of feof so that feof can be used as the default implementation. + */ +typedef int (pm_source_stream_feof_t)(void *stream); + +/** + * Represents the result of initializing a source from a file. + */ +typedef enum { + /** Indicates that the source was successfully initialized. */ + PM_SOURCE_INIT_SUCCESS = 0, + + /** + * Indicates a generic error from a source init function, where the type + * of error should be read from `errno` or `GetLastError()`. + */ + PM_SOURCE_INIT_ERROR_GENERIC = 1, + + /** + * Indicates that the file that was attempted to be opened was a directory. + */ + PM_SOURCE_INIT_ERROR_DIRECTORY = 2, + + /** + * Indicates that the file is not a regular file (e.g. a pipe or character + * device) and the caller should handle reading it. + */ + PM_SOURCE_INIT_ERROR_NON_REGULAR = 3 +} pm_source_init_result_t; + +/** + * Create a new source that wraps existing constant memory. The memory is not + * owned and will not be freed. + * + * @param data The pointer to the source data. + * @param length The length of the source data in bytes. + * @returns A new source, or NULL on allocation failure. 
+ */ +PRISM_EXPORTED_FUNCTION pm_source_t * pm_source_constant_new(const uint8_t *data, size_t length) PRISM_NODISCARD; + +/** + * Create a new source that wraps existing shared memory. The memory is not + * owned and will not be freed. Semantically a "slice" of another source. + * + * @param data The pointer to the source data. + * @param length The length of the source data in bytes. + * @returns A new source, or NULL on allocation failure. + */ +PRISM_EXPORTED_FUNCTION pm_source_t * pm_source_shared_new(const uint8_t *data, size_t length) PRISM_NODISCARD; + +/** + * Create a new source by reading a file into a heap-allocated buffer. + * + * @param filepath The path to the file to read. + * @param result Out parameter for the result of the initialization. + * @returns A new source, or NULL on error (with result written to out param). + */ +PRISM_EXPORTED_FUNCTION pm_source_t * pm_source_file_new(const char *filepath, pm_source_init_result_t *result) PRISM_NODISCARD PRISM_NONNULL(1, 2); + +/** + * Create a new source by memory-mapping a file. Falls back to file reading on + * platforms without mmap support. + * + * If the file is a non-regular file (e.g. a pipe or character device), + * PM_SOURCE_INIT_ERROR_NON_REGULAR is returned, allowing the caller to handle + * it appropriately (e.g. by reading it through their own I/O layer). + * + * @param filepath The path to the file to read. + * @param open_flags Additional flags to pass to open(2) (e.g. O_NONBLOCK). + * @param result Out parameter for the result of the initialization. + * @returns A new source, or NULL on error (with result written to out param). + */ +PRISM_EXPORTED_FUNCTION pm_source_t * pm_source_mapped_new(const char *filepath, int open_flags, pm_source_init_result_t *result) PRISM_NODISCARD PRISM_NONNULL(1, 3); + +/** + * Create a new source by reading from a stream using the provided callbacks. + * + * @param stream The stream to read from. 
+ * @param fgets The function to use to read from the stream. + * @param feof The function to use to check if the stream is at EOF. + * @returns A new source, or NULL on allocation failure. + */ +PRISM_EXPORTED_FUNCTION pm_source_t * pm_source_stream_new(void *stream, pm_source_stream_fgets_t *fgets, pm_source_stream_feof_t *feof) PRISM_NODISCARD; + +/** + * Free the given source and any memory it owns. + * + * @param source The source to free. + */ +PRISM_EXPORTED_FUNCTION void pm_source_free(pm_source_t *source) PRISM_NONNULL(1); + +/** + * Returns the length of the source data in bytes. + * + * @param source The source to get the length of. + * @returns The length of the source data. + */ +PRISM_EXPORTED_FUNCTION size_t pm_source_length(const pm_source_t *source) PRISM_NONNULL(1); + +/** + * Returns a pointer to the source data. + * + * @param source The source to get the data of. + * @returns A pointer to the source data. + */ +PRISM_EXPORTED_FUNCTION const uint8_t * pm_source_source(const pm_source_t *source) PRISM_NONNULL(1); + +#endif diff --git a/include/prism/stream.h b/include/prism/stream.h index 7bb4271255..678322b442 100644 --- a/include/prism/stream.h +++ b/include/prism/stream.h @@ -10,36 +10,19 @@ #include "prism/compiler/nonnull.h" #include "prism/arena.h" -#include "prism/buffer.h" #include "prism/options.h" #include "prism/parser.h" - -/** - * This function is used in pm_parse_stream() to retrieve a line of input from a - * stream. It closely mirrors that of fgets so that fgets can be used as the - * default implementation. - */ -typedef char * (pm_parse_stream_fgets_t)(char *string, int size, void *stream); - -/** - * This function is used in pm_parse_stream to check whether a stream is EOF. - * It closely mirrors that of feof so that feof can be used as the - * default implementation. - */ -typedef int (pm_parse_stream_feof_t)(void *stream); +#include "prism/source.h" /** * Parse a stream of Ruby source and return the tree. 
* * @param parser The out parameter to write the parser to. * @param arena The arena to use for all AST-lifetime allocations. - * @param buffer The buffer to use. - * @param stream The stream to parse. - * @param stream_fgets The function to use to read from the stream. - * @param stream_feof The function to use to determine if the stream has hit eof. + * @param source The source to use, created via pm_source_stream_new. * @param options The optional options to use when parsing. * @returns The AST representing the source. */ -PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_parser_t **parser, pm_arena_t *arena, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options) PRISM_NONNULL(1, 2, 3); +PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_parser_t **parser, pm_arena_t *arena, pm_source_t *source, const pm_options_t *options) PRISM_NONNULL(1, 2, 3); #endif diff --git a/include/prism/strings.h b/include/prism/strings.h index ac2dd15d1e..c2f120d07a 100644 --- a/include/prism/strings.h +++ b/include/prism/strings.h @@ -7,7 +7,6 @@ #define PRISM_STRINGS_H #include "prism/compiler/exported.h" -#include "prism/compiler/filesystem.h" #include "prism/compiler/nonnull.h" #include @@ -31,24 +30,11 @@ typedef struct { /** This is a slice of another string, and should not be freed. */ PM_STRING_SHARED, - /** This string owns its memory, and should be freed using `pm_string_cleanup()`. */ - PM_STRING_OWNED, - -#ifdef PRISM_HAS_MMAP - /** This string is a memory-mapped file, and should be freed using `pm_string_cleanup()`. */ - PM_STRING_MAPPED -#endif + /** This string owns its memory, and should be freed internally. */ + PM_STRING_OWNED } type; } pm_string_t; -/** - * Returns the size of the pm_string_t struct. This is necessary to allocate the - * correct amount of memory in the FFI backend. - * - * @returns The size of the pm_string_t struct. 
- */ -PRISM_EXPORTED_FUNCTION size_t pm_string_sizeof(void); - /** * Initialize a constant string that doesn't own its memory source. * @@ -67,56 +53,6 @@ PRISM_EXPORTED_FUNCTION void pm_string_constant_init(pm_string_t *string, const */ PRISM_EXPORTED_FUNCTION void pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length) PRISM_NONNULL(1, 2); -/** - * Represents the result of calling pm_string_mapped_init or - * pm_string_file_init. We need this additional information because there is - * not a platform-agnostic way to indicate that the file that was attempted to - * be opened was a directory. - */ -typedef enum { - /** Indicates that the string was successfully initialized. */ - PM_STRING_INIT_SUCCESS = 0, - - /** - * Indicates a generic error from a string_*_init function, where the type - * of error should be read from `errno` or `GetLastError()`. - */ - PM_STRING_INIT_ERROR_GENERIC = 1, - - /** - * Indicates that the file that was attempted to be opened was a directory. - */ - PM_STRING_INIT_ERROR_DIRECTORY = 2 -} pm_string_init_result_t; - -/** - * Read the file indicated by the filepath parameter into source and load its - * contents and size into the given `pm_string_t`. The given `pm_string_t` - * should be freed using `pm_string_cleanup` when it is no longer used. - * - * We want to use demand paging as much as possible in order to avoid having to - * read the entire file into memory (which could be detrimental to performance - * for large files). This means that if we're on windows we'll use - * `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use - * `mmap`, and on other POSIX systems we'll use `read`. - * - * @param string The string to initialize. - * @param filepath The filepath to read. - * @returns The success of the read, indicated by the value of the enum. 
- */ -PRISM_EXPORTED_FUNCTION pm_string_init_result_t pm_string_mapped_init(pm_string_t *string, const char *filepath) PRISM_NONNULL(1, 2); - -/** - * Read the file indicated by the filepath parameter into source and load its - * contents and size into the given `pm_string_t`. The given `pm_string_t` - * should be freed using `pm_string_cleanup` when it is no longer used. - * - * @param string The string to initialize. - * @param filepath The filepath to read. - * @returns The success of the read, indicated by the value of the enum. - */ -PRISM_EXPORTED_FUNCTION pm_string_init_result_t pm_string_file_init(pm_string_t *string, const char *filepath) PRISM_NONNULL(1, 2); - /** * Returns the length associated with the string. * @@ -133,11 +69,4 @@ PRISM_EXPORTED_FUNCTION size_t pm_string_length(const pm_string_t *string) PRISM */ PRISM_EXPORTED_FUNCTION const uint8_t * pm_string_source(const pm_string_t *string) PRISM_NONNULL(1); -/** - * Free the associated memory of the given string. - * - * @param string The string to free. - */ -PRISM_EXPORTED_FUNCTION void pm_string_cleanup(pm_string_t *string) PRISM_NONNULL(1); - #endif diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb index 5144ba72c4..163dafef32 100644 --- a/lib/prism/ffi.rb +++ b/lib/prism/ffi.rb @@ -88,9 +88,9 @@ def self.load_exported_functions_from(header, *functions, callbacks) raise "Could not find functions #{functions.inspect}" unless functions.empty? 
end - callback :pm_parse_stream_fgets_t, [:pointer, :int, :pointer], :pointer - callback :pm_parse_stream_feof_t, [:pointer], :int - enum :pm_string_init_result_t, %i[PM_STRING_INIT_SUCCESS PM_STRING_INIT_ERROR_GENERIC PM_STRING_INIT_ERROR_DIRECTORY] + callback :pm_source_stream_fgets_t, [:pointer, :int, :pointer], :pointer + callback :pm_source_stream_feof_t, [:pointer], :int + enum :pm_source_init_result_t, %i[PM_SOURCE_INIT_SUCCESS PM_SOURCE_INIT_ERROR_GENERIC PM_SOURCE_INIT_ERROR_DIRECTORY PM_SOURCE_INIT_ERROR_NON_REGULAR] enum :pm_string_query_t, [:PM_STRING_QUERY_ERROR, -1, :PM_STRING_QUERY_FALSE, :PM_STRING_QUERY_TRUE] load_exported_functions_from( @@ -107,7 +107,7 @@ def self.load_exported_functions_from(header, *functions, callbacks) "pm_serialize_lex", "pm_serialize_parse_lex", "pm_serialize_parse_success_p", - [:pm_parse_stream_fgets_t, :pm_parse_stream_feof_t] + [] ) load_exported_functions_from( @@ -128,13 +128,14 @@ def self.load_exported_functions_from(header, *functions, callbacks) ) load_exported_functions_from( - "prism/strings.h", - "pm_string_mapped_init", - "pm_string_cleanup", - "pm_string_source", - "pm_string_length", - "pm_string_sizeof", - [] + "prism/source.h", + "pm_source_file_new", + "pm_source_mapped_new", + "pm_source_stream_new", + "pm_source_free", + "pm_source_source", + "pm_source_length", + [:pm_source_stream_fgets_t, :pm_source_stream_feof_t] ) # This object represents a pm_buffer_t. We only use it as an opaque pointer, @@ -172,11 +173,9 @@ def self.with end end - # This object represents a pm_string_t. We only use it as an opaque pointer, - # so it doesn't have to be an FFI::Struct. - class PrismString # :nodoc: - SIZEOF = LibRubyParser.pm_string_sizeof - + # This object represents source code to be parsed. For strings it wraps a + # pointer directly; for files it uses a pm_source_t under the hood. 
+ class PrismSource # :nodoc: PLATFORM_EXPECTS_UTF8 = RbConfig::CONFIG["host_os"].match?(/bccwin|cygwin|djgpp|mingw|mswin|wince|darwin/i) @@ -193,7 +192,7 @@ def read @pointer.read_string(@length) end - # Yields a pm_string_t pointer to the given block. + # Yields a PrismSource backed by the given string to the block. def self.with_string(string) raise TypeError unless string.is_a?(String) @@ -207,32 +206,39 @@ def self.with_string(string) end end - # Yields a pm_string_t pointer to the given block. + # Yields a PrismSource to the given block, backed by a pm_source_t. def self.with_file(filepath) raise TypeError unless filepath.is_a?(String) # On Windows and Mac, it's expected that filepaths will be encoded in # UTF-8. If they are not, we need to convert them to UTF-8 before - # passing them into pm_string_mapped_init. + # passing them into pm_source_mapped_new. if PLATFORM_EXPECTS_UTF8 && (encoding = filepath.encoding) != Encoding::ASCII_8BIT && encoding != Encoding::UTF_8 filepath = filepath.encode(Encoding::UTF_8) end - FFI::MemoryPointer.new(SIZEOF) do |pm_string| - case (result = LibRubyParser.pm_string_mapped_init(pm_string, filepath)) - when :PM_STRING_INIT_SUCCESS - pointer = LibRubyParser.pm_string_source(pm_string) - length = LibRubyParser.pm_string_length(pm_string) + FFI::MemoryPointer.new(:int) do |result_ptr| + pm_source = LibRubyParser.pm_source_mapped_new(filepath, 0, result_ptr) + result = LibRubyParser.enum_type(:pm_source_init_result_t)[result_ptr.read_int] + + case result + when :PM_SOURCE_INIT_SUCCESS + pointer = LibRubyParser.pm_source_source(pm_source) + length = LibRubyParser.pm_source_length(pm_source) return yield new(pointer, length, false) - when :PM_STRING_INIT_ERROR_GENERIC + when :PM_SOURCE_INIT_ERROR_GENERIC raise SystemCallError.new(filepath, FFI.errno) - when :PM_STRING_INIT_ERROR_DIRECTORY + when :PM_SOURCE_INIT_ERROR_DIRECTORY raise Errno::EISDIR.new(filepath) + when :PM_SOURCE_INIT_ERROR_NON_REGULAR + # Fall back to reading the 
file through Ruby IO for non-regular + # files (pipes, character devices, etc.) + return with_string(File.read(filepath)) { |string| yield string } else - raise "Unknown error initializing pm_string_t: #{result.inspect}" + raise "Unknown error initializing pm_source_t: #{result.inspect}" end ensure - LibRubyParser.pm_string_cleanup(pm_string) + LibRubyParser.pm_source_free(pm_source) if pm_source && !pm_source.null? end end end @@ -248,29 +254,29 @@ def self.with_file(filepath) class << self # Mirror the Prism.dump API by using the serialization API. def dump(source, **options) - LibRubyParser::PrismString.with_string(source) { |string| dump_common(string, options) } + LibRubyParser::PrismSource.with_string(source) { |string| dump_common(string, options) } end # Mirror the Prism.dump_file API by using the serialization API. def dump_file(filepath, **options) options[:filepath] = filepath - LibRubyParser::PrismString.with_file(filepath) { |string| dump_common(string, options) } + LibRubyParser::PrismSource.with_file(filepath) { |string| dump_common(string, options) } end # Mirror the Prism.lex API by using the serialization API. def lex(code, **options) - LibRubyParser::PrismString.with_string(code) { |string| lex_common(string, code, options) } + LibRubyParser::PrismSource.with_string(code) { |string| lex_common(string, code, options) } end # Mirror the Prism.lex_file API by using the serialization API. def lex_file(filepath, **options) options[:filepath] = filepath - LibRubyParser::PrismString.with_file(filepath) { |string| lex_common(string, string.read, options) } + LibRubyParser::PrismSource.with_file(filepath) { |string| lex_common(string, string.read, options) } end # Mirror the Prism.parse API by using the serialization API. 
def parse(code, **options) - LibRubyParser::PrismString.with_string(code) { |string| parse_common(string, code, options) } + LibRubyParser::PrismSource.with_string(code) { |string| parse_common(string, code, options) } end # Mirror the Prism.parse_file API by using the serialization API. This uses @@ -278,7 +284,7 @@ def parse(code, **options) # when it is available. def parse_file(filepath, **options) options[:filepath] = filepath - LibRubyParser::PrismString.with_file(filepath) { |string| parse_common(string, string.read, options) } + LibRubyParser::PrismSource.with_file(filepath) { |string| parse_common(string, string.read, options) } end # Mirror the Prism.parse_stream API by using the serialization API. @@ -296,19 +302,19 @@ def parse_stream(stream, **options) eof_callback = -> (_) { stream.eof? } - # In the pm_serialize_parse_stream function it accepts a pointer to the - # IO object as a void* and then passes it through to the callback as the - # third argument, but it never touches it itself. As such, since we have - # access to the IO object already through the closure of the lambda, we - # can pass a null pointer here and not worry. - LibRubyParser.pm_serialize_parse_stream(buffer.pointer, nil, callback, eof_callback, dump_options(options)) - Prism.load(source, buffer.read, options.fetch(:freeze, false)) + pm_source = LibRubyParser.pm_source_stream_new(nil, callback, eof_callback) + begin + LibRubyParser.pm_serialize_parse_stream(buffer.pointer, pm_source, dump_options(options)) + Prism.load(source, buffer.read, options.fetch(:freeze, false)) + ensure + LibRubyParser.pm_source_free(pm_source) if pm_source && !pm_source.null? + end end end # Mirror the Prism.parse_comments API by using the serialization API. 
def parse_comments(code, **options) - LibRubyParser::PrismString.with_string(code) { |string| parse_comments_common(string, code, options) } + LibRubyParser::PrismSource.with_string(code) { |string| parse_comments_common(string, code, options) } end # Mirror the Prism.parse_file_comments API by using the serialization @@ -316,23 +322,23 @@ def parse_comments(code, **options) # to use mmap when it is available. def parse_file_comments(filepath, **options) options[:filepath] = filepath - LibRubyParser::PrismString.with_file(filepath) { |string| parse_comments_common(string, string.read, options) } + LibRubyParser::PrismSource.with_file(filepath) { |string| parse_comments_common(string, string.read, options) } end # Mirror the Prism.parse_lex API by using the serialization API. def parse_lex(code, **options) - LibRubyParser::PrismString.with_string(code) { |string| parse_lex_common(string, code, options) } + LibRubyParser::PrismSource.with_string(code) { |string| parse_lex_common(string, code, options) } end # Mirror the Prism.parse_lex_file API by using the serialization API. def parse_lex_file(filepath, **options) options[:filepath] = filepath - LibRubyParser::PrismString.with_file(filepath) { |string| parse_lex_common(string, string.read, options) } + LibRubyParser::PrismSource.with_file(filepath) { |string| parse_lex_common(string, string.read, options) } end # Mirror the Prism.parse_success? API by using the serialization API. def parse_success?(code, **options) - LibRubyParser::PrismString.with_string(code) { |string| parse_file_success_common(string, options) } + LibRubyParser::PrismSource.with_string(code) { |string| parse_file_success_common(string, options) } end # Mirror the Prism.parse_failure? API by using the serialization API. @@ -343,7 +349,7 @@ def parse_failure?(code, **options) # Mirror the Prism.parse_file_success? API by using the serialization API. 
def parse_file_success?(filepath, **options) options[:filepath] = filepath - LibRubyParser::PrismString.with_file(filepath) { |string| parse_file_success_common(string, options) } + LibRubyParser::PrismSource.with_file(filepath) { |string| parse_file_success_common(string, options) } end # Mirror the Prism.parse_file_failure? API by using the serialization API. @@ -353,7 +359,7 @@ def parse_file_failure?(filepath, **options) # Mirror the Prism.profile API by using the serialization API. def profile(source, **options) - LibRubyParser::PrismString.with_string(source) do |string| + LibRubyParser::PrismSource.with_string(source) do |string| LibRubyParser::PrismBuffer.with do |buffer| LibRubyParser.pm_serialize_parse(buffer.pointer, string.pointer, string.length, dump_options(options)) nil @@ -363,7 +369,7 @@ def profile(source, **options) # Mirror the Prism.profile_file API by using the serialization API. def profile_file(filepath, **options) - LibRubyParser::PrismString.with_file(filepath) do |string| + LibRubyParser::PrismSource.with_file(filepath) do |string| LibRubyParser::PrismBuffer.with do |buffer| options[:filepath] = filepath LibRubyParser.pm_serialize_parse(buffer.pointer, string.pointer, string.length, dump_options(options)) diff --git a/rust/ruby-prism-sys/build/main.rs b/rust/ruby-prism-sys/build/main.rs index 722da06970..594c01ad9c 100644 --- a/rust/ruby-prism-sys/build/main.rs +++ b/rust/ruby-prism-sys/build/main.rs @@ -200,7 +200,6 @@ fn generate_bindings(ruby_include_path: &Path) -> bindgen::Bindings { .allowlist_function("pm_parser_warnings_each") .allowlist_function("pm_parser_warnings_size") .allowlist_function("pm_size_to_native") - .allowlist_function("pm_string_cleanup") .allowlist_function("pm_string_constant_init") .allowlist_function("pm_string_length") .allowlist_function("pm_string_source") diff --git a/rust/ruby-prism-sys/tests/utils_tests.rs b/rust/ruby-prism-sys/tests/utils_tests.rs index 9b9db446c2..c619765c63 100644 --- 
a/rust/ruby-prism-sys/tests/utils_tests.rs +++ b/rust/ruby-prism-sys/tests/utils_tests.rs @@ -1,4 +1,4 @@ -use std::ffi::{CStr, CString}; +use std::ffi::CStr; #[test] fn version_test() { @@ -11,93 +11,3 @@ fn version_test() { assert_eq!(&cstring.to_string_lossy(), "1.9.0"); } - -mod string { - use ruby_prism_sys::{ - pm_string_cleanup, pm_string_length, pm_string_source, pm_string_t, pm_string_t__bindgen_ty_1, - PM_STRING_CONSTANT, PM_STRING_MAPPED, PM_STRING_OWNED, PM_STRING_SHARED, - }; - - use super::*; - - struct S { - c_string: CString, - pm_string: pm_string_t, - } - - impl S { - fn start_ptr(&self) -> *const u8 { - self.c_string.as_ptr().cast::() - } - } - - fn make_string(string_type: pm_string_t__bindgen_ty_1) -> S { - let c_string = CString::new("0123456789012345").unwrap(); - - let pm_string = pm_string_t { - type_: string_type, - source: c_string.as_ptr().cast::(), - length: c_string.as_bytes().len(), - }; - - S { c_string, pm_string } - } - - #[test] - fn shared_string_test() { - let mut s = make_string(PM_STRING_SHARED); - - unsafe { - let len = pm_string_length(&raw const s.pm_string); - assert_eq!(len, 16); - - let result_start = pm_string_source(&raw const s.pm_string); - assert_eq!(s.start_ptr(), result_start); - - pm_string_cleanup(&raw mut s.pm_string); - } - } - - #[test] - fn owned_string_test() { - let s = make_string(PM_STRING_OWNED); - - unsafe { - let result_len = pm_string_length(&raw const s.pm_string); - assert_eq!(result_len, 16); - - let result_start = pm_string_source(&raw const s.pm_string); - assert_eq!(s.pm_string.source, result_start); - - // Don't drop the pm_string--we don't own it anymore! 
- } - } - - #[test] - fn constant_string_test() { - let mut s = make_string(PM_STRING_CONSTANT); - - unsafe { - let result_len = pm_string_length(&raw const s.pm_string); - assert_eq!(result_len, 16); - - let result_start = pm_string_source(&raw const s.pm_string); - assert_eq!(s.pm_string.source, result_start); - - pm_string_cleanup(&raw mut s.pm_string); - } - } - - #[test] - fn mapped_string_test() { - let s = make_string(PM_STRING_MAPPED); - - unsafe { - let result_len = pm_string_length(&raw const s.pm_string); - assert_eq!(result_len, 16); - - let result_start = pm_string_source(&raw const s.pm_string); - assert_eq!(s.pm_string.source, result_start); - } - } -} diff --git a/src/options.c b/src/options.c index 59f1dd4f17..314335be97 100644 --- a/src/options.c +++ b/src/options.c @@ -4,6 +4,7 @@ #include "prism/internal/allocator.h" #include "prism/internal/char.h" +#include "prism/internal/strings.h" #include #include diff --git a/src/prism.c b/src/prism.c index 39602fbfc2..514891a8b6 100644 --- a/src/prism.c +++ b/src/prism.c @@ -22,6 +22,7 @@ #include "prism/internal/parser.h" #include "prism/internal/regexp.h" #include "prism/internal/serialize.h" +#include "prism/internal/source.h" #include "prism/internal/static_literals.h" #include "prism/internal/strings.h" #include "prism/internal/strncasecmp.h" @@ -22699,60 +22700,6 @@ pm_parse(pm_parser_t *parser) { return node; } -/** - * Read into the stream until the gets callback returns false. If the last read - * line from the stream matches an __END__ marker, then halt and return false, - * otherwise return true. 
- */ -static bool -pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof) { -#define LINE_SIZE 4096 - char line[LINE_SIZE]; - - while (memset(line, '\n', LINE_SIZE), stream_fgets(line, LINE_SIZE, stream) != NULL) { - size_t length = LINE_SIZE; - while (length > 0 && line[length - 1] == '\n') length--; - - if (length == LINE_SIZE) { - // If we read a line that is the maximum size and it doesn't end - // with a newline, then we'll just append it to the buffer and - // continue reading. - length--; - pm_buffer_append_string(buffer, line, length); - continue; - } - - // Append the line to the buffer. - length--; - pm_buffer_append_string(buffer, line, length); - - // Check if the line matches the __END__ marker. If it does, then stop - // reading and return false. In most circumstances, this means we should - // stop reading from the stream so that the DATA constant can pick it - // up. - switch (length) { - case 7: - if (strncmp(line, "__END__", 7) == 0) return false; - break; - case 8: - if (strncmp(line, "__END__\n", 8) == 0) return false; - break; - case 9: - if (strncmp(line, "__END__\r\n", 9) == 0) return false; - break; - } - - // All data should be read via gets. If the string returned by gets - // _doesn't_ end with a newline, then we assume we hit EOF condition. - if (stream_feof(stream)) { - break; - } - } - - return true; -#undef LINE_SIZE -} - /** * Parse a stream of Ruby source and return the tree. * @@ -22760,19 +22707,19 @@ pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t * can stream stdin in to Ruby so we need to support a streaming API. 
*/ pm_node_t * -pm_parse_stream(pm_parser_t **parser, pm_arena_t *arena, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options) { - bool eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof); +pm_parse_stream(pm_parser_t **parser, pm_arena_t *arena, pm_source_t *source, const pm_options_t *options) { + bool eof = pm_source_stream_read(source); - pm_parser_t *tmp = pm_parser_new(arena, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options); + pm_parser_t *tmp = pm_parser_new(arena, pm_source_source(source), pm_source_length(source), options); pm_node_t *node = pm_parse(tmp); while (!eof && tmp->error_list.size > 0) { - eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof); + eof = pm_source_stream_read(source); pm_parser_free(tmp); pm_arena_cleanup(arena); - tmp = pm_parser_new(arena, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options); + tmp = pm_parser_new(arena, pm_source_source(source), pm_source_length(source), options); node = pm_parse(tmp); } @@ -22838,19 +22785,17 @@ pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, cons * given stream into to the given buffer. 
*/ void -pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const char *data) { +pm_serialize_parse_stream(pm_buffer_t *buffer, pm_source_t *source, const char *data) { pm_arena_t arena = { 0 }; pm_parser_t *parser; pm_options_t options = { 0 }; pm_options_read(&options, data); - pm_buffer_t *parser_buffer = pm_buffer_new(); - pm_node_t *node = pm_parse_stream(&parser, &arena, parser_buffer, stream, stream_fgets, stream_feof, &options); + pm_node_t *node = pm_parse_stream(&parser, &arena, source, &options); pm_serialize_header(buffer); pm_serialize_content(parser, node, buffer); pm_buffer_append_byte(buffer, '\0'); - pm_buffer_free(parser_buffer); pm_parser_free(parser); pm_arena_cleanup(&arena); pm_options_cleanup(&options); diff --git a/src/source.c b/src/source.c new file mode 100644 index 0000000000..42f24ae82f --- /dev/null +++ b/src/source.c @@ -0,0 +1,483 @@ +#include "prism/internal/source.h" + +#include "prism/internal/allocator.h" +#include "prism/internal/buffer.h" + +#include +#include + +/* The following headers are necessary to read files using demand paging. */ +#ifdef _WIN32 +#include +#elif defined(_POSIX_MAPPED_FILES) +#include +#include +#include +#elif defined(PRISM_HAS_FILESYSTEM) +#include +#include +#endif + +static const uint8_t empty_source[] = ""; + +/** + * Allocate and initialize a pm_source_t with the given fields. + */ +static pm_source_t * +pm_source_alloc(const uint8_t *source, size_t length, pm_source_type_t type) { + pm_source_t *result = xmalloc(sizeof(pm_source_t)); + if (result == NULL) abort(); + + *result = (struct pm_source_t) { + .source = source, + .length = length, + .type = type + }; + + return result; +} + +/** + * Create a new source that wraps existing constant memory. 
+ */ +pm_source_t * +pm_source_constant_new(const uint8_t *data, size_t length) { + return pm_source_alloc(data, length, PM_SOURCE_CONSTANT); +} + +/** + * Create a new source that wraps existing shared memory. + */ +pm_source_t * +pm_source_shared_new(const uint8_t *data, size_t length) { + return pm_source_alloc(data, length, PM_SOURCE_SHARED); +} + +#ifdef _WIN32 +/** + * Represents a file handle on Windows, where the path will need to be freed + * when the file is closed. + */ +typedef struct { + /** The path to the file, which will become allocated memory. */ + WCHAR *path; + + /** The size of the allocated path in bytes. */ + size_t path_size; + + /** The handle to the file, which will start as uninitialized memory. */ + HANDLE file; +} pm_source_file_handle_t; + +/** + * Open the file indicated by the filepath parameter for reading on Windows. + */ +static pm_source_init_result_t +pm_source_file_handle_open(pm_source_file_handle_t *handle, const char *filepath) { + int length = MultiByteToWideChar(CP_UTF8, 0, filepath, -1, NULL, 0); + if (length == 0) return PM_SOURCE_INIT_ERROR_GENERIC; + + handle->path_size = sizeof(WCHAR) * ((size_t) length); + handle->path = xmalloc(handle->path_size); + if ((handle->path == NULL) || (MultiByteToWideChar(CP_UTF8, 0, filepath, -1, handle->path, length) == 0)) { + xfree_sized(handle->path, handle->path_size); + return PM_SOURCE_INIT_ERROR_GENERIC; + } + + handle->file = CreateFileW(handle->path, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_READONLY, NULL); + if (handle->file == INVALID_HANDLE_VALUE) { + pm_source_init_result_t result = PM_SOURCE_INIT_ERROR_GENERIC; + + if (GetLastError() == ERROR_ACCESS_DENIED) { + DWORD attributes = GetFileAttributesW(handle->path); + if ((attributes != INVALID_FILE_ATTRIBUTES) && (attributes & FILE_ATTRIBUTE_DIRECTORY)) { + result = PM_SOURCE_INIT_ERROR_DIRECTORY; + } + } + + xfree_sized(handle->path, handle->path_size); + return result; + } + + 
return PM_SOURCE_INIT_SUCCESS; +} + +/** + * Close the file handle and free the path. + */ +static void +pm_source_file_handle_close(pm_source_file_handle_t *handle) { + xfree_sized(handle->path, handle->path_size); + CloseHandle(handle->file); +} +#endif + +/** + * Create a new source by memory-mapping a file. + */ +pm_source_t * +pm_source_mapped_new(const char *filepath, int open_flags, pm_source_init_result_t *result) { +#ifdef _WIN32 + (void) open_flags; + + /* Open the file for reading. */ + pm_source_file_handle_t handle; + *result = pm_source_file_handle_open(&handle, filepath); + if (*result != PM_SOURCE_INIT_SUCCESS) return NULL; + + /* Get the file size. */ + DWORD file_size = GetFileSize(handle.file, NULL); + if (file_size == INVALID_FILE_SIZE) { + pm_source_file_handle_close(&handle); + *result = PM_SOURCE_INIT_ERROR_GENERIC; + return NULL; + } + + /* If the file is empty, then return a constant source. */ + if (file_size == 0) { + pm_source_file_handle_close(&handle); + *result = PM_SOURCE_INIT_SUCCESS; + return pm_source_alloc(empty_source, 0, PM_SOURCE_CONSTANT); + } + + /* Create a mapping of the file. */ + HANDLE mapping = CreateFileMapping(handle.file, NULL, PAGE_READONLY, 0, 0, NULL); + if (mapping == NULL) { + pm_source_file_handle_close(&handle); + *result = PM_SOURCE_INIT_ERROR_GENERIC; + return NULL; + } + + /* Map the file into memory. */ + uint8_t *source = (uint8_t *) MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0); + CloseHandle(mapping); + pm_source_file_handle_close(&handle); + + if (source == NULL) { + *result = PM_SOURCE_INIT_ERROR_GENERIC; + return NULL; + } + + *result = PM_SOURCE_INIT_SUCCESS; + return pm_source_alloc(source, (size_t) file_size, PM_SOURCE_MAPPED); +#elif defined(_POSIX_MAPPED_FILES) + /* Open the file for reading. */ + int fd = open(filepath, O_RDONLY | open_flags); + if (fd == -1) { + *result = PM_SOURCE_INIT_ERROR_GENERIC; + return NULL; + } + + /* Stat the file to get the file size. 
*/ + struct stat sb; + if (fstat(fd, &sb) == -1) { + close(fd); + *result = PM_SOURCE_INIT_ERROR_GENERIC; + return NULL; + } + + /* Ensure it is a file and not a directory. */ + if (S_ISDIR(sb.st_mode)) { + close(fd); + *result = PM_SOURCE_INIT_ERROR_DIRECTORY; + return NULL; + } + + /* + * For non-regular files (pipes, character devices), return a specific + * error so the caller can handle reading through their own I/O layer. + */ + if (!S_ISREG(sb.st_mode)) { + close(fd); + *result = PM_SOURCE_INIT_ERROR_NON_REGULAR; + return NULL; + } + + /* mmap the file descriptor to virtually get the contents. */ + size_t size = (size_t) sb.st_size; + + if (size == 0) { + close(fd); + *result = PM_SOURCE_INIT_SUCCESS; + return pm_source_alloc(empty_source, 0, PM_SOURCE_CONSTANT); + } + + uint8_t *source = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); + if (source == MAP_FAILED) { + close(fd); + *result = PM_SOURCE_INIT_ERROR_GENERIC; + return NULL; + } + + close(fd); + *result = PM_SOURCE_INIT_SUCCESS; + return pm_source_alloc(source, size, PM_SOURCE_MAPPED); +#else + (void) open_flags; + return pm_source_file_new(filepath, result); +#endif +} + +/** + * Create a new source by reading a file into a heap-allocated buffer. + */ +pm_source_t * +pm_source_file_new(const char *filepath, pm_source_init_result_t *result) { +#ifdef _WIN32 + /* Open the file for reading. */ + pm_source_file_handle_t handle; + *result = pm_source_file_handle_open(&handle, filepath); + if (*result != PM_SOURCE_INIT_SUCCESS) return NULL; + + /* Get the file size. */ + const DWORD file_size = GetFileSize(handle.file, NULL); + if (file_size == INVALID_FILE_SIZE) { + pm_source_file_handle_close(&handle); + *result = PM_SOURCE_INIT_ERROR_GENERIC; + return NULL; + } + + /* If the file is empty, return a constant source. 
*/ + if (file_size == 0) { + pm_source_file_handle_close(&handle); + *result = PM_SOURCE_INIT_SUCCESS; + return pm_source_alloc(empty_source, 0, PM_SOURCE_CONSTANT); + } + + /* Create a buffer to read the file into. */ + uint8_t *source = xmalloc(file_size); + if (source == NULL) { + pm_source_file_handle_close(&handle); + *result = PM_SOURCE_INIT_ERROR_GENERIC; + return NULL; + } + + /* Read the contents of the file. */ + DWORD bytes_read; + if (!ReadFile(handle.file, source, file_size, &bytes_read, NULL)) { + xfree_sized(source, file_size); + pm_source_file_handle_close(&handle); + *result = PM_SOURCE_INIT_ERROR_GENERIC; + return NULL; + } + + /* Check the number of bytes read. */ + if (bytes_read != file_size) { + xfree_sized(source, file_size); + pm_source_file_handle_close(&handle); + *result = PM_SOURCE_INIT_ERROR_GENERIC; + return NULL; + } + + pm_source_file_handle_close(&handle); + *result = PM_SOURCE_INIT_SUCCESS; + return pm_source_alloc(source, (size_t) file_size, PM_SOURCE_OWNED); +#elif defined(PRISM_HAS_FILESYSTEM) + /* Open the file for reading. */ + int fd = open(filepath, O_RDONLY); + if (fd == -1) { + *result = PM_SOURCE_INIT_ERROR_GENERIC; + return NULL; + } + + /* Stat the file to get the file size. */ + struct stat sb; + if (fstat(fd, &sb) == -1) { + close(fd); + *result = PM_SOURCE_INIT_ERROR_GENERIC; + return NULL; + } + + /* Ensure it is a file and not a directory. */ + if (S_ISDIR(sb.st_mode)) { + close(fd); + *result = PM_SOURCE_INIT_ERROR_DIRECTORY; + return NULL; + } + + /* Check the size to see if it's empty. 
*/ + size_t size = (size_t) sb.st_size; + if (size == 0) { + close(fd); + *result = PM_SOURCE_INIT_SUCCESS; + return pm_source_alloc(empty_source, 0, PM_SOURCE_CONSTANT); + } + + const size_t length = (size_t) size; + uint8_t *source = xmalloc(length); + if (source == NULL) { + close(fd); + *result = PM_SOURCE_INIT_ERROR_GENERIC; + return NULL; + } + + ssize_t bytes_read = read(fd, source, length); + close(fd); + + if (bytes_read == -1 || (size_t) bytes_read != length) { + xfree_sized(source, length); + *result = PM_SOURCE_INIT_ERROR_GENERIC; + return NULL; + } + + *result = PM_SOURCE_INIT_SUCCESS; + return pm_source_alloc(source, length, PM_SOURCE_OWNED); +#else + (void) filepath; + *result = PM_SOURCE_INIT_ERROR_GENERIC; + perror("pm_source_file_new is not implemented for this platform"); + return NULL; +#endif +} + +/** + * Create a new source by reading from a stream. This allocates the source + * but does not read from the stream yet. Use pm_source_stream_read to read + * data. + */ +pm_source_t * +pm_source_stream_new(void *stream, pm_source_stream_fgets_t *fgets, pm_source_stream_feof_t *feof) { + pm_source_t *source = pm_source_alloc(NULL, 0, PM_SOURCE_STREAM); + source->stream.buffer = pm_buffer_new(); + source->stream.stream = stream; + source->stream.fgets = fgets; + source->stream.feof = feof; + source->stream.eof = false; + + return source; +} + +/** + * Read from the stream into the source's internal buffer until __END__ is + * encountered or EOF is reached. Updates the source pointer and length. + * + * Returns true if EOF was reached, false if __END__ was encountered. 
+ */ +bool +pm_source_stream_read(pm_source_t *source) { + pm_buffer_t *buffer = source->stream.buffer; + +#define LINE_SIZE 4096 + char line[LINE_SIZE]; + + while (memset(line, '\n', LINE_SIZE), source->stream.fgets(line, LINE_SIZE, source->stream.stream) != NULL) { + size_t length = LINE_SIZE; + while (length > 0 && line[length - 1] == '\n') length--; + + if (length == LINE_SIZE) { + /* + * If we read a line that is the maximum size and it doesn't end + * with a newline, then we'll just append it to the buffer and + * continue reading. + */ + length--; + pm_buffer_append_string(buffer, line, length); + continue; + } + + /* Append the line to the buffer. */ + length--; + pm_buffer_append_string(buffer, line, length); + + /* + * Check if the line matches the __END__ marker. If it does, then stop + * reading and return false. In most circumstances, this means we should + * stop reading from the stream so that the DATA constant can pick it + * up. + */ + switch (length) { + case 7: + if (strncmp(line, "__END__", 7) == 0) { + source->source = (const uint8_t *) pm_buffer_value(buffer); + source->length = pm_buffer_length(buffer); + return false; + } + break; + case 8: + if (strncmp(line, "__END__\n", 8) == 0) { + source->source = (const uint8_t *) pm_buffer_value(buffer); + source->length = pm_buffer_length(buffer); + return false; + } + break; + case 9: + if (strncmp(line, "__END__\r\n", 9) == 0) { + source->source = (const uint8_t *) pm_buffer_value(buffer); + source->length = pm_buffer_length(buffer); + return false; + } + break; + } + + /* + * All data should be read via gets. If the string returned by gets + * _doesn't_ end with a newline, then we assume we hit EOF condition. 
+ */ + if (source->stream.feof(source->stream.stream)) { + break; + } + } + +#undef LINE_SIZE + + source->stream.eof = true; + source->source = (const uint8_t *) pm_buffer_value(buffer); + source->length = pm_buffer_length(buffer); + return true; +} + +/** + * Returns whether the stream source has reached EOF. + */ +bool +pm_source_stream_eof(const pm_source_t *source) { + return source->stream.eof; +} + +/** + * Free the given source and any memory it owns. + */ +void +pm_source_free(pm_source_t *source) { + switch (source->type) { + case PM_SOURCE_CONSTANT: + case PM_SOURCE_SHARED: + /* No cleanup needed for the data. */ + break; + case PM_SOURCE_OWNED: + xfree_sized((void *) source->source, source->length); + break; + case PM_SOURCE_MAPPED: +#if defined(_WIN32) + if (source->length > 0) { + UnmapViewOfFile((void *) source->source); + } +#elif defined(_POSIX_MAPPED_FILES) + if (source->length > 0) { + munmap((void *) source->source, source->length); + } +#endif + break; + case PM_SOURCE_STREAM: + pm_buffer_free(source->stream.buffer); + break; + } + + xfree_sized(source, sizeof(pm_source_t)); +} + +/** + * Returns the length of the source data in bytes. + */ +size_t +pm_source_length(const pm_source_t *source) { + return source->length; +} + +/** + * Returns a pointer to the source data. + */ +const uint8_t * +pm_source_source(const pm_source_t *source) { + return source->source; +} diff --git a/src/strings.c b/src/strings.c index 88bb5ad24f..1a37a9d80e 100644 --- a/src/strings.c +++ b/src/strings.c @@ -6,29 +6,6 @@ #include #include -/* The following headers are necessary to read files using demand paging. */ -#ifdef _WIN32 -#include -#elif defined(_POSIX_MAPPED_FILES) -#include -#include -#include -#elif defined(PRISM_HAS_FILESYSTEM) -#include -#include -#endif - -static const uint8_t empty_source[] = ""; - -/** - * Returns the size of the pm_string_t struct. This is necessary to allocate the - * correct amount of memory in the FFI backend. 
- */ -size_t -pm_string_sizeof(void) { - return sizeof(pm_string_t); -} - /** * Initialize a shared string that is based on initial input. */ @@ -67,282 +44,6 @@ pm_string_constant_init(pm_string_t *string, const char *source, size_t length) }; } -#ifdef _WIN32 -/** - * Represents a file handle on Windows, where the path will need to be freed - * when the file is closed. - */ -typedef struct { - /** The path to the file, which will become allocated memory. */ - WCHAR *path; - - /** The handle to the file, which will start as uninitialized memory. */ - HANDLE file; -} pm_string_file_handle_t; - -/** - * Open the file indicated by the filepath parameter for reading on Windows. - * Perform any kind of normalization that needs to happen on the filepath. - */ -static pm_string_init_result_t -pm_string_file_handle_open(pm_string_file_handle_t *handle, const char *filepath) { - int length = MultiByteToWideChar(CP_UTF8, 0, filepath, -1, NULL, 0); - if (length == 0) return PM_STRING_INIT_ERROR_GENERIC; - - const size_t path_size = sizeof(WCHAR) * ((size_t) length); - handle->path = xmalloc(path_size); - if ((handle->path == NULL) || (MultiByteToWideChar(CP_UTF8, 0, filepath, -1, handle->path, length) == 0)) { - xfree_sized(handle->path, path_size); - return PM_STRING_INIT_ERROR_GENERIC; - } - - handle->file = CreateFileW(handle->path, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_READONLY, NULL); - if (handle->file == INVALID_HANDLE_VALUE) { - pm_string_init_result_t result = PM_STRING_INIT_ERROR_GENERIC; - - if (GetLastError() == ERROR_ACCESS_DENIED) { - DWORD attributes = GetFileAttributesW(handle->path); - if ((attributes != INVALID_FILE_ATTRIBUTES) && (attributes & FILE_ATTRIBUTE_DIRECTORY)) { - result = PM_STRING_INIT_ERROR_DIRECTORY; - } - } - - xfree_sized(handle->path, path_size); - return result; - } - - return PM_STRING_INIT_SUCCESS; -} - -/** - * Close the file handle and free the path. 
- */ -static void -pm_string_file_handle_close(pm_string_file_handle_t *handle) { - xfree(handle->path); - CloseHandle(handle->file); -} -#endif - -/** - * Read the file indicated by the filepath parameter into source and load its - * contents and size into the given `pm_string_t`. The given `pm_string_t` - * should be freed using `pm_string_cleanup` when it is no longer used. - * - * We want to use demand paging as much as possible in order to avoid having to - * read the entire file into memory (which could be detrimental to performance - * for large files). This means that if we're on windows we'll use - * `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use - * `mmap`, and on other POSIX systems we'll use `read`. - */ -pm_string_init_result_t -pm_string_mapped_init(pm_string_t *string, const char *filepath) { -#ifdef _WIN32 - // Open the file for reading. - pm_string_file_handle_t handle; - pm_string_init_result_t result = pm_string_file_handle_open(&handle, filepath); - if (result != PM_STRING_INIT_SUCCESS) return result; - - // Get the file size. - DWORD file_size = GetFileSize(handle.file, NULL); - if (file_size == INVALID_FILE_SIZE) { - pm_string_file_handle_close(&handle); - return PM_STRING_INIT_ERROR_GENERIC; - } - - // If the file is empty, then we don't need to do anything else, we'll set - // the source to a constant empty string and return. - if (file_size == 0) { - pm_string_file_handle_close(&handle); - *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = empty_source, .length = 0 }; - return PM_STRING_INIT_SUCCESS; - } - - // Create a mapping of the file. - HANDLE mapping = CreateFileMapping(handle.file, NULL, PAGE_READONLY, 0, 0, NULL); - if (mapping == NULL) { - pm_string_file_handle_close(&handle); - return PM_STRING_INIT_ERROR_GENERIC; - } - - // Map the file into memory. 
- uint8_t *source = (uint8_t *) MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0); - CloseHandle(mapping); - pm_string_file_handle_close(&handle); - - if (source == NULL) { - return PM_STRING_INIT_ERROR_GENERIC; - } - - *string = (pm_string_t) { .type = PM_STRING_MAPPED, .source = source, .length = (size_t) file_size }; - return PM_STRING_INIT_SUCCESS; -#elif defined(_POSIX_MAPPED_FILES) - // Open the file for reading - int fd = open(filepath, O_RDONLY); - if (fd == -1) { - return PM_STRING_INIT_ERROR_GENERIC; - } - - // Stat the file to get the file size - struct stat sb; - if (fstat(fd, &sb) == -1) { - close(fd); - return PM_STRING_INIT_ERROR_GENERIC; - } - - // Ensure it is a file and not a directory - if (S_ISDIR(sb.st_mode)) { - close(fd); - return PM_STRING_INIT_ERROR_DIRECTORY; - } - - // mmap the file descriptor to virtually get the contents - size_t size = (size_t) sb.st_size; - uint8_t *source = NULL; - - if (size == 0) { - close(fd); - *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = empty_source, .length = 0 }; - return PM_STRING_INIT_SUCCESS; - } - - source = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); - if (source == MAP_FAILED) { - close(fd); - return PM_STRING_INIT_ERROR_GENERIC; - } - - close(fd); - *string = (pm_string_t) { .type = PM_STRING_MAPPED, .source = source, .length = size }; - return PM_STRING_INIT_SUCCESS; -#else - return pm_string_file_init(string, filepath); -#endif -} - -/** - * Read the file indicated by the filepath parameter into source and load its - * contents and size into the given `pm_string_t`. The given `pm_string_t` - * should be freed using `pm_string_cleanup` when it is no longer used. - */ -pm_string_init_result_t -pm_string_file_init(pm_string_t *string, const char *filepath) { -#ifdef _WIN32 - // Open the file for reading. 
- pm_string_file_handle_t handle; - pm_string_init_result_t result = pm_string_file_handle_open(&handle, filepath); - if (result != PM_STRING_INIT_SUCCESS) return result; - - // Get the file size. - const DWORD file_size = GetFileSize(handle.file, NULL); - if (file_size == INVALID_FILE_SIZE) { - pm_string_file_handle_close(&handle); - return PM_STRING_INIT_ERROR_GENERIC; - } - - // If the file is empty, then we don't need to do anything else, we'll set - // the source to a constant empty string and return. - if (file_size == 0) { - pm_string_file_handle_close(&handle); - *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = empty_source, .length = 0 }; - return PM_STRING_INIT_SUCCESS; - } - - // Create a buffer to read the file into. - uint8_t *source = xmalloc(file_size); - if (source == NULL) { - pm_string_file_handle_close(&handle); - return PM_STRING_INIT_ERROR_GENERIC; - } - - // Read the contents of the file - DWORD bytes_read; - if (!ReadFile(handle.file, source, file_size, &bytes_read, NULL)) { - pm_string_file_handle_close(&handle); - return PM_STRING_INIT_ERROR_GENERIC; - } - - // Check the number of bytes read - if (bytes_read != file_size) { - xfree_sized(source, file_size); - pm_string_file_handle_close(&handle); - return PM_STRING_INIT_ERROR_GENERIC; - } - - pm_string_file_handle_close(&handle); - *string = (pm_string_t) { .type = PM_STRING_OWNED, .source = source, .length = (size_t) file_size }; - return PM_STRING_INIT_SUCCESS; -#elif defined(PRISM_HAS_FILESYSTEM) - // Open the file for reading - int fd = open(filepath, O_RDONLY); - if (fd == -1) { - return PM_STRING_INIT_ERROR_GENERIC; - } - - // Stat the file to get the file size - struct stat sb; - if (fstat(fd, &sb) == -1) { - close(fd); - return PM_STRING_INIT_ERROR_GENERIC; - } - - // Ensure it is a file and not a directory - if (S_ISDIR(sb.st_mode)) { - close(fd); - return PM_STRING_INIT_ERROR_DIRECTORY; - } - - // Check the size to see if it's empty - size_t size = (size_t) 
sb.st_size; - if (size == 0) { - close(fd); - *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = empty_source, .length = 0 }; - return PM_STRING_INIT_SUCCESS; - } - - const size_t length = (size_t) size; - uint8_t *source = xmalloc(length); - if (source == NULL) { - close(fd); - return PM_STRING_INIT_ERROR_GENERIC; - } - - long bytes_read = (long) read(fd, source, length); - close(fd); - - if (bytes_read == -1) { - xfree_sized(source, length); - return PM_STRING_INIT_ERROR_GENERIC; - } - - *string = (pm_string_t) { .type = PM_STRING_OWNED, .source = source, .length = length }; - return PM_STRING_INIT_SUCCESS; -#else - (void) string; - (void) filepath; - perror("pm_string_file_init is not implemented for this platform"); - return PM_STRING_INIT_ERROR_GENERIC; -#endif -} - -/** - * Ensure the string is owned. If it is not, then reinitialize it as owned and - * copy over the previous source. - */ -void -pm_string_ensure_owned(pm_string_t *string) { - if (string->type == PM_STRING_OWNED) return; - - size_t length = pm_string_length(string); - const uint8_t *source = pm_string_source(string); - - uint8_t *memory = xmalloc(length); - if (!memory) return; - - pm_string_owned_init(string, memory, length); - memcpy((void *) string->source, source, length); -} - /** * Compare the underlying lengths and bytes of two strings. 
Returns 0 if the * strings are equal, a negative number if the left string is less than the @@ -384,17 +85,7 @@ pm_string_source(const pm_string_t *string) { */ void pm_string_cleanup(pm_string_t *string) { - void *memory = (void *) string->source; - if (string->type == PM_STRING_OWNED) { - xfree(memory); -#ifdef PRISM_HAS_MMAP - } else if (string->type == PM_STRING_MAPPED && string->length) { -#if defined(_WIN32) - UnmapViewOfFile(memory); -#elif defined(_POSIX_MAPPED_FILES) - munmap(memory, string->length); -#endif -#endif /* PRISM_HAS_MMAP */ + xfree((void *) string->source); } } From 603e482c105368f949dd27fed104f1e97257a0e8 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 19 Mar 2026 13:23:03 -0400 Subject: [PATCH 094/100] Use xfree_sized everywhere possible --- src/arena.c | 2 +- src/integer.c | 2 +- src/options.c | 2 +- src/prism.c | 4 ++-- src/strings.c | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/arena.c b/src/arena.c index 7ae02715df..64a731649d 100644 --- a/src/arena.c +++ b/src/arena.c @@ -113,5 +113,5 @@ pm_arena_cleanup(pm_arena_t *arena) { void pm_arena_free(pm_arena_t *arena) { pm_arena_cleanup(arena); - xfree(arena); + xfree_sized(arena, sizeof(pm_arena_t)); } diff --git a/src/integer.c b/src/integer.c index 1b69dbdceb..2190ae6e26 100644 --- a/src/integer.c +++ b/src/integer.c @@ -17,7 +17,7 @@ static void pm_integer_free(pm_integer_t *integer) { if (integer->values) { - xfree(integer->values); + xfree_sized(integer->values, integer->length * sizeof(uint32_t)); } } diff --git a/src/options.c b/src/options.c index 314335be97..8973c933f8 100644 --- a/src/options.c +++ b/src/options.c @@ -49,7 +49,7 @@ pm_options_cleanup(pm_options_t *options) { void pm_options_free(pm_options_t *options) { pm_options_cleanup(options); - xfree(options); + xfree_sized(options, sizeof(pm_options_t)); } /** diff --git a/src/prism.c b/src/prism.c index 514891a8b6..b43b8ddc1d 100644 --- a/src/prism.c +++ b/src/prism.c @@ -2285,7 +2285,7 @@ 
pm_integer_arena_move(pm_arena_t *arena, pm_integer_t *integer) { size_t byte_size = integer->length * sizeof(uint32_t); uint32_t *old_values = integer->values; integer->values = (uint32_t *) pm_arena_memdup(arena, old_values, byte_size, PRISM_ALIGNOF(uint32_t)); - xfree(old_values); + xfree_sized(old_values, byte_size); } } @@ -22525,7 +22525,7 @@ pm_parser_cleanup(pm_parser_t *parser) { void pm_parser_free(pm_parser_t *parser) { pm_parser_cleanup(parser); - xfree(parser); + xfree_sized(parser, sizeof(pm_parser_t)); } /** diff --git a/src/strings.c b/src/strings.c index 1a37a9d80e..f140b82ad9 100644 --- a/src/strings.c +++ b/src/strings.c @@ -86,6 +86,6 @@ pm_string_source(const pm_string_t *string) { void pm_string_cleanup(pm_string_t *string) { if (string->type == PM_STRING_OWNED) { - xfree((void *) string->source); + xfree_sized((void *) string->source, string->length); } } From 0c6494a3e1cfd00fde7b7d6bb70467b106980e98 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 19 Mar 2026 14:24:47 -0400 Subject: [PATCH 095/100] pm_source_owned_new --- include/prism/source.h | 16 +++++++++++++--- src/source.c | 8 ++++++++ 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/include/prism/source.h b/include/prism/source.h index 897ce4b880..2f29845578 100644 --- a/include/prism/source.h +++ b/include/prism/source.h @@ -64,7 +64,7 @@ typedef enum { * * @param data The pointer to the source data. * @param length The length of the source data in bytes. - * @returns A new source, or NULL on allocation failure. + * @returns A new source. Aborts on allocation failure. */ PRISM_EXPORTED_FUNCTION pm_source_t * pm_source_constant_new(const uint8_t *data, size_t length) PRISM_NODISCARD; @@ -74,10 +74,20 @@ PRISM_EXPORTED_FUNCTION pm_source_t * pm_source_constant_new(const uint8_t *data * * @param data The pointer to the source data. * @param length The length of the source data in bytes. - * @returns A new source, or NULL on allocation failure. 
+ * @returns A new source. Aborts on allocation failure. */ PRISM_EXPORTED_FUNCTION pm_source_t * pm_source_shared_new(const uint8_t *data, size_t length) PRISM_NODISCARD; +/** + * Create a new source that owns its memory. The memory will be freed with + * xfree when the source is freed. + * + * @param data The pointer to the heap-allocated source data. + * @param length The length of the source data in bytes. + * @returns A new source. Aborts on allocation failure. + */ +PRISM_EXPORTED_FUNCTION pm_source_t * pm_source_owned_new(uint8_t *data, size_t length) PRISM_NODISCARD; + /** * Create a new source by reading a file into a heap-allocated buffer. * @@ -108,7 +118,7 @@ PRISM_EXPORTED_FUNCTION pm_source_t * pm_source_mapped_new(const char *filepath, * @param stream The stream to read from. * @param fgets The function to use to read from the stream. * @param feof The function to use to check if the stream is at EOF. - * @returns A new source, or NULL on allocation failure. + * @returns A new source. Aborts on allocation failure. */ PRISM_EXPORTED_FUNCTION pm_source_t * pm_source_stream_new(void *stream, pm_source_stream_fgets_t *fgets, pm_source_stream_feof_t *feof) PRISM_NODISCARD; diff --git a/src/source.c b/src/source.c index 42f24ae82f..f61cb19c1b 100644 --- a/src/source.c +++ b/src/source.c @@ -53,6 +53,14 @@ pm_source_shared_new(const uint8_t *data, size_t length) { return pm_source_alloc(data, length, PM_SOURCE_SHARED); } +/** + * Create a new source that owns its memory. 
+ */ +pm_source_t * +pm_source_owned_new(uint8_t *data, size_t length) { + return pm_source_alloc(data, length, PM_SOURCE_OWNED); +} + #ifdef _WIN32 /** * Represents a file handle on Windows, where the path will need to be freed From eb398af793177e78447f142a9b824227aecbde94 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 19 Mar 2026 14:39:34 -0400 Subject: [PATCH 096/100] Revert xfree_sized for integer --- src/integer.c | 2 +- src/prism.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/integer.c b/src/integer.c index 2190ae6e26..1b69dbdceb 100644 --- a/src/integer.c +++ b/src/integer.c @@ -17,7 +17,7 @@ static void pm_integer_free(pm_integer_t *integer) { if (integer->values) { - xfree_sized(integer->values, integer->length * sizeof(uint32_t)); + xfree(integer->values); } } diff --git a/src/prism.c b/src/prism.c index b43b8ddc1d..db2a331c00 100644 --- a/src/prism.c +++ b/src/prism.c @@ -2285,7 +2285,7 @@ pm_integer_arena_move(pm_arena_t *arena, pm_integer_t *integer) { size_t byte_size = integer->length * sizeof(uint32_t); uint32_t *old_values = integer->values; integer->values = (uint32_t *) pm_arena_memdup(arena, old_values, byte_size, PRISM_ALIGNOF(uint32_t)); - xfree_sized(old_values, byte_size); + xfree(old_values); } } From 75eb63e10261b6f7e0ecf04ecea48331e24f9abd Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 19 Mar 2026 14:41:16 -0400 Subject: [PATCH 097/100] Fix up gemspec build --- prism.gemspec | 3 +++ 1 file changed, 3 insertions(+) diff --git a/prism.gemspec b/prism.gemspec index 5db6327813..47f96fffc3 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -79,6 +79,7 @@ Gem::Specification.new do |spec| "include/prism/internal/parser.h", "include/prism/internal/regexp.h", "include/prism/internal/serialize.h", + "include/prism/internal/source.h", "include/prism/internal/static_literals.h", "include/prism/internal/strncasecmp.h", "include/prism/internal/strings.h", @@ -100,6 +101,7 @@ Gem::Specification.new do |spec| 
"include/prism/parser.h", "include/prism/prettyprint.h", "include/prism/serialize.h", + "include/prism/source.h", "include/prism/stream.h", "include/prism/string_query.h", "include/prism/strings.h", @@ -210,6 +212,7 @@ Gem::Specification.new do |spec| "src/prism.c", "src/regexp.c", "src/serialize.c", + "src/source.c", "src/static_literals.c", "src/string_query.c", "src/strings.c", From b5683c8708feb032728fa90295b3a0986aedba11 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 19 Mar 2026 15:16:12 -0400 Subject: [PATCH 098/100] Fix up FFI in Ractors reading internal ivar --- lib/prism/ffi.rb | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb index 163dafef32..6b9bde51ea 100644 --- a/lib/prism/ffi.rb +++ b/lib/prism/ffi.rb @@ -90,9 +90,14 @@ def self.load_exported_functions_from(header, *functions, callbacks) callback :pm_source_stream_fgets_t, [:pointer, :int, :pointer], :pointer callback :pm_source_stream_feof_t, [:pointer], :int - enum :pm_source_init_result_t, %i[PM_SOURCE_INIT_SUCCESS PM_SOURCE_INIT_ERROR_GENERIC PM_SOURCE_INIT_ERROR_DIRECTORY PM_SOURCE_INIT_ERROR_NON_REGULAR] + pm_source_init_result_values = %i[PM_SOURCE_INIT_SUCCESS PM_SOURCE_INIT_ERROR_GENERIC PM_SOURCE_INIT_ERROR_DIRECTORY PM_SOURCE_INIT_ERROR_NON_REGULAR] + enum :pm_source_init_result_t, pm_source_init_result_values enum :pm_string_query_t, [:PM_STRING_QUERY_ERROR, -1, :PM_STRING_QUERY_FALSE, :PM_STRING_QUERY_TRUE] + # Ractor-safe lookup table for pm_source_init_result_t, since FFI's + # enum_type accesses module instance variables that are not shareable. 
+ SOURCE_INIT_RESULT = pm_source_init_result_values.freeze + load_exported_functions_from( "prism/version.h", "pm_version", @@ -219,9 +224,8 @@ def self.with_file(filepath) FFI::MemoryPointer.new(:int) do |result_ptr| pm_source = LibRubyParser.pm_source_mapped_new(filepath, 0, result_ptr) - result = LibRubyParser.enum_type(:pm_source_init_result_t)[result_ptr.read_int] - case result + case SOURCE_INIT_RESULT[result_ptr.read_int] when :PM_SOURCE_INIT_SUCCESS pointer = LibRubyParser.pm_source_source(pm_source) length = LibRubyParser.pm_source_length(pm_source) @@ -235,7 +239,7 @@ def self.with_file(filepath) # files (pipes, character devices, etc.) return with_string(File.read(filepath)) { |string| yield string } else - raise "Unknown error initializing pm_source_t: #{result.inspect}" + raise "Unknown error initializing pm_source_t: #{result_ptr.read_int}" end ensure LibRubyParser.pm_source_free(pm_source) if pm_source && !pm_source.null? From eb1d518736f5b497a6c10b8fee2a175a1d5c61da Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 19 Mar 2026 15:58:26 -0400 Subject: [PATCH 099/100] Rename strings to stringy because of linux conflicts --- include/prism/internal/{strings.h => stringy.h} | 6 +++--- include/prism/options.h | 2 +- include/prism/{strings.h => stringy.h} | 6 +++--- prism.gemspec | 6 +++--- src/options.c | 2 +- src/prism.c | 2 +- src/regexp.c | 2 +- src/static_literals.c | 2 +- src/{strings.c => stringy.c} | 2 +- templates/include/prism/ast.h.erb | 2 +- 10 files changed, 16 insertions(+), 16 deletions(-) rename include/prism/internal/{strings.h => stringy.h} (89%) rename include/prism/{strings.h => stringy.h} (96%) rename src/{strings.c => stringy.c} (98%) diff --git a/include/prism/internal/strings.h b/include/prism/internal/stringy.h similarity index 89% rename from include/prism/internal/strings.h rename to include/prism/internal/stringy.h index 71b02020e3..1aaa23ea75 100644 --- a/include/prism/internal/strings.h +++ 
b/include/prism/internal/stringy.h @@ -1,7 +1,7 @@ -#ifndef PRISM_INTERNAL_STRINGS_H -#define PRISM_INTERNAL_STRINGS_H +#ifndef PRISM_INTERNAL_STRINGY_H +#define PRISM_INTERNAL_STRINGY_H -#include "prism/strings.h" +#include "prism/stringy.h" /* * Defines an empty string. This is useful for initializing a string that will diff --git a/include/prism/options.h b/include/prism/options.h index 37a713095e..2b823aebff 100644 --- a/include/prism/options.h +++ b/include/prism/options.h @@ -10,7 +10,7 @@ #include "prism/compiler/nodiscard.h" #include "prism/compiler/nonnull.h" -#include "prism/strings.h" +#include "prism/stringy.h" #include #include diff --git a/include/prism/strings.h b/include/prism/stringy.h similarity index 96% rename from include/prism/strings.h rename to include/prism/stringy.h index c2f120d07a..0d64387ac3 100644 --- a/include/prism/strings.h +++ b/include/prism/stringy.h @@ -1,10 +1,10 @@ /** - * @file strings.h + * @file stringy.h * * A generic string type that can have various ownership semantics. 
*/ -#ifndef PRISM_STRINGS_H -#define PRISM_STRINGS_H +#ifndef PRISM_STRINGY_H +#define PRISM_STRINGY_H #include "prism/compiler/exported.h" #include "prism/compiler/nonnull.h" diff --git a/prism.gemspec b/prism.gemspec index 47f96fffc3..d489a37af4 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -82,7 +82,7 @@ Gem::Specification.new do |spec| "include/prism/internal/source.h", "include/prism/internal/static_literals.h", "include/prism/internal/strncasecmp.h", - "include/prism/internal/strings.h", + "include/prism/internal/stringy.h", "include/prism/internal/strpbrk.h", "include/prism/internal/tokens.h", "include/prism/arena.h", @@ -104,7 +104,7 @@ Gem::Specification.new do |spec| "include/prism/source.h", "include/prism/stream.h", "include/prism/string_query.h", - "include/prism/strings.h", + "include/prism/stringy.h", "include/prism/version.h", "lib/prism.rb", "lib/prism/compiler.rb", @@ -215,7 +215,7 @@ Gem::Specification.new do |spec| "src/source.c", "src/static_literals.c", "src/string_query.c", - "src/strings.c", + "src/stringy.c", "src/strncasecmp.c", "src/strpbrk.c", "src/tokens.c" diff --git a/src/options.c b/src/options.c index 8973c933f8..b589865a2a 100644 --- a/src/options.c +++ b/src/options.c @@ -4,7 +4,7 @@ #include "prism/internal/allocator.h" #include "prism/internal/char.h" -#include "prism/internal/strings.h" +#include "prism/internal/stringy.h" #include #include diff --git a/src/prism.c b/src/prism.c index db2a331c00..53c33cee39 100644 --- a/src/prism.c +++ b/src/prism.c @@ -24,7 +24,7 @@ #include "prism/internal/serialize.h" #include "prism/internal/source.h" #include "prism/internal/static_literals.h" -#include "prism/internal/strings.h" +#include "prism/internal/stringy.h" #include "prism/internal/strncasecmp.h" #include "prism/internal/strpbrk.h" #include "prism/internal/tokens.h" diff --git a/src/regexp.c b/src/regexp.c index 05ef3b6b41..cc17aa4d09 100644 --- a/src/regexp.c +++ b/src/regexp.c @@ -8,7 +8,7 @@ #include 
"prism/internal/encoding.h" #include "prism/internal/memchr.h" #include "prism/internal/parser.h" -#include "prism/internal/strings.h" +#include "prism/internal/stringy.h" #include "prism/internal/strncasecmp.h" #include diff --git a/src/static_literals.c b/src/static_literals.c index 0a8ef62b2c..9af1eadf5d 100644 --- a/src/static_literals.c +++ b/src/static_literals.c @@ -7,7 +7,7 @@ #include "prism/internal/buffer.h" #include "prism/internal/integer.h" #include "prism/internal/isinf.h" -#include "prism/internal/strings.h" +#include "prism/internal/stringy.h" #include #include diff --git a/src/strings.c b/src/stringy.c similarity index 98% rename from src/strings.c rename to src/stringy.c index f140b82ad9..d6f4c4a777 100644 --- a/src/strings.c +++ b/src/stringy.c @@ -1,4 +1,4 @@ -#include "prism/internal/strings.h" +#include "prism/internal/stringy.h" #include "prism/internal/allocator.h" diff --git a/templates/include/prism/ast.h.erb b/templates/include/prism/ast.h.erb index 1909618fc1..3b3be25e76 100644 --- a/templates/include/prism/ast.h.erb +++ b/templates/include/prism/ast.h.erb @@ -14,7 +14,7 @@ #include "prism/arena.h" #include "prism/constant_pool.h" #include "prism/integer.h" -#include "prism/strings.h" +#include "prism/stringy.h" #include #include From ba16ae22561179998e4208e4c37262b47f693c11 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 19 Mar 2026 16:49:16 -0400 Subject: [PATCH 100/100] Move PRISM_NODISCARD to the correct position --- include/prism/arena.h | 2 +- include/prism/buffer.h | 2 +- include/prism/options.h | 2 +- include/prism/parser.h | 2 +- include/prism/source.h | 12 ++++++------ 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/include/prism/arena.h b/include/prism/arena.h index a637e9cef3..e1fa8fc6ad 100644 --- a/include/prism/arena.h +++ b/include/prism/arena.h @@ -25,7 +25,7 @@ typedef struct pm_arena_t pm_arena_t; * the caller to free the arena using pm_arena_free when it is no longer * needed. 
*/ -PRISM_EXPORTED_FUNCTION pm_arena_t * pm_arena_new(void) PRISM_NODISCARD; +PRISM_EXPORTED_FUNCTION PRISM_NODISCARD pm_arena_t * pm_arena_new(void); /** * Frees both the held memory and the arena itself. diff --git a/include/prism/buffer.h b/include/prism/buffer.h index 12844d60ff..24b572d2c3 100644 --- a/include/prism/buffer.h +++ b/include/prism/buffer.h @@ -24,7 +24,7 @@ typedef struct pm_buffer_t pm_buffer_t; * @returns A pointer to the initialized buffer. The caller is responsible for * freeing the buffer with pm_buffer_free. */ -PRISM_EXPORTED_FUNCTION pm_buffer_t * pm_buffer_new(void) PRISM_NODISCARD; +PRISM_EXPORTED_FUNCTION PRISM_NODISCARD pm_buffer_t * pm_buffer_new(void); /** * Free both the memory held by the buffer and the buffer itself. diff --git a/include/prism/options.h b/include/prism/options.h index 2b823aebff..1b6ff4af1f 100644 --- a/include/prism/options.h +++ b/include/prism/options.h @@ -114,7 +114,7 @@ static const uint8_t PM_OPTIONS_COMMAND_LINE_X = 0x20; * @returns A new options struct with default values. It is the responsibility * of the caller to free this struct using pm_options_free(). */ -PRISM_EXPORTED_FUNCTION pm_options_t * pm_options_new(void) PRISM_NODISCARD; +PRISM_EXPORTED_FUNCTION PRISM_NODISCARD pm_options_t * pm_options_new(void); /** * Free both the held memory of the given options struct and the struct itself. diff --git a/include/prism/parser.h b/include/prism/parser.h index cf613c2c77..2c8c4b3a7a 100644 --- a/include/prism/parser.h +++ b/include/prism/parser.h @@ -33,7 +33,7 @@ typedef struct pm_parser_t pm_parser_t; * @returns The initialized parser. It is the responsibility of the caller to * free the parser with `pm_parser_free()`. 
*/ -PRISM_EXPORTED_FUNCTION pm_parser_t * pm_parser_new(pm_arena_t *arena, const uint8_t *source, size_t size, const pm_options_t *options) PRISM_NODISCARD PRISM_NONNULL(1); +PRISM_EXPORTED_FUNCTION PRISM_NODISCARD pm_parser_t * pm_parser_new(pm_arena_t *arena, const uint8_t *source, size_t size, const pm_options_t *options) PRISM_NONNULL(1); /** * Free both the memory held by the given parser and the parser itself. diff --git a/include/prism/source.h b/include/prism/source.h index 2f29845578..c79987d3fb 100644 --- a/include/prism/source.h +++ b/include/prism/source.h @@ -66,7 +66,7 @@ typedef enum { * @param length The length of the source data in bytes. * @returns A new source. Aborts on allocation failure. */ -PRISM_EXPORTED_FUNCTION pm_source_t * pm_source_constant_new(const uint8_t *data, size_t length) PRISM_NODISCARD; +PRISM_EXPORTED_FUNCTION PRISM_NODISCARD pm_source_t * pm_source_constant_new(const uint8_t *data, size_t length); /** * Create a new source that wraps existing shared memory. The memory is not @@ -76,7 +76,7 @@ PRISM_EXPORTED_FUNCTION pm_source_t * pm_source_constant_new(const uint8_t *data * @param length The length of the source data in bytes. * @returns A new source. Aborts on allocation failure. */ -PRISM_EXPORTED_FUNCTION pm_source_t * pm_source_shared_new(const uint8_t *data, size_t length) PRISM_NODISCARD; +PRISM_EXPORTED_FUNCTION PRISM_NODISCARD pm_source_t * pm_source_shared_new(const uint8_t *data, size_t length); /** * Create a new source that owns its memory. The memory will be freed with @@ -86,7 +86,7 @@ PRISM_EXPORTED_FUNCTION pm_source_t * pm_source_shared_new(const uint8_t *data, * @param length The length of the source data in bytes. * @returns A new source. Aborts on allocation failure. 
*/ -PRISM_EXPORTED_FUNCTION pm_source_t * pm_source_owned_new(uint8_t *data, size_t length) PRISM_NODISCARD; +PRISM_EXPORTED_FUNCTION PRISM_NODISCARD pm_source_t * pm_source_owned_new(uint8_t *data, size_t length); /** * Create a new source by reading a file into a heap-allocated buffer. @@ -95,7 +95,7 @@ PRISM_EXPORTED_FUNCTION pm_source_t * pm_source_owned_new(uint8_t *data, size_t * @param result Out parameter for the result of the initialization. * @returns A new source, or NULL on error (with result written to out param). */ -PRISM_EXPORTED_FUNCTION pm_source_t * pm_source_file_new(const char *filepath, pm_source_init_result_t *result) PRISM_NODISCARD PRISM_NONNULL(1, 2); +PRISM_EXPORTED_FUNCTION PRISM_NODISCARD pm_source_t * pm_source_file_new(const char *filepath, pm_source_init_result_t *result) PRISM_NONNULL(1, 2); /** * Create a new source by memory-mapping a file. Falls back to file reading on @@ -110,7 +110,7 @@ PRISM_EXPORTED_FUNCTION pm_source_t * pm_source_file_new(const char *filepath, p * @param result Out parameter for the result of the initialization. * @returns A new source, or NULL on error (with result written to out param). */ -PRISM_EXPORTED_FUNCTION pm_source_t * pm_source_mapped_new(const char *filepath, int open_flags, pm_source_init_result_t *result) PRISM_NODISCARD PRISM_NONNULL(1, 3); +PRISM_EXPORTED_FUNCTION PRISM_NODISCARD pm_source_t * pm_source_mapped_new(const char *filepath, int open_flags, pm_source_init_result_t *result) PRISM_NONNULL(1, 3); /** * Create a new source by reading from a stream using the provided callbacks. @@ -120,7 +120,7 @@ PRISM_EXPORTED_FUNCTION pm_source_t * pm_source_mapped_new(const char *filepath, * @param feof The function to use to check if the stream is at EOF. * @returns A new source. Aborts on allocation failure. 
*/ -PRISM_EXPORTED_FUNCTION pm_source_t * pm_source_stream_new(void *stream, pm_source_stream_fgets_t *fgets, pm_source_stream_feof_t *feof) PRISM_NODISCARD; +PRISM_EXPORTED_FUNCTION PRISM_NODISCARD pm_source_t * pm_source_stream_new(void *stream, pm_source_stream_fgets_t *fgets, pm_source_stream_feof_t *feof); /** * Free the given source and any memory it owns.